kernel.c revision 271533
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 24 * Copyright (c) 2013, Joyent, Inc. All rights reserved. 25 */ 26 27#include <assert.h> 28#include <fcntl.h> 29#include <poll.h> 30#include <stdio.h> 31#include <stdlib.h> 32#include <string.h> 33#include <zlib.h> 34#include <libgen.h> 35#include <sys/spa.h> 36#include <sys/stat.h> 37#include <sys/processor.h> 38#include <sys/zfs_context.h> 39#include <sys/rrwlock.h> 40#include <sys/zmod.h> 41#include <sys/utsname.h> 42#include <sys/systeminfo.h> 43 44/* 45 * Emulation of kernel services in userland. 46 */ 47 48int aok; 49uint64_t physmem; 50vnode_t *rootdir = (vnode_t *)0xabcd1234; 51char hw_serial[HW_HOSTID_LEN]; 52#ifdef illumos 53kmutex_t cpu_lock; 54#endif 55 56/* If set, all blocks read will be copied to the specified directory. */ 57char *vn_dumpdir = NULL; 58 59struct utsname utsname = { 60 "userland", "libzpool", "1", "1", "na" 61}; 62 63/* this only exists to have its address taken */ 64struct proc p0; 65 66/* 67 * ========================================================================= 68 * threads 69 * ========================================================================= 70 */ 71/*ARGSUSED*/ 72kthread_t * 73zk_thread_create(void (*func)(), void *arg) 74{ 75 thread_t tid; 76 77 VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, 78 &tid) == 0); 79 80 return ((void *)(uintptr_t)tid); 81} 82 83/* 84 * ========================================================================= 85 * kstats 86 * ========================================================================= 87 */ 88/*ARGSUSED*/ 89kstat_t * 90kstat_create(char *module, int instance, char *name, char *class, 91 uchar_t type, ulong_t ndata, uchar_t ks_flag) 92{ 93 return (NULL); 94} 95 96/*ARGSUSED*/ 97void 98kstat_install(kstat_t *ksp) 99{} 100 101/*ARGSUSED*/ 102void 103kstat_delete(kstat_t *ksp) 104{} 105 106/* 107 * ========================================================================= 108 * mutexes 109 * ========================================================================= 110 */ 111void 112zmutex_init(kmutex_t *mp) 113{ 114 mp->m_owner = NULL; 115 mp->initialized = B_TRUE; 116 (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); 117} 118 119void 120zmutex_destroy(kmutex_t *mp) 121{ 122 ASSERT(mp->initialized == B_TRUE); 123 ASSERT(mp->m_owner == NULL); 124 (void) _mutex_destroy(&(mp)->m_lock); 125 mp->m_owner = (void *)-1UL; 126 mp->initialized = B_FALSE; 127} 128 129int 130zmutex_owned(kmutex_t *mp) 131{ 132 ASSERT(mp->initialized == B_TRUE); 133 134 return (mp->m_owner == curthread); 135} 136 137void 138mutex_enter(kmutex_t *mp) 139{ 140 ASSERT(mp->initialized == B_TRUE); 141 ASSERT(mp->m_owner != (void *)-1UL); 142 ASSERT(mp->m_owner != curthread); 143 VERIFY(mutex_lock(&mp->m_lock) == 0); 144 ASSERT(mp->m_owner == NULL); 145 mp->m_owner = curthread; 146} 147 148int 149mutex_tryenter(kmutex_t *mp) 150{ 151 ASSERT(mp->initialized == B_TRUE); 152 ASSERT(mp->m_owner != (void *)-1UL); 153 if (0 == mutex_trylock(&mp->m_lock)) { 154 ASSERT(mp->m_owner == NULL); 155 mp->m_owner = curthread; 156 return (1); 157 } else { 158 return (0); 159 } 160} 161 162void 163mutex_exit(kmutex_t *mp) 164{ 165 ASSERT(mp->initialized == B_TRUE); 166 ASSERT(mutex_owner(mp) == curthread); 167 mp->m_owner = NULL; 168 VERIFY(mutex_unlock(&mp->m_lock) == 0); 169} 170 171void * 172mutex_owner(kmutex_t *mp) 173{ 174 ASSERT(mp->initialized == B_TRUE); 175 return (mp->m_owner); 176} 177 178/* 179 * ========================================================================= 180 * rwlocks 181 * ========================================================================= 182 */ 183/*ARGSUSED*/ 184void 185rw_init(krwlock_t *rwlp, char *name, int type, void *arg) 186{ 187 rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); 188 rwlp->rw_owner = NULL; 189 rwlp->initialized = B_TRUE; 190 rwlp->rw_count = 0; 191} 192 193void 194rw_destroy(krwlock_t *rwlp) 195{ 196 ASSERT(rwlp->rw_count == 0); 197 rwlock_destroy(&rwlp->rw_lock); 198 rwlp->rw_owner = (void *)-1UL; 199 rwlp->initialized = B_FALSE; 200} 201 202void 203rw_enter(krwlock_t *rwlp, krw_t rw) 204{ 205 //ASSERT(!RW_LOCK_HELD(rwlp)); 206 ASSERT(rwlp->initialized == B_TRUE); 207 ASSERT(rwlp->rw_owner != (void *)-1UL); 208 ASSERT(rwlp->rw_owner != curthread); 209 210 if (rw == RW_READER) { 211 VERIFY(rw_rdlock(&rwlp->rw_lock) == 0); 212 ASSERT(rwlp->rw_count >= 0); 213 atomic_add_int(&rwlp->rw_count, 1); 214 } else { 215 VERIFY(rw_wrlock(&rwlp->rw_lock) == 0); 216 ASSERT(rwlp->rw_count == 0); 217 rwlp->rw_count = -1; 218 rwlp->rw_owner = curthread; 219 } 220} 221 222void 223rw_exit(krwlock_t *rwlp) 224{ 225 ASSERT(rwlp->initialized == B_TRUE); 226 ASSERT(rwlp->rw_owner != (void *)-1UL); 227 228 if (rwlp->rw_owner == curthread) { 229 /* Write locked. */ 230 ASSERT(rwlp->rw_count == -1); 231 rwlp->rw_count = 0; 232 rwlp->rw_owner = NULL; 233 } else { 234 /* Read locked. */ 235 ASSERT(rwlp->rw_count > 0); 236 atomic_add_int(&rwlp->rw_count, -1); 237 } 238 VERIFY(rw_unlock(&rwlp->rw_lock) == 0); 239} 240 241int 242rw_tryenter(krwlock_t *rwlp, krw_t rw) 243{ 244 int rv; 245 246 ASSERT(rwlp->initialized == B_TRUE); 247 ASSERT(rwlp->rw_owner != (void *)-1UL); 248 ASSERT(rwlp->rw_owner != curthread); 249 250 if (rw == RW_READER) 251 rv = rw_tryrdlock(&rwlp->rw_lock); 252 else 253 rv = rw_trywrlock(&rwlp->rw_lock); 254 255 if (rv == 0) { 256 ASSERT(rwlp->rw_owner == NULL); 257 if (rw == RW_READER) { 258 ASSERT(rwlp->rw_count >= 0); 259 atomic_add_int(&rwlp->rw_count, 1); 260 } else { 261 ASSERT(rwlp->rw_count == 0); 262 rwlp->rw_count = -1; 263 rwlp->rw_owner = curthread; 264 } 265 return (1); 266 } 267 268 return (0); 269} 270 271/*ARGSUSED*/ 272int 273rw_tryupgrade(krwlock_t *rwlp) 274{ 275 ASSERT(rwlp->initialized == B_TRUE); 276 ASSERT(rwlp->rw_owner != (void *)-1UL); 277 278 return (0); 279} 280 281int 282rw_lock_held(krwlock_t *rwlp) 283{ 284 285 return (rwlp->rw_count != 0); 286} 287 288/* 289 * ========================================================================= 290 * condition variables 291 * ========================================================================= 292 */ 293/*ARGSUSED*/ 294void 295cv_init(kcondvar_t *cv, char *name, int type, void *arg) 296{ 297 VERIFY(cond_init(cv, name, NULL) == 0); 298} 299 300void 301cv_destroy(kcondvar_t *cv) 302{ 303 VERIFY(cond_destroy(cv) == 0); 304} 305 306void 307cv_wait(kcondvar_t *cv, kmutex_t *mp) 308{ 309 ASSERT(mutex_owner(mp) == curthread); 310 mp->m_owner = NULL; 311 int ret = cond_wait(cv, &mp->m_lock); 312 VERIFY(ret == 0 || ret == EINTR); 313 mp->m_owner = curthread; 314} 315 316clock_t 317cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) 318{ 319 int error; 320 struct timespec ts; 321 struct timeval tv; 322 clock_t delta; 323 324 abstime += ddi_get_lbolt(); 325top: 326 delta = abstime - ddi_get_lbolt(); 327 if (delta <= 0) 328 return (-1); 329 330 if (gettimeofday(&tv, NULL) != 0) 331 assert(!"gettimeofday() failed"); 332 333 ts.tv_sec = tv.tv_sec + delta / hz; 334 ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz); 335 ASSERT(ts.tv_nsec >= 0); 336 337 if (ts.tv_nsec >= NANOSEC) { 338 ts.tv_sec++; 339 ts.tv_nsec -= NANOSEC; 340 } 341 342 ASSERT(mutex_owner(mp) == curthread); 343 mp->m_owner = NULL; 344 error = pthread_cond_timedwait(cv, &mp->m_lock, &ts); 345 mp->m_owner = curthread; 346 347 if (error == EINTR) 348 goto top; 349 350 if (error == ETIMEDOUT) 351 return (-1); 352 353 ASSERT(error == 0); 354 355 return (1); 356} 357 358/*ARGSUSED*/ 359clock_t 360cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res, 361 int flag) 362{ 363 int error; 364 timestruc_t ts; 365 hrtime_t delta; 366 367 ASSERT(flag == 0); 368 369top: 370 delta = tim - gethrtime(); 371 if (delta <= 0) 372 return (-1); 373 374 ts.tv_sec = delta / NANOSEC; 375 ts.tv_nsec = delta % NANOSEC; 376 377 ASSERT(mutex_owner(mp) == curthread); 378 mp->m_owner = NULL; 379 error = pthread_cond_timedwait(cv, &mp->m_lock, &ts); 380 mp->m_owner = curthread; 381 382 if (error == ETIMEDOUT) 383 return (-1); 384 385 if (error == EINTR) 386 goto top; 387 388 ASSERT(error == 0); 389 390 return (1); 391} 392 393void 394cv_signal(kcondvar_t *cv) 395{ 396 VERIFY(cond_signal(cv) == 0); 397} 398 399void 400cv_broadcast(kcondvar_t *cv) 401{ 402 VERIFY(cond_broadcast(cv) == 0); 403} 404 405/* 406 * ========================================================================= 407 * vnode operations 408 * ========================================================================= 409 */ 410/* 411 * Note: for the xxxat() versions of these functions, we assume that the 412 * starting vp is always rootdir (which is true for spa_directory.c, the only 413 * ZFS consumer of these interfaces). We assert this is true, and then emulate 414 * them by adding '/' in front of the path. 415 */ 416 417/*ARGSUSED*/ 418int 419vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) 420{ 421 int fd; 422 int dump_fd; 423 vnode_t *vp; 424 int old_umask; 425 char realpath[MAXPATHLEN]; 426 struct stat64 st; 427 428 /* 429 * If we're accessing a real disk from userland, we need to use 430 * the character interface to avoid caching. This is particularly 431 * important if we're trying to look at a real in-kernel storage 432 * pool from userland, e.g. via zdb, because otherwise we won't 433 * see the changes occurring under the segmap cache. 434 * On the other hand, the stupid character device returns zero 435 * for its size. So -- gag -- we open the block device to get 436 * its size, and remember it for subsequent VOP_GETATTR(). 437 */ 438 if (strncmp(path, "/dev/", 5) == 0) { 439 char *dsk; 440 fd = open64(path, O_RDONLY); 441 if (fd == -1) 442 return (errno); 443 if (fstat64(fd, &st) == -1) { 444 close(fd); 445 return (errno); 446 } 447 close(fd); 448 (void) sprintf(realpath, "%s", path); 449 dsk = strstr(path, "/dsk/"); 450 if (dsk != NULL) 451 (void) sprintf(realpath + (dsk - path) + 1, "r%s", 452 dsk + 1); 453 } else { 454 (void) sprintf(realpath, "%s", path); 455 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) 456 return (errno); 457 } 458 459 if (flags & FCREAT) 460 old_umask = umask(0); 461 462 /* 463 * The construct 'flags - FREAD' conveniently maps combinations of 464 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. 465 */ 466 fd = open64(realpath, flags - FREAD, mode); 467 468 if (flags & FCREAT) 469 (void) umask(old_umask); 470 471 if (vn_dumpdir != NULL) { 472 char dumppath[MAXPATHLEN]; 473 (void) snprintf(dumppath, sizeof (dumppath), 474 "%s/%s", vn_dumpdir, basename(realpath)); 475 dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666); 476 if (dump_fd == -1) 477 return (errno); 478 } else { 479 dump_fd = -1; 480 } 481 482 if (fd == -1) 483 return (errno); 484 485 if (fstat64(fd, &st) == -1) { 486 close(fd); 487 return (errno); 488 } 489 490 (void) fcntl(fd, F_SETFD, FD_CLOEXEC); 491 492 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); 493 494 vp->v_fd = fd; 495 vp->v_size = st.st_size; 496 vp->v_path = spa_strdup(path); 497 vp->v_dump_fd = dump_fd; 498 499 return (0); 500} 501 502/*ARGSUSED*/ 503int 504vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, 505 int x3, vnode_t *startvp, int fd) 506{ 507 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); 508 int ret; 509 510 ASSERT(startvp == rootdir); 511 (void) sprintf(realpath, "/%s", path); 512 513 /* fd ignored for now, need if want to simulate nbmand support */ 514 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); 515 516 umem_free(realpath, strlen(path) + 2); 517 518 return (ret); 519} 520 521/*ARGSUSED*/ 522int 523vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, 524 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) 525{ 526 ssize_t iolen, split; 527 528 if (uio == UIO_READ) { 529 iolen = pread64(vp->v_fd, addr, len, offset); 530 if (vp->v_dump_fd != -1) { 531 int status = 532 pwrite64(vp->v_dump_fd, addr, iolen, offset); 533 ASSERT(status != -1); 534 } 535 } else { 536 /* 537 * To simulate partial disk writes, we split writes into two 538 * system calls so that the process can be killed in between. 539 */ 540 int sectors = len >> SPA_MINBLOCKSHIFT; 541 split = (sectors > 0 ? rand() % sectors : 0) << 542 SPA_MINBLOCKSHIFT; 543 iolen = pwrite64(vp->v_fd, addr, split, offset); 544 iolen += pwrite64(vp->v_fd, (char *)addr + split, 545 len - split, offset + split); 546 } 547 548 if (iolen == -1) 549 return (errno); 550 if (residp) 551 *residp = len - iolen; 552 else if (iolen != len) 553 return (EIO); 554 return (0); 555} 556 557void 558vn_close(vnode_t *vp, int openflag, cred_t *cr, kthread_t *td) 559{ 560 close(vp->v_fd); 561 if (vp->v_dump_fd != -1) 562 close(vp->v_dump_fd); 563 spa_strfree(vp->v_path); 564 umem_free(vp, sizeof (vnode_t)); 565} 566 567/* 568 * At a minimum we need to update the size since vdev_reopen() 569 * will no longer call vn_openat(). 570 */ 571int 572fop_getattr(vnode_t *vp, vattr_t *vap) 573{ 574 struct stat64 st; 575 576 if (fstat64(vp->v_fd, &st) == -1) { 577 close(vp->v_fd); 578 return (errno); 579 } 580 581 vap->va_size = st.st_size; 582 return (0); 583} 584 585#ifdef ZFS_DEBUG 586 587/* 588 * ========================================================================= 589 * Figure out which debugging statements to print 590 * ========================================================================= 591 */ 592 593static char *dprintf_string; 594static int dprintf_print_all; 595 596int 597dprintf_find_string(const char *string) 598{ 599 char *tmp_str = dprintf_string; 600 int len = strlen(string); 601 602 /* 603 * Find out if this is a string we want to print. 604 * String format: file1.c,function_name1,file2.c,file3.c 605 */ 606 607 while (tmp_str != NULL) { 608 if (strncmp(tmp_str, string, len) == 0 && 609 (tmp_str[len] == ',' || tmp_str[len] == '\0')) 610 return (1); 611 tmp_str = strchr(tmp_str, ','); 612 if (tmp_str != NULL) 613 tmp_str++; /* Get rid of , */ 614 } 615 return (0); 616} 617 618void 619dprintf_setup(int *argc, char **argv) 620{ 621 int i, j; 622 623 /* 624 * Debugging can be specified two ways: by setting the 625 * environment variable ZFS_DEBUG, or by including a 626 * "debug=..." argument on the command line. The command 627 * line setting overrides the environment variable. 628 */ 629 630 for (i = 1; i < *argc; i++) { 631 int len = strlen("debug="); 632 /* First look for a command line argument */ 633 if (strncmp("debug=", argv[i], len) == 0) { 634 dprintf_string = argv[i] + len; 635 /* Remove from args */ 636 for (j = i; j < *argc; j++) 637 argv[j] = argv[j+1]; 638 argv[j] = NULL; 639 (*argc)--; 640 } 641 } 642 643 if (dprintf_string == NULL) { 644 /* Look for ZFS_DEBUG environment variable */ 645 dprintf_string = getenv("ZFS_DEBUG"); 646 } 647 648 /* 649 * Are we just turning on all debugging? 650 */ 651 if (dprintf_find_string("on")) 652 dprintf_print_all = 1; 653 654 if (dprintf_string != NULL) 655 zfs_flags |= ZFS_DEBUG_DPRINTF; 656} 657 658int 659sysctl_handle_64(SYSCTL_HANDLER_ARGS) 660{ 661 return (0); 662} 663 664/* 665 * ========================================================================= 666 * debug printfs 667 * ========================================================================= 668 */ 669void 670__dprintf(const char *file, const char *func, int line, const char *fmt, ...) 671{ 672 const char *newfile; 673 va_list adx; 674 675 /* 676 * Get rid of annoying "../common/" prefix to filename. 677 */ 678 newfile = strrchr(file, '/'); 679 if (newfile != NULL) { 680 newfile = newfile + 1; /* Get rid of leading / */ 681 } else { 682 newfile = file; 683 } 684 685 if (dprintf_print_all || 686 dprintf_find_string(newfile) || 687 dprintf_find_string(func)) { 688 /* Print out just the function name if requested */ 689 flockfile(stdout); 690 if (dprintf_find_string("pid")) 691 (void) printf("%d ", getpid()); 692 if (dprintf_find_string("tid")) 693 (void) printf("%lu ", thr_self()); 694#if 0 695 if (dprintf_find_string("cpu")) 696 (void) printf("%u ", getcpuid()); 697#endif 698 if (dprintf_find_string("time")) 699 (void) printf("%llu ", gethrtime()); 700 if (dprintf_find_string("long")) 701 (void) printf("%s, line %d: ", newfile, line); 702 (void) printf("%s: ", func); 703 va_start(adx, fmt); 704 (void) vprintf(fmt, adx); 705 va_end(adx); 706 funlockfile(stdout); 707 } 708} 709 710#endif /* ZFS_DEBUG */ 711 712/* 713 * ========================================================================= 714 * cmn_err() and panic() 715 * ========================================================================= 716 */ 717static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; 718static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; 719 720void 721vpanic(const char *fmt, va_list adx) 722{ 723 (void) fprintf(stderr, "error: "); 724 (void) vfprintf(stderr, fmt, adx); 725 (void) fprintf(stderr, "\n"); 726 727 abort(); /* think of it as a "user-level crash dump" */ 728} 729 730void 731panic(const char *fmt, ...) 732{ 733 va_list adx; 734 735 va_start(adx, fmt); 736 vpanic(fmt, adx); 737 va_end(adx); 738} 739 740void 741vcmn_err(int ce, const char *fmt, va_list adx) 742{ 743 if (ce == CE_PANIC) 744 vpanic(fmt, adx); 745 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ 746 (void) fprintf(stderr, "%s", ce_prefix[ce]); 747 (void) vfprintf(stderr, fmt, adx); 748 (void) fprintf(stderr, "%s", ce_suffix[ce]); 749 } 750} 751 752/*PRINTFLIKE2*/ 753void 754cmn_err(int ce, const char *fmt, ...) 755{ 756 va_list adx; 757 758 va_start(adx, fmt); 759 vcmn_err(ce, fmt, adx); 760 va_end(adx); 761} 762 763/* 764 * ========================================================================= 765 * kobj interfaces 766 * ========================================================================= 767 */ 768struct _buf * 769kobj_open_file(char *name) 770{ 771 struct _buf *file; 772 vnode_t *vp; 773 774 /* set vp as the _fd field of the file */ 775 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir, 776 -1) != 0) 777 return ((void *)-1UL); 778 779 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); 780 file->_fd = (intptr_t)vp; 781 return (file); 782} 783 784int 785kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) 786{ 787 ssize_t resid; 788 789 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, 790 UIO_SYSSPACE, 0, 0, 0, &resid); 791 792 return (size - resid); 793} 794 795void 796kobj_close_file(struct _buf *file) 797{ 798 vn_close((vnode_t *)file->_fd, 0, NULL, NULL); 799 umem_free(file, sizeof (struct _buf)); 800} 801 802int 803kobj_get_filesize(struct _buf *file, uint64_t *size) 804{ 805 struct stat64 st; 806 vnode_t *vp = (vnode_t *)file->_fd; 807 808 if (fstat64(vp->v_fd, &st) == -1) { 809 vn_close(vp, 0, NULL, NULL); 810 return (errno); 811 } 812 *size = st.st_size; 813 return (0); 814} 815 816/* 817 * ========================================================================= 818 * misc routines 819 * ========================================================================= 820 */ 821 822void 823delay(clock_t ticks) 824{ 825 poll(0, 0, ticks * (1000 / hz)); 826} 827 828#if 0 829/* 830 * Find highest one bit set. 831 * Returns bit number + 1 of highest bit that is set, otherwise returns 0. 832 */ 833int 834highbit64(uint64_t i) 835{ 836 int h = 1; 837 838 if (i == 0) 839 return (0); 840 if (i & 0xffffffff00000000ULL) { 841 h += 32; i >>= 32; 842 } 843 if (i & 0xffff0000) { 844 h += 16; i >>= 16; 845 } 846 if (i & 0xff00) { 847 h += 8; i >>= 8; 848 } 849 if (i & 0xf0) { 850 h += 4; i >>= 4; 851 } 852 if (i & 0xc) { 853 h += 2; i >>= 2; 854 } 855 if (i & 0x2) { 856 h += 1; 857 } 858 return (h); 859} 860#endif 861 862static int random_fd = -1, urandom_fd = -1; 863 864static int 865random_get_bytes_common(uint8_t *ptr, size_t len, int fd) 866{ 867 size_t resid = len; 868 ssize_t bytes; 869 870 ASSERT(fd != -1); 871 872 while (resid != 0) { 873 bytes = read(fd, ptr, resid); 874 ASSERT3S(bytes, >=, 0); 875 ptr += bytes; 876 resid -= bytes; 877 } 878 879 return (0); 880} 881 882int 883random_get_bytes(uint8_t *ptr, size_t len) 884{ 885 return (random_get_bytes_common(ptr, len, random_fd)); 886} 887 888int 889random_get_pseudo_bytes(uint8_t *ptr, size_t len) 890{ 891 return (random_get_bytes_common(ptr, len, urandom_fd)); 892} 893 894int 895ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result) 896{ 897 char *end; 898 899 *result = strtoul(hw_serial, &end, base); 900 if (*result == 0) 901 return (errno); 902 return (0); 903} 904 905int 906ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result) 907{ 908 char *end; 909 910 *result = strtoull(str, &end, base); 911 if (*result == 0) 912 return (errno); 913 return (0); 914} 915 916#ifdef illumos 917/* ARGSUSED */ 918cyclic_id_t 919cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when) 920{ 921 return (1); 922} 923 924/* ARGSUSED */ 925void 926cyclic_remove(cyclic_id_t id) 927{ 928} 929 930/* ARGSUSED */ 931int 932cyclic_reprogram(cyclic_id_t id, hrtime_t expiration) 933{ 934 return (1); 935} 936#endif 937 938/* 939 * ========================================================================= 940 * kernel emulation setup & teardown 941 * ========================================================================= 942 */ 943static int 944umem_out_of_memory(void) 945{ 946 char errmsg[] = "out of memory -- generating core dump\n"; 947 948 write(fileno(stderr), errmsg, sizeof (errmsg)); 949 abort(); 950 return (0); 951} 952 953void 954kernel_init(int mode) 955{ 956 extern uint_t rrw_tsd_key; 957 958 umem_nofail_callback(umem_out_of_memory); 959 960 physmem = sysconf(_SC_PHYS_PAGES); 961 962 dprintf("physmem = %llu pages (%.2f GB)\n", physmem, 963 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); 964 965 (void) snprintf(hw_serial, sizeof (hw_serial), "%lu", 966 (mode & FWRITE) ? (unsigned long)gethostid() : 0); 967 968 VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1); 969 VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1); 970 971 system_taskq_init(); 972 973#ifdef illumos 974 mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL); 975#endif 976 977 spa_init(mode); 978 979 tsd_create(&rrw_tsd_key, rrw_tsd_destroy); 980} 981 982void 983kernel_fini(void) 984{ 985 spa_fini(); 986 987 system_taskq_fini(); 988 989 close(random_fd); 990 close(urandom_fd); 991 992 random_fd = -1; 993 urandom_fd = -1; 994} 995 996int 997z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen) 998{ 999 int ret; 1000 uLongf len = *dstlen; 1001 1002 if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK) 1003 *dstlen = (size_t)len; 1004 1005 return (ret); 1006} 1007 1008int 1009z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen, 1010 int level) 1011{ 1012 int ret; 1013 uLongf len = *dstlen; 1014 1015 if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK) 1016 *dstlen = (size_t)len; 1017 1018 return (ret); 1019} 1020 1021uid_t 1022crgetuid(cred_t *cr) 1023{ 1024 return (0); 1025} 1026 1027uid_t 1028crgetruid(cred_t *cr) 1029{ 1030 return (0); 1031} 1032 1033gid_t 1034crgetgid(cred_t *cr) 1035{ 1036 return (0); 1037} 1038 1039int 1040crgetngroups(cred_t *cr) 1041{ 1042 return (0); 1043} 1044 1045gid_t * 1046crgetgroups(cred_t *cr) 1047{ 1048 return (NULL); 1049} 1050 1051int 1052zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) 1053{ 1054 return (0); 1055} 1056 1057int 1058zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) 1059{ 1060 return (0); 1061} 1062 1063int 1064zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) 1065{ 1066 return (0); 1067} 1068 1069ksiddomain_t * 1070ksid_lookupdomain(const char *dom) 1071{ 1072 ksiddomain_t *kd; 1073 1074 kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL); 1075 kd->kd_name = spa_strdup(dom); 1076 return (kd); 1077} 1078 1079void 1080ksiddomain_rele(ksiddomain_t *ksid) 1081{ 1082 spa_strfree(ksid->kd_name); 1083 umem_free(ksid, sizeof (ksiddomain_t)); 1084} 1085 1086/* 1087 * Do not change the length of the returned string; it must be freed 1088 * with strfree(). 1089 */ 1090char * 1091kmem_asprintf(const char *fmt, ...) 1092{ 1093 int size; 1094 va_list adx; 1095 char *buf; 1096 1097 va_start(adx, fmt); 1098 size = vsnprintf(NULL, 0, fmt, adx) + 1; 1099 va_end(adx); 1100 1101 buf = kmem_alloc(size, KM_SLEEP); 1102 1103 va_start(adx, fmt); 1104 size = vsnprintf(buf, size, fmt, adx); 1105 va_end(adx); 1106 1107 return (buf); 1108} 1109 1110/* ARGSUSED */ 1111int 1112zfs_onexit_fd_hold(int fd, minor_t *minorp) 1113{ 1114 *minorp = 0; 1115 return (0); 1116} 1117 1118/* ARGSUSED */ 1119void 1120zfs_onexit_fd_rele(int fd) 1121{ 1122} 1123 1124/* ARGSUSED */ 1125int 1126zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, 1127 uint64_t *action_handle) 1128{ 1129 return (0); 1130} 1131 1132/* ARGSUSED */ 1133int 1134zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire) 1135{ 1136 return (0); 1137} 1138 1139/* ARGSUSED */ 1140int 1141zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data) 1142{ 1143 return (0); 1144} 1145 1146#ifdef __FreeBSD__ 1147/* ARGSUSED */ 1148int 1149zvol_create_minors(const char *name) 1150{ 1151 return (0); 1152} 1153#endif 1154 1155#ifdef illumos 1156void 1157bioinit(buf_t *bp) 1158{ 1159 bzero(bp, sizeof (buf_t)); 1160} 1161 1162void 1163biodone(buf_t *bp) 1164{ 1165 if (bp->b_iodone != NULL) { 1166 (*(bp->b_iodone))(bp); 1167 return; 1168 } 1169 ASSERT((bp->b_flags & B_DONE) == 0); 1170 bp->b_flags |= B_DONE; 1171} 1172 1173void 1174bioerror(buf_t *bp, int error) 1175{ 1176 ASSERT(bp != NULL); 1177 ASSERT(error >= 0); 1178 1179 if (error != 0) { 1180 bp->b_flags |= B_ERROR; 1181 } else { 1182 bp->b_flags &= ~B_ERROR; 1183 } 1184 bp->b_error = error; 1185} 1186 1187 1188int 1189geterror(struct buf *bp) 1190{ 1191 int error = 0; 1192 1193 if (bp->b_flags & B_ERROR) { 1194 error = bp->b_error; 1195 if (!error) 1196 error = EIO; 1197 } 1198 return (error); 1199} 1200#endif 1201