kernel.c revision 324586
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24 * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
25 */
26
27#include <assert.h>
28#include <fcntl.h>
29#include <poll.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <zlib.h>
34#include <libgen.h>
35#include <sys/spa.h>
36#include <sys/stat.h>
37#include <sys/processor.h>
38#include <sys/zfs_context.h>
39#include <sys/rrwlock.h>
40#include <sys/zmod.h>
41#include <sys/utsname.h>
42#include <sys/systeminfo.h>
43
44/*
45 * Emulation of kernel services in userland.
46 */
47
#ifndef __FreeBSD__
/* Defined only for non-FreeBSD builds; not referenced elsewhere in this file. */
int aok;
#endif
/* Number of physical pages; set by kernel_init() from sysconf(_SC_PHYS_PAGES). */
uint64_t physmem;
/* Placeholder root vnode; only its address value is compared (see vn_openat()). */
vnode_t *rootdir = (vnode_t *)0xabcd1234;
/* Decimal host id string; filled in by kernel_init(). */
char hw_serial[HW_HOSTID_LEN];
#ifdef illumos
/* Initialized by kernel_init() on illumos builds. */
kmutex_t cpu_lock;
#endif

/* If set, all blocks read will be copied to the specified directory. */
char *vn_dumpdir = NULL;

/* Fixed identification strings reported by the userland emulation. */
struct utsname utsname = {
	"userland", "libzpool", "1", "1", "na"
};

/* this only exists to have its address taken */
struct proc p0;
68/*
69 * =========================================================================
70 * threads
71 * =========================================================================
72 */
73/*ARGSUSED*/
74kthread_t *
75zk_thread_create(void (*func)(), void *arg)
76{
77	thread_t tid;
78
79	VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED,
80	    &tid) == 0);
81
82	return ((void *)(uintptr_t)tid);
83}
84
85/*
86 * =========================================================================
87 * kstats
88 * =========================================================================
89 */
90/*ARGSUSED*/
91kstat_t *
92kstat_create(char *module, int instance, char *name, char *class,
93    uchar_t type, ulong_t ndata, uchar_t ks_flag)
94{
95	return (NULL);
96}
97
98/*ARGSUSED*/
99void
100kstat_named_init(kstat_named_t *knp, const char *name, uchar_t type)
101{}
102
103/*ARGSUSED*/
104void
105kstat_install(kstat_t *ksp)
106{}
107
108/*ARGSUSED*/
109void
110kstat_delete(kstat_t *ksp)
111{}
112
113/*
114 * =========================================================================
115 * mutexes
116 * =========================================================================
117 */
118void
119zmutex_init(kmutex_t *mp)
120{
121	mp->m_owner = NULL;
122	mp->initialized = B_TRUE;
123	(void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL);
124}
125
126void
127zmutex_destroy(kmutex_t *mp)
128{
129	ASSERT(mp->initialized == B_TRUE);
130	ASSERT(mp->m_owner == NULL);
131	(void) _mutex_destroy(&(mp)->m_lock);
132	mp->m_owner = (void *)-1UL;
133	mp->initialized = B_FALSE;
134}
135
136int
137zmutex_owned(kmutex_t *mp)
138{
139	ASSERT(mp->initialized == B_TRUE);
140
141	return (mp->m_owner == curthread);
142}
143
/*
 * Acquire 'mp', blocking until it is available, and record the calling
 * thread as its owner.  Failure of the underlying lock call is fatal.
 */
void
mutex_enter(kmutex_t *mp)
{
	/* The mutex must be live, not destroyed, and not already ours. */
	ASSERT(mp->initialized == B_TRUE);
	ASSERT(mp->m_owner != (void *)-1UL);
	ASSERT(mp->m_owner != curthread);
	VERIFY(mutex_lock(&mp->m_lock) == 0);
	/* We now hold the lock, so no owner may still be recorded. */
	ASSERT(mp->m_owner == NULL);
	mp->m_owner = curthread;
}
154
155int
156mutex_tryenter(kmutex_t *mp)
157{
158	ASSERT(mp->initialized == B_TRUE);
159	ASSERT(mp->m_owner != (void *)-1UL);
160	if (0 == mutex_trylock(&mp->m_lock)) {
161		ASSERT(mp->m_owner == NULL);
162		mp->m_owner = curthread;
163		return (1);
164	} else {
165		return (0);
166	}
167}
168
/*
 * Release 'mp'.  The caller must be the recorded owner.  Failure of the
 * underlying unlock call is fatal.
 */
void
mutex_exit(kmutex_t *mp)
{
	ASSERT(mp->initialized == B_TRUE);
	ASSERT(mutex_owner(mp) == curthread);
	/* Clear the owner while the lock is still held. */
	mp->m_owner = NULL;
	VERIFY(mutex_unlock(&mp->m_lock) == 0);
}
177
178void *
179mutex_owner(kmutex_t *mp)
180{
181	ASSERT(mp->initialized == B_TRUE);
182	return (mp->m_owner);
183}
184
185/*
186 * =========================================================================
187 * rwlocks
188 * =========================================================================
189 */
190/*ARGSUSED*/
191void
192rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
193{
194	rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL);
195	rwlp->rw_owner = NULL;
196	rwlp->initialized = B_TRUE;
197	rwlp->rw_count = 0;
198}
199
200void
201rw_destroy(krwlock_t *rwlp)
202{
203	ASSERT(rwlp->rw_count == 0);
204	rwlock_destroy(&rwlp->rw_lock);
205	rwlp->rw_owner = (void *)-1UL;
206	rwlp->initialized = B_FALSE;
207}
208
209void
210rw_enter(krwlock_t *rwlp, krw_t rw)
211{
212	//ASSERT(!RW_LOCK_HELD(rwlp));
213	ASSERT(rwlp->initialized == B_TRUE);
214	ASSERT(rwlp->rw_owner != (void *)-1UL);
215	ASSERT(rwlp->rw_owner != curthread);
216
217	if (rw == RW_READER) {
218		VERIFY(rw_rdlock(&rwlp->rw_lock) == 0);
219		ASSERT(rwlp->rw_count >= 0);
220		atomic_add_int(&rwlp->rw_count, 1);
221	} else {
222		VERIFY(rw_wrlock(&rwlp->rw_lock) == 0);
223		ASSERT(rwlp->rw_count == 0);
224		rwlp->rw_count = -1;
225		rwlp->rw_owner = curthread;
226	}
227}
228
229void
230rw_exit(krwlock_t *rwlp)
231{
232	ASSERT(rwlp->initialized == B_TRUE);
233	ASSERT(rwlp->rw_owner != (void *)-1UL);
234
235	if (rwlp->rw_owner == curthread) {
236		/* Write locked. */
237		ASSERT(rwlp->rw_count == -1);
238		rwlp->rw_count = 0;
239		rwlp->rw_owner = NULL;
240	} else {
241		/* Read locked. */
242		ASSERT(rwlp->rw_count > 0);
243		atomic_add_int(&rwlp->rw_count, -1);
244	}
245	VERIFY(rw_unlock(&rwlp->rw_lock) == 0);
246}
247
248int
249rw_tryenter(krwlock_t *rwlp, krw_t rw)
250{
251	int rv;
252
253	ASSERT(rwlp->initialized == B_TRUE);
254	ASSERT(rwlp->rw_owner != (void *)-1UL);
255	ASSERT(rwlp->rw_owner != curthread);
256
257	if (rw == RW_READER)
258		rv = rw_tryrdlock(&rwlp->rw_lock);
259	else
260		rv = rw_trywrlock(&rwlp->rw_lock);
261
262	if (rv == 0) {
263		ASSERT(rwlp->rw_owner == NULL);
264		if (rw == RW_READER) {
265			ASSERT(rwlp->rw_count >= 0);
266			atomic_add_int(&rwlp->rw_count, 1);
267		} else {
268			ASSERT(rwlp->rw_count == 0);
269			rwlp->rw_count = -1;
270			rwlp->rw_owner = curthread;
271		}
272		return (1);
273	}
274
275	return (0);
276}
277
278/*ARGSUSED*/
279int
280rw_tryupgrade(krwlock_t *rwlp)
281{
282	ASSERT(rwlp->initialized == B_TRUE);
283	ASSERT(rwlp->rw_owner != (void *)-1UL);
284
285	return (0);
286}
287
288int
289rw_lock_held(krwlock_t *rwlp)
290{
291
292	return (rwlp->rw_count != 0);
293}
294
295/*
296 * =========================================================================
297 * condition variables
298 * =========================================================================
299 */
/*
 * Initialize a condition variable.  'name' is forwarded as the second
 * argument of cond_init(); 'type' and 'arg' are not used.  Failure is
 * fatal.
 */
/*ARGSUSED*/
void
cv_init(kcondvar_t *cv, char *name, int type, void *arg)
{
	VERIFY(cond_init(cv, name, NULL) == 0);
}
306
/*
 * Destroy a condition variable; failure is fatal.
 */
void
cv_destroy(kcondvar_t *cv)
{
	VERIFY(cond_destroy(cv) == 0);
}
312
313void
314cv_wait(kcondvar_t *cv, kmutex_t *mp)
315{
316	ASSERT(mutex_owner(mp) == curthread);
317	mp->m_owner = NULL;
318	int ret = cond_wait(cv, &mp->m_lock);
319	VERIFY(ret == 0 || ret == EINTR);
320	mp->m_owner = curthread;
321}
322
323clock_t
324cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
325{
326	int error;
327	struct timespec ts;
328	struct timeval tv;
329	clock_t delta;
330
331	abstime += ddi_get_lbolt();
332top:
333	delta = abstime - ddi_get_lbolt();
334	if (delta <= 0)
335		return (-1);
336
337	if (gettimeofday(&tv, NULL) != 0)
338		assert(!"gettimeofday() failed");
339
340	ts.tv_sec = tv.tv_sec + delta / hz;
341	ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
342	ASSERT(ts.tv_nsec >= 0);
343
344	if (ts.tv_nsec >= NANOSEC) {
345		ts.tv_sec++;
346		ts.tv_nsec -= NANOSEC;
347	}
348
349	ASSERT(mutex_owner(mp) == curthread);
350	mp->m_owner = NULL;
351	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
352	mp->m_owner = curthread;
353
354	if (error == EINTR)
355		goto top;
356
357	if (error == ETIMEDOUT)
358		return (-1);
359
360	ASSERT(error == 0);
361
362	return (1);
363}
364
365/*ARGSUSED*/
366clock_t
367cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
368    int flag)
369{
370	int error;
371	timespec_t ts;
372	hrtime_t delta;
373
374	ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE);
375
376top:
377	delta = tim;
378	if (flag & CALLOUT_FLAG_ABSOLUTE)
379		delta -= gethrtime();
380
381	if (delta <= 0)
382		return (-1);
383
384	clock_gettime(CLOCK_REALTIME, &ts);
385	ts.tv_sec += delta / NANOSEC;
386	ts.tv_nsec += delta % NANOSEC;
387	if (ts.tv_nsec >= NANOSEC) {
388		ts.tv_sec++;
389		ts.tv_nsec -= NANOSEC;
390	}
391
392	ASSERT(mutex_owner(mp) == curthread);
393	mp->m_owner = NULL;
394	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
395	mp->m_owner = curthread;
396
397	if (error == ETIMEDOUT)
398		return (-1);
399
400	if (error == EINTR)
401		goto top;
402
403	ASSERT(error == 0);
404
405	return (1);
406}
407
/*
 * Signal 'cv' via cond_signal(); failure is fatal.
 */
void
cv_signal(kcondvar_t *cv)
{
	VERIFY(cond_signal(cv) == 0);
}
413
/*
 * Broadcast on 'cv' via cond_broadcast(); failure is fatal.
 */
void
cv_broadcast(kcondvar_t *cv)
{
	VERIFY(cond_broadcast(cv) == 0);
}
419
420/*
421 * =========================================================================
422 * vnode operations
423 * =========================================================================
424 */
425/*
426 * Note: for the xxxat() versions of these functions, we assume that the
427 * starting vp is always rootdir (which is true for spa_directory.c, the only
428 * ZFS consumer of these interfaces).  We assert this is true, and then emulate
429 * them by adding '/' in front of the path.
430 */
431
432/*ARGSUSED*/
433int
434vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
435{
436	int fd;
437	int dump_fd;
438	vnode_t *vp;
439	int old_umask;
440	char realpath[MAXPATHLEN];
441	struct stat64 st;
442
443	/*
444	 * If we're accessing a real disk from userland, we need to use
445	 * the character interface to avoid caching.  This is particularly
446	 * important if we're trying to look at a real in-kernel storage
447	 * pool from userland, e.g. via zdb, because otherwise we won't
448	 * see the changes occurring under the segmap cache.
449	 * On the other hand, the stupid character device returns zero
450	 * for its size.  So -- gag -- we open the block device to get
451	 * its size, and remember it for subsequent VOP_GETATTR().
452	 */
453	if (strncmp(path, "/dev/", 5) == 0) {
454		char *dsk;
455		fd = open64(path, O_RDONLY);
456		if (fd == -1)
457			return (errno);
458		if (fstat64(fd, &st) == -1) {
459			close(fd);
460			return (errno);
461		}
462		close(fd);
463		(void) sprintf(realpath, "%s", path);
464		dsk = strstr(path, "/dsk/");
465		if (dsk != NULL)
466			(void) sprintf(realpath + (dsk - path) + 1, "r%s",
467			    dsk + 1);
468	} else {
469		(void) sprintf(realpath, "%s", path);
470		if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
471			return (errno);
472	}
473
474	if (flags & FCREAT)
475		old_umask = umask(0);
476
477	/*
478	 * The construct 'flags - FREAD' conveniently maps combinations of
479	 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
480	 */
481	fd = open64(realpath, flags - FREAD, mode);
482
483	if (flags & FCREAT)
484		(void) umask(old_umask);
485
486	if (vn_dumpdir != NULL) {
487		char dumppath[MAXPATHLEN];
488		(void) snprintf(dumppath, sizeof (dumppath),
489		    "%s/%s", vn_dumpdir, basename(realpath));
490		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
491		if (dump_fd == -1)
492			return (errno);
493	} else {
494		dump_fd = -1;
495	}
496
497	if (fd == -1)
498		return (errno);
499
500	if (fstat64(fd, &st) == -1) {
501		close(fd);
502		return (errno);
503	}
504
505	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
506
507	*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
508
509	vp->v_fd = fd;
510	vp->v_size = st.st_size;
511	vp->v_path = spa_strdup(path);
512	vp->v_dump_fd = dump_fd;
513
514	return (0);
515}
516
517/*ARGSUSED*/
518int
519vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
520    int x3, vnode_t *startvp, int fd)
521{
522	char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
523	int ret;
524
525	ASSERT(startvp == rootdir);
526	(void) sprintf(realpath, "/%s", path);
527
528	/* fd ignored for now, need if want to simulate nbmand support */
529	ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
530
531	umem_free(realpath, strlen(path) + 2);
532
533	return (ret);
534}
535
536/*ARGSUSED*/
537int
538vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
539    int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
540{
541	ssize_t iolen, split;
542
543	if (uio == UIO_READ) {
544		iolen = pread64(vp->v_fd, addr, len, offset);
545		if (vp->v_dump_fd != -1) {
546			int status =
547			    pwrite64(vp->v_dump_fd, addr, iolen, offset);
548			ASSERT(status != -1);
549		}
550	} else {
551		/*
552		 * To simulate partial disk writes, we split writes into two
553		 * system calls so that the process can be killed in between.
554		 */
555		int sectors = len >> SPA_MINBLOCKSHIFT;
556		split = (sectors > 0 ? rand() % sectors : 0) <<
557		    SPA_MINBLOCKSHIFT;
558		iolen = pwrite64(vp->v_fd, addr, split, offset);
559		iolen += pwrite64(vp->v_fd, (char *)addr + split,
560		    len - split, offset + split);
561	}
562
563	if (iolen == -1)
564		return (errno);
565	if (residp)
566		*residp = len - iolen;
567	else if (iolen != len)
568		return (EIO);
569	return (0);
570}
571
572void
573vn_close(vnode_t *vp, int openflag, cred_t *cr, kthread_t *td)
574{
575	close(vp->v_fd);
576	if (vp->v_dump_fd != -1)
577		close(vp->v_dump_fd);
578	spa_strfree(vp->v_path);
579	umem_free(vp, sizeof (vnode_t));
580}
581
582/*
583 * At a minimum we need to update the size since vdev_reopen()
584 * will no longer call vn_openat().
585 */
586int
587fop_getattr(vnode_t *vp, vattr_t *vap)
588{
589	struct stat64 st;
590
591	if (fstat64(vp->v_fd, &st) == -1) {
592		close(vp->v_fd);
593		return (errno);
594	}
595
596	vap->va_size = st.st_size;
597	return (0);
598}
599
600#ifdef ZFS_DEBUG
601
602/*
603 * =========================================================================
604 * Figure out which debugging statements to print
605 * =========================================================================
606 */
607
/* Comma-separated list of debug selectors; NULL when none were given. */
static char *dprintf_string;
/* Non-zero when every debug statement should be printed. */
static int dprintf_print_all;

/*
 * Return 1 if 'string' appears as a complete element of the
 * comma-separated dprintf_string list, otherwise 0.
 * List format: file1.c,function_name1,file2.c,file3.c
 */
int
dprintf_find_string(const char *string)
{
	size_t want = strlen(string);
	const char *elem = dprintf_string;

	while (elem != NULL) {
		if (strncmp(elem, string, want) == 0) {
			/* Only a whole element counts, not a prefix of one. */
			char term = elem[want];

			if (term == ',' || term == '\0')
				return (1);
		}

		/* Advance to the element after the next comma, if any. */
		elem = strchr(elem, ',');
		if (elem != NULL)
			elem++;
	}
	return (0);
}
632
633void
634dprintf_setup(int *argc, char **argv)
635{
636	int i, j;
637
638	/*
639	 * Debugging can be specified two ways: by setting the
640	 * environment variable ZFS_DEBUG, or by including a
641	 * "debug=..."  argument on the command line.  The command
642	 * line setting overrides the environment variable.
643	 */
644
645	for (i = 1; i < *argc; i++) {
646		int len = strlen("debug=");
647		/* First look for a command line argument */
648		if (strncmp("debug=", argv[i], len) == 0) {
649			dprintf_string = argv[i] + len;
650			/* Remove from args */
651			for (j = i; j < *argc; j++)
652				argv[j] = argv[j+1];
653			argv[j] = NULL;
654			(*argc)--;
655		}
656	}
657
658	if (dprintf_string == NULL) {
659		/* Look for ZFS_DEBUG environment variable */
660		dprintf_string = getenv("ZFS_DEBUG");
661	}
662
663	/*
664	 * Are we just turning on all debugging?
665	 */
666	if (dprintf_find_string("on"))
667		dprintf_print_all = 1;
668
669	if (dprintf_string != NULL)
670		zfs_flags |= ZFS_DEBUG_DPRINTF;
671}
672
/*
 * Stub sysctl handler: takes the SYSCTL_HANDLER_ARGS parameter list,
 * does nothing, and always returns 0.
 */
int
sysctl_handle_64(SYSCTL_HANDLER_ARGS)
{
	return (0);
}
678
679/*
680 * =========================================================================
681 * debug printfs
682 * =========================================================================
683 */
684void
685__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
686{
687	const char *newfile;
688	va_list adx;
689
690	/*
691	 * Get rid of annoying "../common/" prefix to filename.
692	 */
693	newfile = strrchr(file, '/');
694	if (newfile != NULL) {
695		newfile = newfile + 1; /* Get rid of leading / */
696	} else {
697		newfile = file;
698	}
699
700	if (dprintf_print_all ||
701	    dprintf_find_string(newfile) ||
702	    dprintf_find_string(func)) {
703		/* Print out just the function name if requested */
704		flockfile(stdout);
705		if (dprintf_find_string("pid"))
706			(void) printf("%d ", getpid());
707		if (dprintf_find_string("tid"))
708			(void) printf("%lu ", thr_self());
709#if 0
710		if (dprintf_find_string("cpu"))
711			(void) printf("%u ", getcpuid());
712#endif
713		if (dprintf_find_string("time"))
714			(void) printf("%llu ", gethrtime());
715		if (dprintf_find_string("long"))
716			(void) printf("%s, line %d: ", newfile, line);
717		(void) printf("%s: ", func);
718		va_start(adx, fmt);
719		(void) vprintf(fmt, adx);
720		va_end(adx);
721		funlockfile(stdout);
722	}
723}
724
725#endif /* ZFS_DEBUG */
726
727/*
728 * =========================================================================
729 * cmn_err() and panic()
730 * =========================================================================
731 */
/*
 * Text printed before and after a cmn_err() message, indexed by the
 * severity level.  Both tables have CE_IGNORE entries, so only levels
 * below CE_IGNORE are valid indexes.
 */
static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
734
/*
 * Format the panic message into a fixed 512-byte buffer (longer
 * messages are truncated) and hand it to assfail().
 */
void
vpanic(const char *fmt, va_list adx)
{
	char buf[512];

	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
	assfail(buf, NULL, 0);
}
742
/*
 * printf-style front end to vpanic().
 */
void
panic(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vpanic(fmt, args);
	va_end(args);
}
752
753void
754vcmn_err(int ce, const char *fmt, va_list adx)
755{
756	if (ce == CE_PANIC)
757		vpanic(fmt, adx);
758	if (ce != CE_NOTE) {	/* suppress noise in userland stress testing */
759		(void) fprintf(stderr, "%s", ce_prefix[ce]);
760		(void) vfprintf(stderr, fmt, adx);
761		(void) fprintf(stderr, "%s", ce_suffix[ce]);
762	}
763}
764
/*
 * printf-style front end to vcmn_err().
 */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
}
775
776/*
777 * =========================================================================
778 * kobj interfaces
779 * =========================================================================
780 */
781struct _buf *
782kobj_open_file(char *name)
783{
784	struct _buf *file;
785	vnode_t *vp;
786
787	/* set vp as the _fd field of the file */
788	if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
789	    -1) != 0)
790		return ((void *)-1UL);
791
792	file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
793	file->_fd = (intptr_t)vp;
794	return (file);
795}
796
797int
798kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
799{
800	ssize_t resid;
801
802	vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
803	    UIO_SYSSPACE, 0, 0, 0, &resid);
804
805	return (size - resid);
806}
807
808void
809kobj_close_file(struct _buf *file)
810{
811	vn_close((vnode_t *)file->_fd, 0, NULL, NULL);
812	umem_free(file, sizeof (struct _buf));
813}
814
815int
816kobj_get_filesize(struct _buf *file, uint64_t *size)
817{
818	struct stat64 st;
819	vnode_t *vp = (vnode_t *)file->_fd;
820
821	if (fstat64(vp->v_fd, &st) == -1) {
822		vn_close(vp, 0, NULL, NULL);
823		return (errno);
824	}
825	*size = st.st_size;
826	return (0);
827}
828
829/*
830 * =========================================================================
831 * misc routines
832 * =========================================================================
833 */
834
835void
836delay(clock_t ticks)
837{
838	poll(0, 0, ticks * (1000 / hz));
839}
840
#if 0
/*
 * Find highest one bit set.
 *	Returns bit number + 1 of highest bit that is set, otherwise returns 0.
 *
 * Note: this definition is compiled out by the surrounding #if 0.
 */
int
highbit64(uint64_t i)
{
	int h = 1;

	if (i == 0)
		return (0);
	/*
	 * Binary search: whenever the upper half of the remaining width
	 * has a bit set, add that half's width to h and shift it down.
	 */
	if (i & 0xffffffff00000000ULL) {
		h += 32; i >>= 32;
	}
	if (i & 0xffff0000) {
		h += 16; i >>= 16;
	}
	if (i & 0xff00) {
		h += 8; i >>= 8;
	}
	if (i & 0xf0) {
		h += 4; i >>= 4;
	}
	if (i & 0xc) {
		h += 2; i >>= 2;
	}
	if (i & 0x2) {
		h += 1;
	}
	return (h);
}
#endif
874
/* Descriptors for /dev/random and /dev/urandom; opened by kernel_init(). */
static int random_fd = -1, urandom_fd = -1;

/*
 * Fill ptr[0 .. len) with bytes read from descriptor 'fd', looping over
 * short reads and retrying reads interrupted by a signal.  A read error
 * or end-of-file is fatal in every build, since the buffer could not be
 * filled and callers may ignore the return value.  Returns 0.
 */
static int
random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
{
	size_t resid = len;
	ssize_t bytes;

	ASSERT(fd != -1);

	while (resid != 0) {
		bytes = read(fd, ptr, resid);
		if (bytes == -1 && errno == EINTR)
			continue;
		/* -1 is an error, 0 is EOF; neither can make progress. */
		VERIFY(bytes > 0);
		ptr += bytes;
		resid -= bytes;
	}

	return (0);
}
894
895int
896random_get_bytes(uint8_t *ptr, size_t len)
897{
898	return (random_get_bytes_common(ptr, len, random_fd));
899}
900
901int
902random_get_pseudo_bytes(uint8_t *ptr, size_t len)
903{
904	return (random_get_bytes_common(ptr, len, urandom_fd));
905}
906
/*
 * Convert the string 'hw_serial' to an unsigned long in the given base.
 *
 * The converted value is stored in *result.  If 'nptr' is non-NULL it
 * receives a pointer to the first character that was not converted.
 * Returns 0 on success, or the errno value set by strtoul() (for
 * example ERANGE on overflow).  errno is cleared before the conversion
 * so that a stale value is never reported.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	char *end = (char *)hw_serial;

	errno = 0;
	*result = strtoul(hw_serial, &end, base);
	if (nptr != NULL)
		*nptr = end;

	return (errno);
}
917
918int
919ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
920{
921	char *end;
922
923	*result = strtoull(str, &end, base);
924	if (*result == 0)
925		return (errno);
926	return (0);
927}
928
929#ifdef illumos
930/* ARGSUSED */
931cyclic_id_t
932cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when)
933{
934	return (1);
935}
936
937/* ARGSUSED */
938void
939cyclic_remove(cyclic_id_t id)
940{
941}
942
943/* ARGSUSED */
944int
945cyclic_reprogram(cyclic_id_t id, hrtime_t expiration)
946{
947	return (1);
948}
949#endif
950
951/*
952 * =========================================================================
953 * kernel emulation setup & teardown
954 * =========================================================================
955 */
/*
 * Callback registered with umem_nofail_callback() by kernel_init():
 * write a fixed message to stderr and abort().  The return statement
 * only satisfies the callback's int return type.
 */
static int
umem_out_of_memory(void)
{
	static const char errmsg[] = "out of memory -- generating core dump\n";

	/* sizeof - 1: emit the text only, not its terminating NUL. */
	(void) write(fileno(stderr), errmsg, sizeof (errmsg) - 1);
	abort();
	return (0);
}
965
/*
 * Bring up the userland kernel emulation.  'mode' is tested against
 * FWRITE when choosing the host id string and is passed on to spa_init().
 */
void
kernel_init(int mode)
{
	extern uint_t rrw_tsd_key;

	/* Register umem_out_of_memory() as the umem no-fail callback. */
	umem_nofail_callback(umem_out_of_memory);

	physmem = sysconf(_SC_PHYS_PAGES);

	dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));

	/* hw_serial is the decimal host id when FWRITE is set, else "0". */
	(void) snprintf(hw_serial, sizeof (hw_serial), "%lu",
	    (mode & FWRITE) ? (unsigned long)gethostid() : 0);

	/* Descriptors used by random_get_bytes()/random_get_pseudo_bytes(). */
	VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
	VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);

	system_taskq_init();

#ifdef illumos
	mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL);
#endif

	spa_init(mode);

	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
}
994
995void
996kernel_fini(void)
997{
998	spa_fini();
999
1000	system_taskq_fini();
1001
1002	close(random_fd);
1003	close(urandom_fd);
1004
1005	random_fd = -1;
1006	urandom_fd = -1;
1007}
1008
1009int
1010z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
1011{
1012	int ret;
1013	uLongf len = *dstlen;
1014
1015	if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
1016		*dstlen = (size_t)len;
1017
1018	return (ret);
1019}
1020
1021int
1022z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
1023    int level)
1024{
1025	int ret;
1026	uLongf len = *dstlen;
1027
1028	if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
1029		*dstlen = (size_t)len;
1030
1031	return (ret);
1032}
1033
1034uid_t
1035crgetuid(cred_t *cr)
1036{
1037	return (0);
1038}
1039
1040uid_t
1041crgetruid(cred_t *cr)
1042{
1043	return (0);
1044}
1045
1046gid_t
1047crgetgid(cred_t *cr)
1048{
1049	return (0);
1050}
1051
1052int
1053crgetngroups(cred_t *cr)
1054{
1055	return (0);
1056}
1057
1058gid_t *
1059crgetgroups(cred_t *cr)
1060{
1061	return (NULL);
1062}
1063
1064int
1065zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1066{
1067	return (0);
1068}
1069
1070int
1071zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
1072{
1073	return (0);
1074}
1075
1076int
1077zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
1078{
1079	return (0);
1080}
1081
1082ksiddomain_t *
1083ksid_lookupdomain(const char *dom)
1084{
1085	ksiddomain_t *kd;
1086
1087	kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
1088	kd->kd_name = spa_strdup(dom);
1089	return (kd);
1090}
1091
1092void
1093ksiddomain_rele(ksiddomain_t *ksid)
1094{
1095	spa_strfree(ksid->kd_name);
1096	umem_free(ksid, sizeof (ksiddomain_t));
1097}
1098
1099/*
1100 * Do not change the length of the returned string; it must be freed
1101 * with strfree().
1102 */
1103char *
1104kmem_asprintf(const char *fmt, ...)
1105{
1106	int size;
1107	va_list adx;
1108	char *buf;
1109
1110	va_start(adx, fmt);
1111	size = vsnprintf(NULL, 0, fmt, adx) + 1;
1112	va_end(adx);
1113
1114	buf = kmem_alloc(size, KM_SLEEP);
1115
1116	va_start(adx, fmt);
1117	size = vsnprintf(buf, size, fmt, adx);
1118	va_end(adx);
1119
1120	return (buf);
1121}
1122
1123/* ARGSUSED */
1124int
1125zfs_onexit_fd_hold(int fd, minor_t *minorp)
1126{
1127	*minorp = 0;
1128	return (0);
1129}
1130
/*
 * On-exit stub: does nothing; 'fd' is ignored.
 */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
	(void) fd;
}
1136
1137/* ARGSUSED */
1138int
1139zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1140    uint64_t *action_handle)
1141{
1142	return (0);
1143}
1144
1145/* ARGSUSED */
1146int
1147zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
1148{
1149	return (0);
1150}
1151
1152/* ARGSUSED */
1153int
1154zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
1155{
1156	return (0);
1157}
1158
1159#ifdef __FreeBSD__
/*
 * zvol stub: always returns 0; 'name' is ignored.
 */
/* ARGSUSED */
int
zvol_create_minors(const char *name)
{
	(void) name;

	return (0);
}
1166#endif
1167
1168#ifdef illumos
1169void
1170bioinit(buf_t *bp)
1171{
1172	bzero(bp, sizeof (buf_t));
1173}
1174
1175void
1176biodone(buf_t *bp)
1177{
1178	if (bp->b_iodone != NULL) {
1179		(*(bp->b_iodone))(bp);
1180		return;
1181	}
1182	ASSERT((bp->b_flags & B_DONE) == 0);
1183	bp->b_flags |= B_DONE;
1184}
1185
1186void
1187bioerror(buf_t *bp, int error)
1188{
1189	ASSERT(bp != NULL);
1190	ASSERT(error >= 0);
1191
1192	if (error != 0) {
1193		bp->b_flags |= B_ERROR;
1194	} else {
1195		bp->b_flags &= ~B_ERROR;
1196	}
1197	bp->b_error = error;
1198}
1199
1200
1201int
1202geterror(struct buf *bp)
1203{
1204	int error = 0;
1205
1206	if (bp->b_flags & B_ERROR) {
1207		error = bp->b_error;
1208		if (!error)
1209			error = EIO;
1210	}
1211	return (error);
1212}
1213#endif
1214