zfs_ioctl.c revision 190878
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/types.h>
27#include <sys/param.h>
28#include <sys/systm.h>
29#include <sys/conf.h>
30#include <sys/kernel.h>
31#include <sys/lock.h>
32#include <sys/malloc.h>
33#include <sys/mutex.h>
34#include <sys/proc.h>
35#include <sys/errno.h>
36#include <sys/uio.h>
37#include <sys/buf.h>
38#include <sys/file.h>
39#include <sys/kmem.h>
40#include <sys/conf.h>
41#include <sys/cmn_err.h>
42#include <sys/stat.h>
43#include <sys/zfs_ioctl.h>
44#include <sys/zfs_znode.h>
45#include <sys/zap.h>
46#include <sys/spa.h>
47#include <sys/spa_impl.h>
48#include <sys/vdev.h>
49#include <sys/vdev_impl.h>
50#include <sys/dmu.h>
51#include <sys/dsl_dir.h>
52#include <sys/dsl_dataset.h>
53#include <sys/dsl_prop.h>
54#include <sys/dsl_deleg.h>
55#include <sys/dmu_objset.h>
56#include <sys/sunddi.h>
57#include <sys/policy.h>
58#include <sys/zone.h>
59#include <sys/nvpair.h>
60#include <sys/mount.h>
61#include <sys/taskqueue.h>
62#include <sys/sdt.h>
63#include <sys/varargs.h>
64#include <sys/fs/zfs.h>
65#include <sys/zfs_ctldir.h>
66#include <sys/zfs_dir.h>
67#include <sys/zvol.h>
68#include <sys/dmu_objset.h>
69
70#include "zfs_namecheck.h"
71#include "zfs_prop.h"
72#include "zfs_deleg.h"
73
74CTASSERT(sizeof(zfs_cmd_t) <= PAGE_SIZE);
75
76static struct cdev *zfsdev;
77
78extern void zfs_init(void);
79extern void zfs_fini(void);
80
81typedef int zfs_ioc_func_t(zfs_cmd_t *);
82typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);
83
84typedef struct zfs_ioc_vec {
85	zfs_ioc_func_t		*zvec_func;
86	zfs_secpolicy_func_t	*zvec_secpolicy;
87	enum {
88		NO_NAME,
89		POOL_NAME,
90		DATASET_NAME
91	} zvec_namecheck;
92	boolean_t		zvec_his_log;
93} zfs_ioc_vec_t;
94
95static void clear_props(char *dataset, nvlist_t *props);
96static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
97    boolean_t *);
98int zfs_set_prop_nvlist(const char *, nvlist_t *);
99
100/* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
101void
102__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
103{
104	const char *newfile;
105	char buf[256];
106	va_list adx;
107
108	/*
109	 * Get rid of annoying "../common/" prefix to filename.
110	 */
111	newfile = strrchr(file, '/');
112	if (newfile != NULL) {
113		newfile = newfile + 1; /* Get rid of leading / */
114	} else {
115		newfile = file;
116	}
117
118	va_start(adx, fmt);
119	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
120	va_end(adx);
121
122	/*
123	 * To get this data, use the zfs-dprintf probe as so:
124	 * dtrace -q -n 'zfs-dprintf \
125	 *	/stringof(arg0) == "dbuf.c"/ \
126	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
127	 * arg0 = file name
128	 * arg1 = function name
129	 * arg2 = line number
130	 * arg3 = message
131	 */
132	DTRACE_PROBE4(zfs__dprintf,
133	    char *, newfile, char *, func, int, line, char *, buf);
134}
135
136static void
137history_str_free(char *buf)
138{
139	kmem_free(buf, HIS_MAX_RECORD_LEN);
140}
141
142static char *
143history_str_get(zfs_cmd_t *zc)
144{
145	char *buf;
146
147	if (zc->zc_history == 0)
148		return (NULL);
149
150	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
151	if (copyinstr((void *)(uintptr_t)zc->zc_history,
152	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
153		history_str_free(buf);
154		return (NULL);
155	}
156
157	buf[HIS_MAX_RECORD_LEN -1] = '\0';
158
159	return (buf);
160}
161
162/*
163 * Check to see if the named dataset is currently defined as bootable
164 */
165static boolean_t
166zfs_is_bootfs(const char *name)
167{
168	spa_t *spa;
169	boolean_t ret = B_FALSE;
170
171	if (spa_open(name, &spa, FTAG) == 0) {
172		if (spa->spa_bootfs) {
173			objset_t *os;
174
175			if (dmu_objset_open(name, DMU_OST_ZFS,
176			    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
177				ret = (dmu_objset_id(os) == spa->spa_bootfs);
178				dmu_objset_close(os);
179			}
180		}
181		spa_close(spa, FTAG);
182	}
183	return (ret);
184}
185
186/*
187 * zfs_earlier_version
188 *
189 *	Return non-zero if the spa version is less than requested version.
190 */
191static int
192zfs_earlier_version(const char *name, int version)
193{
194	spa_t *spa;
195
196	if (spa_open(name, &spa, FTAG) == 0) {
197		if (spa_version(spa) < version) {
198			spa_close(spa, FTAG);
199			return (1);
200		}
201		spa_close(spa, FTAG);
202	}
203	return (0);
204}
205
206/*
207 * zpl_earlier_version
208 *
209 * Return TRUE if the ZPL version is less than requested version.
210 */
211static boolean_t
212zpl_earlier_version(const char *name, int version)
213{
214	objset_t *os;
215	boolean_t rc = B_TRUE;
216
217	if (dmu_objset_open(name, DMU_OST_ANY,
218	    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
219		uint64_t zplversion;
220
221		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
222			rc = zplversion < version;
223		dmu_objset_close(os);
224	}
225	return (rc);
226}
227
228static void
229zfs_log_history(zfs_cmd_t *zc)
230{
231	spa_t *spa;
232	char *buf;
233
234	if ((buf = history_str_get(zc)) == NULL)
235		return;
236
237	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
238		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
239			(void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
240		spa_close(spa, FTAG);
241	}
242	history_str_free(buf);
243}
244
245/*
246 * Policy for top-level read operations (list pools).  Requires no privileges,
247 * and can be used in the local zone, as there is no associated dataset.
248 */
249/* ARGSUSED */
250static int
251zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
252{
253	return (0);
254}
255
256/*
257 * Policy for dataset read operations (list children, get statistics).  Requires
258 * no privileges, but must be visible in the local zone.
259 */
260/* ARGSUSED */
261static int
262zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
263{
264	if (INGLOBALZONE(curthread) ||
265	    zone_dataset_visible(zc->zc_name, NULL))
266		return (0);
267
268	return (ENOENT);
269}
270
271static int
272zfs_dozonecheck(const char *dataset, cred_t *cr)
273{
274	uint64_t zoned;
275	int writable = 1;
276
277	/*
278	 * The dataset must be visible by this zone -- check this first
279	 * so they don't see EPERM on something they shouldn't know about.
280	 */
281	if (!INGLOBALZONE(curthread) &&
282	    !zone_dataset_visible(dataset, &writable))
283		return (ENOENT);
284
285	if (dsl_prop_get_integer(dataset, "jailed", &zoned, NULL))
286		return (ENOENT);
287
288	if (INGLOBALZONE(curthread)) {
289		/*
290		 * If the fs is zoned, only root can access it from the
291		 * global zone.
292		 */
293		if (secpolicy_zfs(cr) && zoned)
294			return (EPERM);
295	} else {
296		/*
297		 * If we are in a local zone, the 'zoned' property must be set.
298		 */
299		if (!zoned)
300			return (EPERM);
301
302		/* must be writable by this zone */
303		if (!writable)
304			return (EPERM);
305	}
306	return (0);
307}
308
309int
310zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
311{
312	int error;
313
314	error = zfs_dozonecheck(name, cr);
315	if (error == 0) {
316		error = secpolicy_zfs(cr);
317		if (error)
318			error = dsl_deleg_access(name, perm, cr);
319	}
320	return (error);
321}
322
323static int
324zfs_secpolicy_setprop(const char *name, zfs_prop_t prop, cred_t *cr)
325{
326	/*
327	 * Check permissions for special properties.
328	 */
329	switch (prop) {
330	case ZFS_PROP_ZONED:
331		/*
332		 * Disallow setting of 'zoned' from within a local zone.
333		 */
334		if (!INGLOBALZONE(curthread))
335			return (EPERM);
336		break;
337
338	case ZFS_PROP_QUOTA:
339		if (!INGLOBALZONE(curthread)) {
340			uint64_t zoned;
341			char setpoint[MAXNAMELEN];
342			/*
343			 * Unprivileged users are allowed to modify the
344			 * quota on things *under* (ie. contained by)
345			 * the thing they own.
346			 */
347			if (dsl_prop_get_integer(name, "zoned", &zoned,
348			    setpoint))
349				return (EPERM);
350			if (!zoned || strlen(name) <= strlen(setpoint))
351				return (EPERM);
352		}
353		break;
354	}
355
356	return (zfs_secpolicy_write_perms(name, zfs_prop_to_name(prop), cr));
357}
358
359int
360zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
361{
362	int error;
363
364	error = zfs_dozonecheck(zc->zc_name, cr);
365	if (error)
366		return (error);
367
368	/*
369	 * permission to set permissions will be evaluated later in
370	 * dsl_deleg_can_allow()
371	 */
372	return (0);
373}
374
375int
376zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
377{
378	int error;
379	error = zfs_secpolicy_write_perms(zc->zc_name,
380	    ZFS_DELEG_PERM_ROLLBACK, cr);
381	if (error == 0)
382		error = zfs_secpolicy_write_perms(zc->zc_name,
383		    ZFS_DELEG_PERM_MOUNT, cr);
384	return (error);
385}
386
387int
388zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
389{
390	return (zfs_secpolicy_write_perms(zc->zc_name,
391	    ZFS_DELEG_PERM_SEND, cr));
392}
393
394int
395zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
396{
397	if (!INGLOBALZONE(curthread))
398		return (EPERM);
399
400	if (secpolicy_nfs(cr) == 0) {
401		return (0);
402	} else {
403		vnode_t *vp;
404		int error;
405
406		if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
407		    NO_FOLLOW, NULL, &vp)) != 0)
408			return (error);
409
410		/* Now make sure mntpnt and dataset are ZFS */
411
412		if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
413		    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
414		    zc->zc_name) != 0)) {
415			VN_RELE(vp);
416			return (EPERM);
417		}
418
419		VN_RELE(vp);
420		return (dsl_deleg_access(zc->zc_name,
421		    ZFS_DELEG_PERM_SHARE, cr));
422	}
423}
424
/*
 * Compute the parent of a dataset name by removing the trailing "@snap"
 * component or, when there is no '@', the trailing "/child" component.
 *
 * datasetname - NUL-terminated dataset or snapshot name
 * parent      - output buffer receiving the parent name
 * parentsize  - size of 'parent' in bytes
 *
 * Returns 0 on success, ENOENT when the name has neither '@' nor '/', and
 * ENAMETOOLONG when the name (including its terminator) does not fit in
 * 'parent'.  Over-long names are rejected instead of truncated: a
 * truncated copy would be unterminated, and would also produce a parent
 * that does not belong to the requested dataset.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	size_t namelen = strlen(datasetname);
	char *cp;

	if (parentsize <= 0 || namelen >= (size_t)parentsize)
		return (ENAMETOOLONG);

	/* The length was checked above, so this copies the terminator too. */
	(void) memcpy(parent, datasetname, namelen + 1);

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (ENOENT);

	cp[0] = '\0';
	return (0);
}
446
447int
448zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
449{
450	int error;
451
452	if ((error = zfs_secpolicy_write_perms(name,
453	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
454		return (error);
455
456	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
457}
458
459static int
460zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
461{
462	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
463}
464
465/*
466 * Must have sys_config privilege to check the iscsi permission
467 */
468/* ARGSUSED */
469static int
470zfs_secpolicy_iscsi(zfs_cmd_t *zc, cred_t *cr)
471{
472	return (secpolicy_zfs(cr));
473}
474
475int
476zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
477{
478	char 	parentname[MAXNAMELEN];
479	int	error;
480
481	if ((error = zfs_secpolicy_write_perms(from,
482	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
483		return (error);
484
485	if ((error = zfs_secpolicy_write_perms(from,
486	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
487		return (error);
488
489	if ((error = zfs_get_parent(to, parentname,
490	    sizeof (parentname))) != 0)
491		return (error);
492
493	if ((error = zfs_secpolicy_write_perms(parentname,
494	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
495		return (error);
496
497	if ((error = zfs_secpolicy_write_perms(parentname,
498	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
499		return (error);
500
501	return (error);
502}
503
504static int
505zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
506{
507	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
508}
509
510static int
511zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
512{
513	char 	parentname[MAXNAMELEN];
514	objset_t *clone;
515	int error;
516
517	error = zfs_secpolicy_write_perms(zc->zc_name,
518	    ZFS_DELEG_PERM_PROMOTE, cr);
519	if (error)
520		return (error);
521
522	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
523	    DS_MODE_USER | DS_MODE_READONLY, &clone);
524
525	if (error == 0) {
526		dsl_dataset_t *pclone = NULL;
527		dsl_dir_t *dd;
528		dd = clone->os->os_dsl_dataset->ds_dir;
529
530		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
531		error = dsl_dataset_hold_obj(dd->dd_pool,
532		    dd->dd_phys->dd_origin_obj, FTAG, &pclone);
533		rw_exit(&dd->dd_pool->dp_config_rwlock);
534		if (error) {
535			dmu_objset_close(clone);
536			return (error);
537		}
538
539		error = zfs_secpolicy_write_perms(zc->zc_name,
540		    ZFS_DELEG_PERM_MOUNT, cr);
541
542		dsl_dataset_name(pclone, parentname);
543		dmu_objset_close(clone);
544		dsl_dataset_rele(pclone, FTAG);
545		if (error == 0)
546			error = zfs_secpolicy_write_perms(parentname,
547			    ZFS_DELEG_PERM_PROMOTE, cr);
548	}
549	return (error);
550}
551
552static int
553zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
554{
555	int error;
556
557	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
558	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
559		return (error);
560
561	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
562	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
563		return (error);
564
565	return (zfs_secpolicy_write_perms(zc->zc_name,
566	    ZFS_DELEG_PERM_CREATE, cr));
567}
568
569int
570zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
571{
572	int error;
573
574	if ((error = zfs_secpolicy_write_perms(name,
575	    ZFS_DELEG_PERM_SNAPSHOT, cr)) != 0)
576		return (error);
577
578	error = zfs_secpolicy_write_perms(name,
579	    ZFS_DELEG_PERM_MOUNT, cr);
580
581	return (error);
582}
583
584static int
585zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
586{
587
588	return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
589}
590
591static int
592zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
593{
594	char 	parentname[MAXNAMELEN];
595	int 	error;
596
597	if ((error = zfs_get_parent(zc->zc_name, parentname,
598	    sizeof (parentname))) != 0)
599		return (error);
600
601	if (zc->zc_value[0] != '\0') {
602		if ((error = zfs_secpolicy_write_perms(zc->zc_value,
603		    ZFS_DELEG_PERM_CLONE, cr)) != 0)
604			return (error);
605	}
606
607	if ((error = zfs_secpolicy_write_perms(parentname,
608	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
609		return (error);
610
611	error = zfs_secpolicy_write_perms(parentname,
612	    ZFS_DELEG_PERM_MOUNT, cr);
613
614	return (error);
615}
616
617static int
618zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
619{
620	int error;
621
622	error = secpolicy_fs_unmount(cr, NULL);
623	if (error) {
624		error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
625	}
626	return (error);
627}
628
629/*
630 * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
631 * SYS_CONFIG privilege, which is not available in a local zone.
632 */
633/* ARGSUSED */
634static int
635zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
636{
637	if (secpolicy_sys_config(cr, B_FALSE) != 0)
638		return (EPERM);
639
640	return (0);
641}
642
643/*
644 * Just like zfs_secpolicy_config, except that we will check for
645 * mount permission on the dataset for permission to create/remove
646 * the minor nodes.
647 */
648static int
649zfs_secpolicy_minor(zfs_cmd_t *zc, cred_t *cr)
650{
651	if (secpolicy_sys_config(cr, B_FALSE) != 0) {
652		return (dsl_deleg_access(zc->zc_name,
653		    ZFS_DELEG_PERM_MOUNT, cr));
654	}
655
656	return (0);
657}
658
659/*
660 * Policy for fault injection.  Requires all privileges.
661 */
662/* ARGSUSED */
663static int
664zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
665{
666	return (secpolicy_zinject(cr));
667}
668
669static int
670zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
671{
672	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
673
674	if (prop == ZPROP_INVAL) {
675		if (!zfs_prop_user(zc->zc_value))
676			return (EINVAL);
677		return (zfs_secpolicy_write_perms(zc->zc_name,
678		    ZFS_DELEG_PERM_USERPROP, cr));
679	} else {
680		if (!zfs_prop_inheritable(prop))
681			return (EINVAL);
682		return (zfs_secpolicy_setprop(zc->zc_name, prop, cr));
683	}
684}
685
686/*
687 * Policy for dataset backup operations (sendbackup).
688 * Requires SYS_MOUNT privilege, and must be writable in the local zone.
689 */
690static int
691zfs_secpolicy_operator(const char *dataset, cred_t *cr)
692{
693	int writable = 1;
694
695	if (!INGLOBALZONE(curthread) && !zone_dataset_visible(dataset, &writable))
696		return (ENOENT);
697	if (secpolicy_zfs(cr) != 0 && !groupmember(GID_OPERATOR, cr))
698		return (EPERM);
699	return (0);
700}
701
702/*
703 * Returns the nvlist as specified by the user in the zfs_cmd_t.
704 */
705static int
706get_nvlist(uint64_t nvl, uint64_t size, nvlist_t **nvp)
707{
708	char *packed;
709	int error;
710	nvlist_t *list = NULL;
711
712	/*
713	 * Read in and unpack the user-supplied nvlist.
714	 */
715	if (size == 0)
716		return (EINVAL);
717
718	packed = kmem_alloc(size, KM_SLEEP);
719
720	if ((error = xcopyin((void *)(uintptr_t)nvl, packed, size)) != 0) {
721		kmem_free(packed, size);
722		return (error);
723	}
724
725	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
726		kmem_free(packed, size);
727		return (error);
728	}
729
730	kmem_free(packed, size);
731
732	*nvp = list;
733	return (0);
734}
735
736static int
737put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
738{
739	char *packed = NULL;
740	size_t size;
741	int error;
742
743	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
744
745	if (size > zc->zc_nvlist_dst_size) {
746		/*
747		 * Solaris returns ENOMEM here, because even if an error is
748		 * returned from an ioctl(2), new zc_nvlist_dst_size will be
749		 * passed to the userland. This is not the case for FreeBSD.
750		 * We need to return 0, so the kernel will copy the
751		 * zc_nvlist_dst_size back and the userland can discover that a
752		 * bigger buffer is needed.
753		 */
754		error = 0;
755	} else {
756		packed = kmem_alloc(size, KM_SLEEP);
757		VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
758		    KM_SLEEP) == 0);
759		error = xcopyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
760		    size);
761		kmem_free(packed, size);
762	}
763
764	zc->zc_nvlist_dst_size = size;
765	return (error);
766}
767
768static int
769zfs_ioc_pool_create(zfs_cmd_t *zc)
770{
771	int error;
772	nvlist_t *config, *props = NULL;
773	nvlist_t *rootprops = NULL;
774	nvlist_t *zplprops = NULL;
775	char *buf;
776
777	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
778	    &config))
779		return (error);
780
781	if (zc->zc_nvlist_src_size != 0 && (error =
782	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, &props))) {
783		nvlist_free(config);
784		return (error);
785	}
786
787	if (props) {
788		nvlist_t *nvl = NULL;
789		uint64_t version = SPA_VERSION;
790
791		(void) nvlist_lookup_uint64(props,
792		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
793		if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) {
794			error = EINVAL;
795			goto pool_props_bad;
796		}
797		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
798		if (nvl) {
799			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
800			if (error != 0) {
801				nvlist_free(config);
802				nvlist_free(props);
803				return (error);
804			}
805			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
806		}
807		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
808		error = zfs_fill_zplprops_root(version, rootprops,
809		    zplprops, NULL);
810		if (error)
811			goto pool_props_bad;
812	}
813
814	buf = history_str_get(zc);
815
816	error = spa_create(zc->zc_name, config, props, buf, zplprops);
817
818	/*
819	 * Set the remaining root properties
820	 */
821	if (!error &&
822	    (error = zfs_set_prop_nvlist(zc->zc_name, rootprops)) != 0)
823		(void) spa_destroy(zc->zc_name);
824
825	if (buf != NULL)
826		history_str_free(buf);
827
828pool_props_bad:
829	nvlist_free(rootprops);
830	nvlist_free(zplprops);
831	nvlist_free(config);
832	nvlist_free(props);
833
834	return (error);
835}
836
837static int
838zfs_ioc_pool_destroy(zfs_cmd_t *zc)
839{
840	int error;
841	zfs_log_history(zc);
842	error = spa_destroy(zc->zc_name);
843	return (error);
844}
845
846static int
847zfs_ioc_pool_import(zfs_cmd_t *zc)
848{
849	int error;
850	nvlist_t *config, *props = NULL;
851	uint64_t guid;
852
853	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
854	    &config)) != 0)
855		return (error);
856
857	if (zc->zc_nvlist_src_size != 0 && (error =
858	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, &props))) {
859		nvlist_free(config);
860		return (error);
861	}
862
863	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
864	    guid != zc->zc_guid)
865		error = EINVAL;
866	else if (zc->zc_cookie)
867		error = spa_import_faulted(zc->zc_name, config,
868		    props);
869	else
870		error = spa_import(zc->zc_name, config, props);
871
872	nvlist_free(config);
873
874	if (props)
875		nvlist_free(props);
876
877	return (error);
878}
879
880static int
881zfs_ioc_pool_export(zfs_cmd_t *zc)
882{
883	int error;
884	boolean_t force = (boolean_t)zc->zc_cookie;
885
886	zfs_log_history(zc);
887	error = spa_export(zc->zc_name, NULL, force);
888	return (error);
889}
890
891static int
892zfs_ioc_pool_configs(zfs_cmd_t *zc)
893{
894	nvlist_t *configs;
895	int error;
896
897	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
898		return (EEXIST);
899
900	error = put_nvlist(zc, configs);
901
902	nvlist_free(configs);
903
904	return (error);
905}
906
907static int
908zfs_ioc_pool_stats(zfs_cmd_t *zc)
909{
910	nvlist_t *config;
911	int error;
912	int ret = 0;
913
914	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
915	    sizeof (zc->zc_value));
916
917	if (config != NULL) {
918		ret = put_nvlist(zc, config);
919		nvlist_free(config);
920
921		/*
922		 * The config may be present even if 'error' is non-zero.
923		 * In this case we return success, and preserve the real errno
924		 * in 'zc_cookie'.
925		 */
926		zc->zc_cookie = error;
927	} else {
928		ret = error;
929	}
930
931	return (ret);
932}
933
934/*
935 * Try to import the given pool, returning pool stats as appropriate so that
936 * user land knows which devices are available and overall pool health.
937 */
938static int
939zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
940{
941	nvlist_t *tryconfig, *config;
942	int error;
943
944	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
945	    &tryconfig)) != 0)
946		return (error);
947
948	config = spa_tryimport(tryconfig);
949
950	nvlist_free(tryconfig);
951
952	if (config == NULL)
953		return (EINVAL);
954
955	error = put_nvlist(zc, config);
956	nvlist_free(config);
957
958	return (error);
959}
960
961static int
962zfs_ioc_pool_scrub(zfs_cmd_t *zc)
963{
964	spa_t *spa;
965	int error;
966
967	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
968		return (error);
969
970	error = spa_scrub(spa, zc->zc_cookie);
971
972	spa_close(spa, FTAG);
973
974	return (error);
975}
976
977static int
978zfs_ioc_pool_freeze(zfs_cmd_t *zc)
979{
980	spa_t *spa;
981	int error;
982
983	error = spa_open(zc->zc_name, &spa, FTAG);
984	if (error == 0) {
985		spa_freeze(spa);
986		spa_close(spa, FTAG);
987	}
988	return (error);
989}
990
991static int
992zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
993{
994	spa_t *spa;
995	int error;
996
997	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
998		return (error);
999
1000	if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
1001		spa_close(spa, FTAG);
1002		return (EINVAL);
1003	}
1004
1005	spa_upgrade(spa, zc->zc_cookie);
1006	spa_close(spa, FTAG);
1007
1008	return (error);
1009}
1010
1011static int
1012zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1013{
1014	spa_t *spa;
1015	char *hist_buf;
1016	uint64_t size;
1017	int error;
1018
1019	if ((size = zc->zc_history_len) == 0)
1020		return (EINVAL);
1021
1022	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1023		return (error);
1024
1025	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1026		spa_close(spa, FTAG);
1027		return (ENOTSUP);
1028	}
1029
1030	hist_buf = kmem_alloc(size, KM_SLEEP);
1031	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1032	    &zc->zc_history_len, hist_buf)) == 0) {
1033		error = xcopyout(hist_buf,
1034		    (char *)(uintptr_t)zc->zc_history,
1035		    zc->zc_history_len);
1036	}
1037
1038	spa_close(spa, FTAG);
1039	kmem_free(hist_buf, size);
1040	return (error);
1041}
1042
1043static int
1044zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1045{
1046	int error;
1047
1048	if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
1049		return (error);
1050
1051	return (0);
1052}
1053
1054static int
1055zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1056{
1057	objset_t *osp;
1058	int error;
1059
1060	if ((error = dmu_objset_open(zc->zc_name, DMU_OST_ZFS,
1061	    DS_MODE_USER | DS_MODE_READONLY, &osp)) != 0)
1062		return (error);
1063	error = zfs_obj_to_path(osp, zc->zc_obj, zc->zc_value,
1064	    sizeof (zc->zc_value));
1065	dmu_objset_close(osp);
1066
1067	return (error);
1068}
1069
1070static int
1071zfs_ioc_vdev_add(zfs_cmd_t *zc)
1072{
1073	spa_t *spa;
1074	int error;
1075	nvlist_t *config, **l2cache, **spares;
1076	uint_t nl2cache = 0, nspares = 0;
1077
1078	error = spa_open(zc->zc_name, &spa, FTAG);
1079	if (error != 0)
1080		return (error);
1081
1082	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1083	    &config);
1084	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1085	    &l2cache, &nl2cache);
1086
1087	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1088	    &spares, &nspares);
1089
1090	/*
1091	 * A root pool with concatenated devices is not supported.
1092	 * Thus, can not add a device to a root pool.
1093	 *
1094	 * Intent log device can not be added to a rootpool because
1095	 * during mountroot, zil is replayed, a seperated log device
1096	 * can not be accessed during the mountroot time.
1097	 *
1098	 * l2cache and spare devices are ok to be added to a rootpool.
1099	 */
1100	if (spa->spa_bootfs != 0 && nl2cache == 0 && nspares == 0) {
1101		spa_close(spa, FTAG);
1102		return (EDOM);
1103	}
1104
1105	if (error == 0) {
1106		error = spa_vdev_add(spa, config);
1107		nvlist_free(config);
1108	}
1109	spa_close(spa, FTAG);
1110	return (error);
1111}
1112
1113static int
1114zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1115{
1116	spa_t *spa;
1117	int error;
1118
1119	error = spa_open(zc->zc_name, &spa, FTAG);
1120	if (error != 0)
1121		return (error);
1122	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1123	spa_close(spa, FTAG);
1124	return (error);
1125}
1126
1127static int
1128zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1129{
1130	spa_t *spa;
1131	int error;
1132	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1133
1134	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1135		return (error);
1136	switch (zc->zc_cookie) {
1137	case VDEV_STATE_ONLINE:
1138		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1139		break;
1140
1141	case VDEV_STATE_OFFLINE:
1142		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1143		break;
1144
1145	case VDEV_STATE_FAULTED:
1146		error = vdev_fault(spa, zc->zc_guid);
1147		break;
1148
1149	case VDEV_STATE_DEGRADED:
1150		error = vdev_degrade(spa, zc->zc_guid);
1151		break;
1152
1153	default:
1154		error = EINVAL;
1155	}
1156	zc->zc_cookie = newstate;
1157	spa_close(spa, FTAG);
1158	return (error);
1159}
1160
1161static int
1162zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1163{
1164	spa_t *spa;
1165	int replacing = zc->zc_cookie;
1166	nvlist_t *config;
1167	int error;
1168
1169	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1170		return (error);
1171
1172	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1173	    &config)) == 0) {
1174		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1175		nvlist_free(config);
1176	}
1177
1178	spa_close(spa, FTAG);
1179	return (error);
1180}
1181
1182static int
1183zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1184{
1185	spa_t *spa;
1186	int error;
1187
1188	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1189		return (error);
1190
1191	error = spa_vdev_detach(spa, zc->zc_guid, B_FALSE);
1192
1193	spa_close(spa, FTAG);
1194	return (error);
1195}
1196
1197static int
1198zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1199{
1200	spa_t *spa;
1201	char *path = zc->zc_value;
1202	uint64_t guid = zc->zc_guid;
1203	int error;
1204
1205	error = spa_open(zc->zc_name, &spa, FTAG);
1206	if (error != 0)
1207		return (error);
1208
1209	error = spa_vdev_setpath(spa, guid, path);
1210	spa_close(spa, FTAG);
1211	return (error);
1212}
1213
1214/*
1215 * inputs:
1216 * zc_name		name of filesystem
1217 * zc_nvlist_dst_size	size of buffer for property nvlist
1218 *
1219 * outputs:
1220 * zc_objset_stats	stats
1221 * zc_nvlist_dst	property nvlist
1222 * zc_nvlist_dst_size	size of property nvlist
1223 */
1224static int
1225zfs_ioc_objset_stats(zfs_cmd_t *zc)
1226{
1227	objset_t *os = NULL;
1228	int error;
1229	nvlist_t *nv;
1230
1231	if (error = dmu_objset_open(zc->zc_name,
1232	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os))
1233		return (error);
1234
1235	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1236
1237	if (zc->zc_nvlist_dst != 0 &&
1238	    (error = dsl_prop_get_all(os, &nv, FALSE)) == 0) {
1239		dmu_objset_stats(os, nv);
1240		/*
1241		 * NB: zvol_get_stats() will read the objset contents,
1242		 * which we aren't supposed to do with a
1243		 * DS_MODE_USER hold, because it could be
1244		 * inconsistent.  So this is a bit of a workaround...
1245		 */
1246		if (!zc->zc_objset_stats.dds_inconsistent) {
1247			if (dmu_objset_type(os) == DMU_OST_ZVOL)
1248				VERIFY(zvol_get_stats(os, nv) == 0);
1249		}
1250		error = put_nvlist(zc, nv);
1251		nvlist_free(nv);
1252	}
1253
1254	dmu_objset_close(os);
1255	if (error == ENOMEM)
1256		error = 0;
1257	return (error);
1258}
1259
1260static int
1261nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
1262{
1263	uint64_t value;
1264	int error;
1265
1266	/*
1267	 * zfs_get_zplprop() will either find a value or give us
1268	 * the default value (if there is one).
1269	 */
1270	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
1271		return (error);
1272	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
1273	return (0);
1274}
1275
1276/*
1277 * inputs:
1278 * zc_name		name of filesystem
1279 * zc_nvlist_dst_size	size of buffer for zpl property nvlist
1280 *
1281 * outputs:
1282 * zc_nvlist_dst	zpl property nvlist
1283 * zc_nvlist_dst_size	size of zpl property nvlist
1284 */
1285static int
1286zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
1287{
1288	objset_t *os;
1289	int err;
1290
1291	if (err = dmu_objset_open(zc->zc_name,
1292	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os))
1293		return (err);
1294
1295	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1296
1297	/*
1298	 * NB: nvl_add_zplprop() will read the objset contents,
1299	 * which we aren't supposed to do with a DS_MODE_USER
1300	 * hold, because it could be inconsistent.
1301	 */
1302	if (zc->zc_nvlist_dst != 0 &&
1303	    !zc->zc_objset_stats.dds_inconsistent &&
1304	    dmu_objset_type(os) == DMU_OST_ZFS) {
1305		nvlist_t *nv;
1306
1307		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1308		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
1309		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
1310		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
1311		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
1312			err = put_nvlist(zc, nv);
1313		nvlist_free(nv);
1314	} else {
1315		err = ENOENT;
1316	}
1317	dmu_objset_close(os);
1318	return (err);
1319}
1320
1321/*
1322 * inputs:
1323 * zc_name		name of filesystem
1324 * zc_cookie		zap cursor
1325 * zc_nvlist_dst_size	size of buffer for property nvlist
1326 *
1327 * outputs:
1328 * zc_name		name of next filesystem
1329 * zc_objset_stats	stats
1330 * zc_nvlist_dst	property nvlist
1331 * zc_nvlist_dst_size	size of property nvlist
1332 */
1333static int
1334zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1335{
1336	objset_t *os;
1337	int error;
1338	char *p;
1339
1340	if (error = dmu_objset_open(zc->zc_name,
1341	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os)) {
1342		if (error == ENOENT)
1343			error = ESRCH;
1344		return (error);
1345	}
1346
1347	p = strrchr(zc->zc_name, '/');
1348	if (p == NULL || p[1] != '\0')
1349		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1350	p = zc->zc_name + strlen(zc->zc_name);
1351
1352	do {
1353		error = dmu_dir_list_next(os,
1354		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
1355		    NULL, &zc->zc_cookie);
1356		if (error == ENOENT)
1357			error = ESRCH;
1358	} while (error == 0 && !INGLOBALZONE(curthread) &&
1359	    !zone_dataset_visible(zc->zc_name, NULL));
1360	dmu_objset_close(os);
1361
1362	/*
1363	 * If it's a hidden dataset (ie. with a '$' in its name), don't
1364	 * try to get stats for it.  Userland will skip over it.
1365	 */
1366	if (error == 0 && strchr(zc->zc_name, '$') == NULL)
1367		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1368
1369	return (error);
1370}
1371
1372/*
1373 * inputs:
1374 * zc_name		name of filesystem
1375 * zc_cookie		zap cursor
1376 * zc_nvlist_dst_size	size of buffer for property nvlist
1377 *
1378 * outputs:
1379 * zc_name		name of next snapshot
1380 * zc_objset_stats	stats
1381 * zc_nvlist_dst	property nvlist
1382 * zc_nvlist_dst_size	size of property nvlist
1383 */
1384static int
1385zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
1386{
1387	objset_t *os;
1388	int error;
1389
1390	error = dmu_objset_open(zc->zc_name,
1391	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os);
1392	if (error)
1393		return (error == ENOENT ? ESRCH : error);
1394
1395	/*
1396	 * A dataset name of maximum length cannot have any snapshots,
1397	 * so exit immediately.
1398	 */
1399	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
1400		dmu_objset_close(os);
1401		return (ESRCH);
1402	}
1403
1404	error = dmu_snapshot_list_next(os,
1405	    sizeof (zc->zc_name) - strlen(zc->zc_name),
1406	    zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie, NULL);
1407	dmu_objset_close(os);
1408	if (error == 0)
1409		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1410	else if (error == ENOENT)
1411		error = ESRCH;
1412
1413	/* if we failed, undo the @ that we tacked on to zc_name */
1414	if (error)
1415		*strchr(zc->zc_name, '@') = '\0';
1416	return (error);
1417}
1418
/*
 * Apply every property in 'nvl' to the dataset 'name'.
 *
 * The nvlist is walked twice.  The first pass only validates: each pair
 * must either name a known property or be a string-valued user property,
 * the caller (CRED()) must pass the matching security policy check, and a
 * few properties are checked against the pool / filesystem version.  The
 * first pass returns before any property has been set.
 *
 * The second pass sets the properties one at a time and returns at the
 * first failure, so properties set before the failing one remain set.
 *
 * Returns 0 on success or an errno value.
 */
int
zfs_set_prop_nvlist(const char *name, nvlist_t *nvl)
{
	nvpair_t *elem;
	int error;
	uint64_t intval;
	char *strval;

	/*
	 * First validate permission to set all of the properties
	 */
	elem = NULL;
	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
		const char *propname = nvpair_name(elem);
		zfs_prop_t prop = zfs_name_to_prop(propname);

		if (prop == ZPROP_INVAL) {
			/*
			 * If this is a user-defined property, it must be a
			 * string, and there is no further validation to do.
			 */
			if (!zfs_prop_user(propname) ||
			    nvpair_type(elem) != DATA_TYPE_STRING)
				return (EINVAL);

			if (error = zfs_secpolicy_write_perms(name,
			    ZFS_DELEG_PERM_USERPROP, CRED()))
				return (error);
			continue;
		}

		if ((error = zfs_secpolicy_setprop(name, prop, CRED())) != 0)
			return (error);

		/*
		 * Check that this value is valid for this pool version
		 */
		switch (prop) {
		case ZFS_PROP_COMPRESSION:
			/*
			 * If the user specified gzip compression, make sure
			 * the SPA supports it. We ignore any errors here since
			 * we'll catch them later.
			 */
			if (nvpair_type(elem) == DATA_TYPE_UINT64 &&
			    nvpair_value_uint64(elem, &intval) == 0) {
				if (intval >= ZIO_COMPRESS_GZIP_1 &&
				    intval <= ZIO_COMPRESS_GZIP_9 &&
				    zfs_earlier_version(name,
				    SPA_VERSION_GZIP_COMPRESSION))
					return (ENOTSUP);

				/*
				 * If this is a bootable dataset then
				 * verify that the compression algorithm
				 * is supported for booting. We must return
				 * something other than ENOTSUP since it
				 * implies a downrev pool version.
				 */
				if (zfs_is_bootfs(name) &&
				    !BOOTFS_COMPRESS_VALID(intval))
					return (ERANGE);
			}
			break;

		case ZFS_PROP_COPIES:
			if (zfs_earlier_version(name,
			    SPA_VERSION_DITTO_BLOCKS))
				return (ENOTSUP);
			break;

		case ZFS_PROP_SHARESMB:
			if (zpl_earlier_version(name, ZPL_VERSION_FUID))
				return (ENOTSUP);
			break;
		}
	}

	/*
	 * Second pass: actually set the properties.
	 */
	elem = NULL;
	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
		const char *propname = nvpair_name(elem);
		zfs_prop_t prop = zfs_name_to_prop(propname);

		/*
		 * User-defined property: the first pass already verified
		 * that the value is a string, so store it as-is.
		 */
		if (prop == ZPROP_INVAL) {
			VERIFY(nvpair_value_string(elem, &strval) == 0);
			error = dsl_prop_set(name, propname, 1,
			    strlen(strval) + 1, strval);
			if (error == 0)
				continue;
			else
				return (error);
		}

		/*
		 * The properties with their own case below are set through
		 * a dedicated routine rather than through dsl_prop_set().
		 */
		switch (prop) {
		case ZFS_PROP_QUOTA:
			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
			    (error = dsl_dir_set_quota(name, intval)) != 0)
				return (error);
			break;

		case ZFS_PROP_REFQUOTA:
			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
			    (error = dsl_dataset_set_quota(name, intval)) != 0)
				return (error);
			break;

		case ZFS_PROP_RESERVATION:
			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
			    (error = dsl_dir_set_reservation(name,
			    intval)) != 0)
				return (error);
			break;

		case ZFS_PROP_REFRESERVATION:
			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
			    (error = dsl_dataset_set_reservation(name,
			    intval)) != 0)
				return (error);
			break;

		case ZFS_PROP_VOLSIZE:
			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
			    (error = zvol_set_volsize(name,
			    ddi_driver_major(zfs_dip), intval)) != 0)
				return (error);
			break;

		case ZFS_PROP_VOLBLOCKSIZE:
			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
			    (error = zvol_set_volblocksize(name, intval)) != 0)
				return (error);
			break;

		case ZFS_PROP_VERSION:
			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
			    (error = zfs_set_version(name, intval)) != 0)
				return (error);
			break;

		default:
			/*
			 * Generic property: the nvpair's data type must agree
			 * with the property's declared type before the value
			 * is handed to dsl_prop_set().
			 */
			if (nvpair_type(elem) == DATA_TYPE_STRING) {
				if (zfs_prop_get_type(prop) !=
				    PROP_TYPE_STRING)
					return (EINVAL);
				VERIFY(nvpair_value_string(elem, &strval) == 0);
				if ((error = dsl_prop_set(name,
				    nvpair_name(elem), 1, strlen(strval) + 1,
				    strval)) != 0)
					return (error);
			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
				const char *unused;

				VERIFY(nvpair_value_uint64(elem, &intval) == 0);

				switch (zfs_prop_get_type(prop)) {
				case PROP_TYPE_NUMBER:
					break;
				case PROP_TYPE_STRING:
					return (EINVAL);
				case PROP_TYPE_INDEX:
					/*
					 * The lookup is done only to reject
					 * an index with no string mapping;
					 * the string itself is not used.
					 */
					if (zfs_prop_index_to_string(prop,
					    intval, &unused) != 0)
						return (EINVAL);
					break;
				default:
					cmn_err(CE_PANIC,
					    "unknown property type");
					break;
				}

				if ((error = dsl_prop_set(name, propname,
				    8, 1, &intval)) != 0)
					return (error);
			} else {
				return (EINVAL);
			}
			break;
		}
	}

	return (0);
}
1601
1602/*
1603 * inputs:
1604 * zc_name		name of filesystem
1605 * zc_value		name of property to inherit
1606 * zc_nvlist_src{_size}	nvlist of properties to apply
1607 * zc_cookie		clear existing local props?
1608 *
1609 * outputs:		none
1610 */
1611static int
1612zfs_ioc_set_prop(zfs_cmd_t *zc)
1613{
1614	nvlist_t *nvl;
1615	int error;
1616
1617	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1618	    &nvl)) != 0)
1619		return (error);
1620
1621	if (zc->zc_cookie) {
1622		nvlist_t *origprops;
1623		objset_t *os;
1624
1625		if (dmu_objset_open(zc->zc_name, DMU_OST_ANY,
1626		    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
1627			if (dsl_prop_get_all(os, &origprops, TRUE) == 0) {
1628				clear_props(zc->zc_name, origprops);
1629				nvlist_free(origprops);
1630			}
1631			dmu_objset_close(os);
1632		}
1633
1634	}
1635
1636	error = zfs_set_prop_nvlist(zc->zc_name, nvl);
1637
1638	nvlist_free(nvl);
1639	return (error);
1640}
1641
1642/*
1643 * inputs:
1644 * zc_name		name of filesystem
1645 * zc_value		name of property to inherit
1646 *
1647 * outputs:		none
1648 */
1649static int
1650zfs_ioc_inherit_prop(zfs_cmd_t *zc)
1651{
1652	/* the property name has been validated by zfs_secpolicy_inherit() */
1653	return (dsl_prop_set(zc->zc_name, zc->zc_value, 0, 0, NULL));
1654}
1655
1656static int
1657zfs_ioc_pool_set_props(zfs_cmd_t *zc)
1658{
1659	nvlist_t *props;
1660	spa_t *spa;
1661	int error;
1662
1663	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1664	    &props)))
1665		return (error);
1666
1667	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
1668		nvlist_free(props);
1669		return (error);
1670	}
1671
1672	error = spa_prop_set(spa, props);
1673
1674	nvlist_free(props);
1675	spa_close(spa, FTAG);
1676
1677	return (error);
1678}
1679
1680static int
1681zfs_ioc_pool_get_props(zfs_cmd_t *zc)
1682{
1683	spa_t *spa;
1684	int error;
1685	nvlist_t *nvp = NULL;
1686
1687	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1688		return (error);
1689
1690	error = spa_prop_get(spa, &nvp);
1691
1692	if (error == 0 && zc->zc_nvlist_dst != 0)
1693		error = put_nvlist(zc, nvp);
1694	else
1695		error = EFAULT;
1696
1697	spa_close(spa, FTAG);
1698
1699	if (nvp)
1700		nvlist_free(nvp);
1701	return (error);
1702}
1703
/*
 * inputs (only consulted when built with TODO defined):
 * zc_name		name of dataset
 * zc_nvlist_src{_size}	nvlist carrying ZFS_DELEG_PERM_UID,
 *			ZFS_DELEG_PERM_GID and ZFS_DELEG_PERM_GROUPS
 *
 * outputs:		none
 *
 * With TODO defined, a credential is built from the supplied uid, gid and
 * group list and dsl_deleg_access() is asked whether that credential holds
 * the shareiscsi permission on the dataset.  A missing nvlist entry or a
 * failure to build the credential yields EPERM.
 *
 * Without TODO defined, the check is compiled out and the function always
 * returns EPERM.
 */
static int
zfs_ioc_iscsi_perm_check(zfs_cmd_t *zc)
{
#ifdef TODO
	nvlist_t *nvp;
	int error;
	uint32_t uid;
	uint32_t gid;
	uint32_t *groups;
	uint_t group_cnt;
	cred_t	*usercred;

	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
	    &nvp)) != 0) {
		return (error);
	}

	if ((error = nvlist_lookup_uint32(nvp,
	    ZFS_DELEG_PERM_UID, &uid)) != 0) {
		nvlist_free(nvp);
		return (EPERM);
	}

	if ((error = nvlist_lookup_uint32(nvp,
	    ZFS_DELEG_PERM_GID, &gid)) != 0) {
		nvlist_free(nvp);
		return (EPERM);
	}

	if ((error = nvlist_lookup_uint32_array(nvp, ZFS_DELEG_PERM_GROUPS,
	    &groups, &group_cnt)) != 0) {
		nvlist_free(nvp);
		return (EPERM);
	}
	/* Build a temporary credential describing the requesting user. */
	usercred = cralloc();
	if ((crsetugid(usercred, uid, gid) != 0) ||
	    (crsetgroups(usercred, group_cnt, (gid_t *)groups) != 0)) {
		nvlist_free(nvp);
		crfree(usercred);
		return (EPERM);
	}
	nvlist_free(nvp);
	error = dsl_deleg_access(zc->zc_name,
	    zfs_prop_to_name(ZFS_PROP_SHAREISCSI), usercred);
	crfree(usercred);
	return (error);
#else
	return (EPERM);
#endif
}
1754
1755/*
1756 * inputs:
1757 * zc_name		name of filesystem
1758 * zc_nvlist_src{_size}	nvlist of delegated permissions
1759 * zc_perm_action	allow/unallow flag
1760 *
1761 * outputs:		none
1762 */
1763static int
1764zfs_ioc_set_fsacl(zfs_cmd_t *zc)
1765{
1766	int error;
1767	nvlist_t *fsaclnv = NULL;
1768
1769	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1770	    &fsaclnv)) != 0)
1771		return (error);
1772
1773	/*
1774	 * Verify nvlist is constructed correctly
1775	 */
1776	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
1777		nvlist_free(fsaclnv);
1778		return (EINVAL);
1779	}
1780
1781	/*
1782	 * If we don't have PRIV_SYS_MOUNT, then validate
1783	 * that user is allowed to hand out each permission in
1784	 * the nvlist(s)
1785	 */
1786
1787	error = secpolicy_zfs(CRED());
1788	if (error) {
1789		if (zc->zc_perm_action == B_FALSE) {
1790			error = dsl_deleg_can_allow(zc->zc_name,
1791			    fsaclnv, CRED());
1792		} else {
1793			error = dsl_deleg_can_unallow(zc->zc_name,
1794			    fsaclnv, CRED());
1795		}
1796	}
1797
1798	if (error == 0)
1799		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
1800
1801	nvlist_free(fsaclnv);
1802	return (error);
1803}
1804
1805/*
1806 * inputs:
1807 * zc_name		name of filesystem
1808 *
1809 * outputs:
1810 * zc_nvlist_src{_size}	nvlist of delegated permissions
1811 */
1812static int
1813zfs_ioc_get_fsacl(zfs_cmd_t *zc)
1814{
1815	nvlist_t *nvp;
1816	int error;
1817
1818	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
1819		error = put_nvlist(zc, nvp);
1820		nvlist_free(nvp);
1821	}
1822
1823	return (error);
1824}
1825
1826/*
1827 * inputs:
1828 * zc_name		name of volume
1829 *
1830 * outputs:		none
1831 */
1832static int
1833zfs_ioc_create_minor(zfs_cmd_t *zc)
1834{
1835	return (zvol_create_minor(zc->zc_name, ddi_driver_major(zfs_dip)));
1836}
1837
1838/*
1839 * inputs:
1840 * zc_name		name of volume
1841 *
1842 * outputs:		none
1843 */
1844static int
1845zfs_ioc_remove_minor(zfs_cmd_t *zc)
1846{
1847	return (zvol_remove_minor(zc->zc_name));
1848}
1849
1850/*
1851 * Search the vfs list for a specified resource.  Returns a pointer to it
1852 * or NULL if no suitable entry is found. The caller of this routine
1853 * is responsible for releasing the returned vfs pointer.
1854 */
1855static vfs_t *
1856zfs_get_vfs(const char *resource)
1857{
1858	vfs_t *vfsp;
1859
1860	mtx_lock(&mountlist_mtx);
1861	TAILQ_FOREACH(vfsp, &mountlist, mnt_list) {
1862		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
1863			VFS_HOLD(vfsp);
1864			break;
1865		}
1866	}
1867	mtx_unlock(&mountlist_mtx);
1868	return (vfsp);
1869}
1870
1871/* ARGSUSED */
1872static void
1873zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
1874{
1875	zfs_creat_t *zct = arg;
1876
1877	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
1878}
1879
1880#define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
1881
1882/*
1883 * inputs:
1884 * createprops		list of properties requested by creator
1885 * default_zplver	zpl version to use if unspecified in createprops
1886 * fuids_ok		fuids allowed in this version of the spa?
1887 * os			parent objset pointer (NULL if root fs)
1888 *
1889 * outputs:
1890 * zplprops	values for the zplprops we attach to the master node object
1891 * is_ci	true if requested file system will be purely case-insensitive
1892 *
1893 * Determine the settings for utf8only, normalization and
1894 * casesensitivity.  Specific values may have been requested by the
1895 * creator and/or we can inherit values from the parent dataset.  If
1896 * the file system is of too early a vintage, a creator can not
1897 * request settings for these properties, even if the requested
1898 * setting is the default value.  We don't actually want to create dsl
1899 * properties for these, so remove them from the source nvlist after
1900 * processing.
1901 */
1902static int
1903zfs_fill_zplprops_impl(objset_t *os, uint64_t default_zplver,
1904    boolean_t fuids_ok, nvlist_t *createprops, nvlist_t *zplprops,
1905    boolean_t *is_ci)
1906{
1907	uint64_t zplver = default_zplver;
1908	uint64_t sense = ZFS_PROP_UNDEFINED;
1909	uint64_t norm = ZFS_PROP_UNDEFINED;
1910	uint64_t u8 = ZFS_PROP_UNDEFINED;
1911
1912	ASSERT(zplprops != NULL);
1913
1914	/*
1915	 * Pull out creator prop choices, if any.
1916	 */
1917	if (createprops) {
1918		(void) nvlist_lookup_uint64(createprops,
1919		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
1920		(void) nvlist_lookup_uint64(createprops,
1921		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
1922		(void) nvlist_remove_all(createprops,
1923		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
1924		(void) nvlist_lookup_uint64(createprops,
1925		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
1926		(void) nvlist_remove_all(createprops,
1927		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
1928		(void) nvlist_lookup_uint64(createprops,
1929		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
1930		(void) nvlist_remove_all(createprops,
1931		    zfs_prop_to_name(ZFS_PROP_CASE));
1932	}
1933
1934	/*
1935	 * If the zpl version requested is whacky or the file system
1936	 * or pool is version is too "young" to support normalization
1937	 * and the creator tried to set a value for one of the props,
1938	 * error out.
1939	 */
1940	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
1941	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
1942	    (zplver < ZPL_VERSION_NORMALIZATION &&
1943	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
1944	    sense != ZFS_PROP_UNDEFINED)))
1945		return (ENOTSUP);
1946
1947	/*
1948	 * Put the version in the zplprops
1949	 */
1950	VERIFY(nvlist_add_uint64(zplprops,
1951	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
1952
1953	if (norm == ZFS_PROP_UNDEFINED)
1954		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
1955	VERIFY(nvlist_add_uint64(zplprops,
1956	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
1957
1958	/*
1959	 * If we're normalizing, names must always be valid UTF-8 strings.
1960	 */
1961	if (norm)
1962		u8 = 1;
1963	if (u8 == ZFS_PROP_UNDEFINED)
1964		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
1965	VERIFY(nvlist_add_uint64(zplprops,
1966	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
1967
1968	if (sense == ZFS_PROP_UNDEFINED)
1969		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
1970	VERIFY(nvlist_add_uint64(zplprops,
1971	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
1972
1973	if (is_ci)
1974		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
1975
1976	return (0);
1977}
1978
1979static int
1980zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
1981    nvlist_t *zplprops, boolean_t *is_ci)
1982{
1983	boolean_t fuids_ok = B_TRUE;
1984	uint64_t zplver = ZPL_VERSION;
1985	objset_t *os = NULL;
1986	char parentname[MAXNAMELEN];
1987	char *cp;
1988	int error;
1989
1990	(void) strlcpy(parentname, dataset, sizeof (parentname));
1991	cp = strrchr(parentname, '/');
1992	ASSERT(cp != NULL);
1993	cp[0] = '\0';
1994
1995	if (zfs_earlier_version(dataset, SPA_VERSION_FUID)) {
1996		zplver = ZPL_VERSION_FUID - 1;
1997		fuids_ok = B_FALSE;
1998	}
1999
2000	/*
2001	 * Open parent object set so we can inherit zplprop values.
2002	 */
2003	if ((error = dmu_objset_open(parentname, DMU_OST_ANY,
2004	    DS_MODE_USER | DS_MODE_READONLY, &os)) != 0)
2005		return (error);
2006
2007	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, createprops,
2008	    zplprops, is_ci);
2009	dmu_objset_close(os);
2010	return (error);
2011}
2012
2013static int
2014zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
2015    nvlist_t *zplprops, boolean_t *is_ci)
2016{
2017	boolean_t fuids_ok = B_TRUE;
2018	uint64_t zplver = ZPL_VERSION;
2019	int error;
2020
2021	if (spa_vers < SPA_VERSION_FUID) {
2022		zplver = ZPL_VERSION_FUID - 1;
2023		fuids_ok = B_FALSE;
2024	}
2025
2026	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, createprops,
2027	    zplprops, is_ci);
2028	return (error);
2029}
2030
2031/*
2032 * inputs:
2033 * zc_objset_type	type of objset to create (fs vs zvol)
2034 * zc_name		name of new objset
2035 * zc_value		name of snapshot to clone from (may be empty)
2036 * zc_nvlist_src{_size}	nvlist of properties to apply
2037 *
2038 * outputs: none
2039 */
2040static int
2041zfs_ioc_create(zfs_cmd_t *zc)
2042{
2043	objset_t *clone;
2044	int error = 0;
2045	zfs_creat_t zct;
2046	nvlist_t *nvprops = NULL;
2047	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
2048	dmu_objset_type_t type = zc->zc_objset_type;
2049
2050	switch (type) {
2051
2052	case DMU_OST_ZFS:
2053		cbfunc = zfs_create_cb;
2054		break;
2055
2056	case DMU_OST_ZVOL:
2057		cbfunc = zvol_create_cb;
2058		break;
2059
2060	default:
2061		cbfunc = NULL;
2062		break;
2063	}
2064	if (strchr(zc->zc_name, '@') ||
2065	    strchr(zc->zc_name, '%'))
2066		return (EINVAL);
2067
2068	if (zc->zc_nvlist_src != 0 &&
2069	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2070	    &nvprops)) != 0)
2071		return (error);
2072
2073	zct.zct_zplprops = NULL;
2074	zct.zct_props = nvprops;
2075
2076	if (zc->zc_value[0] != '\0') {
2077		/*
2078		 * We're creating a clone of an existing snapshot.
2079		 */
2080		zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2081		if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
2082			nvlist_free(nvprops);
2083			return (EINVAL);
2084		}
2085
2086		error = dmu_objset_open(zc->zc_value, type,
2087		    DS_MODE_USER | DS_MODE_READONLY, &clone);
2088		if (error) {
2089			nvlist_free(nvprops);
2090			return (error);
2091		}
2092
2093		error = dmu_objset_create(zc->zc_name, type, clone, 0,
2094		    NULL, NULL);
2095		if (error) {
2096			dmu_objset_close(clone);
2097			nvlist_free(nvprops);
2098			return (error);
2099		}
2100		dmu_objset_close(clone);
2101	} else {
2102		boolean_t is_insensitive = B_FALSE;
2103
2104		if (cbfunc == NULL) {
2105			nvlist_free(nvprops);
2106			return (EINVAL);
2107		}
2108
2109		if (type == DMU_OST_ZVOL) {
2110			uint64_t volsize, volblocksize;
2111
2112			if (nvprops == NULL ||
2113			    nvlist_lookup_uint64(nvprops,
2114			    zfs_prop_to_name(ZFS_PROP_VOLSIZE),
2115			    &volsize) != 0) {
2116				nvlist_free(nvprops);
2117				return (EINVAL);
2118			}
2119
2120			if ((error = nvlist_lookup_uint64(nvprops,
2121			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
2122			    &volblocksize)) != 0 && error != ENOENT) {
2123				nvlist_free(nvprops);
2124				return (EINVAL);
2125			}
2126
2127			if (error != 0)
2128				volblocksize = zfs_prop_default_numeric(
2129				    ZFS_PROP_VOLBLOCKSIZE);
2130
2131			if ((error = zvol_check_volblocksize(
2132			    volblocksize)) != 0 ||
2133			    (error = zvol_check_volsize(volsize,
2134			    volblocksize)) != 0) {
2135				nvlist_free(nvprops);
2136				return (error);
2137			}
2138		} else if (type == DMU_OST_ZFS) {
2139			int error;
2140
2141			/*
2142			 * We have to have normalization and
2143			 * case-folding flags correct when we do the
2144			 * file system creation, so go figure them out
2145			 * now.
2146			 */
2147			VERIFY(nvlist_alloc(&zct.zct_zplprops,
2148			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
2149			error = zfs_fill_zplprops(zc->zc_name, nvprops,
2150			    zct.zct_zplprops, &is_insensitive);
2151			if (error != 0) {
2152				nvlist_free(nvprops);
2153				nvlist_free(zct.zct_zplprops);
2154				return (error);
2155			}
2156		}
2157		error = dmu_objset_create(zc->zc_name, type, NULL,
2158		    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
2159		nvlist_free(zct.zct_zplprops);
2160	}
2161
2162	/*
2163	 * It would be nice to do this atomically.
2164	 */
2165	if (error == 0) {
2166		if ((error = zfs_set_prop_nvlist(zc->zc_name, nvprops)) != 0)
2167			(void) dmu_objset_destroy(zc->zc_name);
2168	}
2169	nvlist_free(nvprops);
2170	return (error);
2171}
2172
/*
 * Argument bundle for set_snap_props(), which receives it through a
 * void pointer either directly or as a dmu_objset_find() callback.
 */
struct snap_prop_arg {
	nvlist_t *nvprops;	/* properties to set on each snapshot */
	const char *snapname;	/* short snapshot name (the part after '@') */
};
2177
2178static int
2179set_snap_props(char *name, void *arg)
2180{
2181	struct snap_prop_arg *snpa = arg;
2182	int len = strlen(name) + strlen(snpa->snapname) + 2;
2183	char *buf = kmem_alloc(len, KM_SLEEP);
2184	int err;
2185
2186	(void) snprintf(buf, len, "%s@%s", name, snpa->snapname);
2187	err = zfs_set_prop_nvlist(buf, snpa->nvprops);
2188	if (err)
2189		(void) dmu_objset_destroy(buf);
2190	kmem_free(buf, len);
2191	return (err);
2192}
2193
2194/*
2195 * inputs:
2196 * zc_name	name of filesystem
2197 * zc_value	short name of snapshot
2198 * zc_cookie	recursive flag
2199 *
2200 * outputs:	none
2201 */
2202static int
2203zfs_ioc_snapshot(zfs_cmd_t *zc)
2204{
2205	nvlist_t *nvprops = NULL;
2206	int error;
2207	boolean_t recursive = zc->zc_cookie;
2208
2209	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2210		return (EINVAL);
2211
2212	if (zc->zc_nvlist_src != 0 &&
2213	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2214	    &nvprops)) != 0)
2215		return (error);
2216
2217	error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, recursive);
2218
2219	/*
2220	 * It would be nice to do this atomically.
2221	 */
2222	if (error == 0) {
2223		struct snap_prop_arg snpa;
2224		snpa.nvprops = nvprops;
2225		snpa.snapname = zc->zc_value;
2226		if (recursive) {
2227			error = dmu_objset_find(zc->zc_name,
2228			    set_snap_props, &snpa, DS_FIND_CHILDREN);
2229			if (error) {
2230				(void) dmu_snapshots_destroy(zc->zc_name,
2231				    zc->zc_value);
2232			}
2233		} else {
2234			error = set_snap_props(zc->zc_name, &snpa);
2235		}
2236	}
2237	nvlist_free(nvprops);
2238	return (error);
2239}
2240
2241int
2242zfs_unmount_snap(char *name, void *arg)
2243{
2244	vfs_t *vfsp = NULL;
2245
2246	if (arg) {
2247		char *snapname = arg;
2248		int len = strlen(name) + strlen(snapname) + 2;
2249		char *buf = kmem_alloc(len, KM_SLEEP);
2250
2251		(void) strcpy(buf, name);
2252		(void) strcat(buf, "@");
2253		(void) strcat(buf, snapname);
2254		vfsp = zfs_get_vfs(buf);
2255		kmem_free(buf, len);
2256	} else if (strchr(name, '@')) {
2257		vfsp = zfs_get_vfs(name);
2258	}
2259
2260	if (vfsp) {
2261		/*
2262		 * Always force the unmount for snapshots.
2263		 */
2264		int flag = MS_FORCE;
2265		int err;
2266
2267		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
2268			VFS_RELE(vfsp);
2269			return (err);
2270		}
2271		VFS_RELE(vfsp);
2272		mtx_lock(&Giant);	/* dounmount() */
2273		dounmount(vfsp, flag, curthread);
2274		mtx_unlock(&Giant);	/* dounmount() */
2275	}
2276	return (0);
2277}
2278
2279/*
2280 * inputs:
2281 * zc_name	name of filesystem
2282 * zc_value	short name of snapshot
2283 *
2284 * outputs:	none
2285 */
2286static int
2287zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
2288{
2289	int err;
2290
2291	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2292		return (EINVAL);
2293	err = dmu_objset_find(zc->zc_name,
2294	    zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
2295	if (err)
2296		return (err);
2297	return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value));
2298}
2299
2300/*
2301 * inputs:
2302 * zc_name		name of dataset to destroy
2303 * zc_objset_type	type of objset
2304 *
2305 * outputs:		none
2306 */
2307static int
2308zfs_ioc_destroy(zfs_cmd_t *zc)
2309{
2310	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
2311		int err = zfs_unmount_snap(zc->zc_name, NULL);
2312		if (err)
2313			return (err);
2314	}
2315
2316	return (dmu_objset_destroy(zc->zc_name));
2317}
2318
2319/*
2320 * inputs:
2321 * zc_name	name of dataset to rollback (to most recent snapshot)
2322 *
2323 * outputs:	none
2324 */
2325static int
2326zfs_ioc_rollback(zfs_cmd_t *zc)
2327{
2328	objset_t *os;
2329	int error;
2330	zfsvfs_t *zfsvfs = NULL;
2331
2332	/*
2333	 * Get the zfsvfs for the receiving objset. There
2334	 * won't be one if we're operating on a zvol, if the
2335	 * objset doesn't exist yet, or is not mounted.
2336	 */
2337	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, DS_MODE_USER, &os);
2338	if (error)
2339		return (error);
2340
2341	if (dmu_objset_type(os) == DMU_OST_ZFS) {
2342		mutex_enter(&os->os->os_user_ptr_lock);
2343		zfsvfs = dmu_objset_get_user(os);
2344		if (zfsvfs != NULL)
2345			VFS_HOLD(zfsvfs->z_vfs);
2346		mutex_exit(&os->os->os_user_ptr_lock);
2347	}
2348
2349	if (zfsvfs != NULL) {
2350		char osname[MAXNAMELEN];
2351		int mode;
2352
2353		error = zfs_suspend_fs(zfsvfs, osname, &mode);
2354		if (error == 0) {
2355			int resume_err;
2356
2357			ASSERT(strcmp(osname, zc->zc_name) == 0);
2358			error = dmu_objset_rollback(os);
2359			resume_err = zfs_resume_fs(zfsvfs, osname, mode);
2360			error = error ? error : resume_err;
2361		} else {
2362			dmu_objset_close(os);
2363		}
2364		VFS_RELE(zfsvfs->z_vfs);
2365	} else {
2366		error = dmu_objset_rollback(os);
2367	}
2368	/* Note, the dmu_objset_rollback() releases the objset for us. */
2369
2370	return (error);
2371}
2372
2373/*
2374 * inputs:
2375 * zc_name	old name of dataset
2376 * zc_value	new name of dataset
2377 * zc_cookie	recursive flag (only valid for snapshots)
2378 *
2379 * outputs:	none
2380 */
2381static int
2382zfs_ioc_rename(zfs_cmd_t *zc)
2383{
2384	boolean_t recursive = zc->zc_cookie & 1;
2385
2386	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2387	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
2388	    strchr(zc->zc_value, '%'))
2389		return (EINVAL);
2390
2391	/*
2392	 * Unmount snapshot unless we're doing a recursive rename,
2393	 * in which case the dataset code figures out which snapshots
2394	 * to unmount.
2395	 */
2396	if (!recursive && strchr(zc->zc_name, '@') != NULL &&
2397	    zc->zc_objset_type == DMU_OST_ZFS) {
2398		int err = zfs_unmount_snap(zc->zc_name, NULL);
2399		if (err)
2400			return (err);
2401	}
2402	return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
2403}
2404
2405static void
2406clear_props(char *dataset, nvlist_t *props)
2407{
2408	zfs_cmd_t *zc;
2409	nvpair_t *prop;
2410
2411	if (props == NULL)
2412		return;
2413	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
2414	(void) strcpy(zc->zc_name, dataset);
2415	for (prop = nvlist_next_nvpair(props, NULL); prop;
2416	    prop = nvlist_next_nvpair(props, prop)) {
2417		(void) strcpy(zc->zc_value, nvpair_name(prop));
2418		if (zfs_secpolicy_inherit(zc, CRED()) == 0)
2419			(void) zfs_ioc_inherit_prop(zc);
2420	}
2421	kmem_free(zc, sizeof (zfs_cmd_t));
2422}
2423
2424/*
2425 * inputs:
2426 * zc_name		name of containing filesystem
2427 * zc_nvlist_src{_size}	nvlist of properties to apply
2428 * zc_value		name of snapshot to create
2429 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
2430 * zc_cookie		file descriptor to recv from
2431 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
2432 * zc_guid		force flag
2433 *
2434 * outputs:
2435 * zc_cookie		number of bytes read
2436 */
2437static int
2438zfs_ioc_recv(zfs_cmd_t *zc)
2439{
2440	file_t *fp;
2441	objset_t *os;
2442	dmu_recv_cookie_t drc;
2443	zfsvfs_t *zfsvfs = NULL;
2444	boolean_t force = (boolean_t)zc->zc_guid;
2445	int error, fd;
2446	offset_t off;
2447	nvlist_t *props = NULL;
2448	nvlist_t *origprops = NULL;
2449	objset_t *origin = NULL;
2450	char *tosnap;
2451	char tofs[ZFS_MAXNAMELEN];
2452
2453	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
2454	    strchr(zc->zc_value, '@') == NULL ||
2455	    strchr(zc->zc_value, '%'))
2456		return (EINVAL);
2457
2458	(void) strcpy(tofs, zc->zc_value);
2459	tosnap = strchr(tofs, '@');
2460	*tosnap = '\0';
2461	tosnap++;
2462
2463	if (zc->zc_nvlist_src != 0 &&
2464	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2465	    &props)) != 0)
2466		return (error);
2467
2468	fd = zc->zc_cookie;
2469	fp = getf(fd, 0);
2470	if (fp == NULL) {
2471		nvlist_free(props);
2472		return (EBADF);
2473	}
2474
2475	if (dmu_objset_open(tofs, DMU_OST_ANY,
2476	    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
2477		/*
2478		 * Try to get the zfsvfs for the receiving objset.
2479		 * There won't be one if we're operating on a zvol,
2480		 * if the objset doesn't exist yet, or is not mounted.
2481		 */
2482		mutex_enter(&os->os->os_user_ptr_lock);
2483		if (zfsvfs = dmu_objset_get_user(os)) {
2484			if (!mutex_tryenter(&zfsvfs->z_online_recv_lock)) {
2485				mutex_exit(&os->os->os_user_ptr_lock);
2486				dmu_objset_close(os);
2487				zfsvfs = NULL;
2488				error = EBUSY;
2489				goto out;
2490			}
2491			VFS_HOLD(zfsvfs->z_vfs);
2492		}
2493		mutex_exit(&os->os->os_user_ptr_lock);
2494
2495		/*
2496		 * If new properties are supplied, they are to completely
2497		 * replace the existing ones, so stash away the existing ones.
2498		 */
2499		if (props)
2500			(void) dsl_prop_get_all(os, &origprops, TRUE);
2501
2502		dmu_objset_close(os);
2503	}
2504
2505	if (zc->zc_string[0]) {
2506		error = dmu_objset_open(zc->zc_string, DMU_OST_ANY,
2507		    DS_MODE_USER | DS_MODE_READONLY, &origin);
2508		if (error)
2509			goto out;
2510	}
2511
2512	error = dmu_recv_begin(tofs, tosnap, &zc->zc_begin_record,
2513	    force, origin, zfsvfs != NULL, &drc);
2514	if (origin)
2515		dmu_objset_close(origin);
2516	if (error)
2517		goto out;
2518
2519	/*
2520	 * Reset properties.  We do this before we receive the stream
2521	 * so that the properties are applied to the new data.
2522	 */
2523	if (props) {
2524		clear_props(tofs, origprops);
2525		/*
2526		 * XXX - Note, this is all-or-nothing; should be best-effort.
2527		 */
2528		(void) zfs_set_prop_nvlist(tofs, props);
2529	}
2530
2531	off = fp->f_offset;
2532	error = dmu_recv_stream(&drc, fp, &off);
2533
2534	if (error == 0 && zfsvfs) {
2535		char osname[MAXNAMELEN];
2536		int mode;
2537
2538		/* online recv */
2539		error = zfs_suspend_fs(zfsvfs, osname, &mode);
2540		if (error == 0) {
2541			int resume_err;
2542
2543			error = dmu_recv_end(&drc);
2544			resume_err = zfs_resume_fs(zfsvfs, osname, mode);
2545			error = error ? error : resume_err;
2546		} else {
2547			dmu_recv_abort_cleanup(&drc);
2548		}
2549	} else if (error == 0) {
2550		error = dmu_recv_end(&drc);
2551	}
2552
2553	zc->zc_cookie = off - fp->f_offset;
2554	if (off >= 0 && off <= MAXOFFSET_T)
2555		fp->f_offset = off;
2556
2557	/*
2558	 * On error, restore the original props.
2559	 */
2560	if (error && props) {
2561		clear_props(tofs, props);
2562		(void) zfs_set_prop_nvlist(tofs, origprops);
2563	}
2564out:
2565	if (zfsvfs) {
2566		mutex_exit(&zfsvfs->z_online_recv_lock);
2567		VFS_RELE(zfsvfs->z_vfs);
2568	}
2569	nvlist_free(props);
2570	nvlist_free(origprops);
2571	releasef(fp);
2572	return (error);
2573}
2574
2575/*
2576 * inputs:
2577 * zc_name	name of snapshot to send
2578 * zc_value	short name of incremental fromsnap (may be empty)
2579 * zc_cookie	file descriptor to send stream to
2580 * zc_obj	fromorigin flag (mutually exclusive with zc_value)
2581 *
2582 * outputs: none
2583 */
2584static int
2585zfs_ioc_send(zfs_cmd_t *zc)
2586{
2587	objset_t *fromsnap = NULL;
2588	objset_t *tosnap;
2589	file_t *fp;
2590	int error;
2591	offset_t off;
2592
2593	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
2594	    DS_MODE_USER | DS_MODE_READONLY, &tosnap);
2595	if (error)
2596		return (error);
2597
2598	if (zc->zc_value[0] != '\0') {
2599		char buf[MAXPATHLEN];
2600		char *cp;
2601
2602		(void) strncpy(buf, zc->zc_name, sizeof (buf));
2603		cp = strchr(buf, '@');
2604		if (cp)
2605			*(cp+1) = 0;
2606		(void) strlcat(buf, zc->zc_value, sizeof (buf));
2607		error = dmu_objset_open(buf, DMU_OST_ANY,
2608		    DS_MODE_USER | DS_MODE_READONLY, &fromsnap);
2609		if (error) {
2610			dmu_objset_close(tosnap);
2611			return (error);
2612		}
2613	}
2614
2615	fp = getf(zc->zc_cookie, 1);
2616	if (fp == NULL) {
2617		dmu_objset_close(tosnap);
2618		if (fromsnap)
2619			dmu_objset_close(fromsnap);
2620		return (EBADF);
2621	}
2622
2623	off = fp->f_offset;
2624	error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp, &off);
2625
2626	if (off >= 0 && off <= MAXOFFSET_T)
2627		fp->f_offset = off;
2628	releasef(fp);
2629	if (fromsnap)
2630		dmu_objset_close(fromsnap);
2631	dmu_objset_close(tosnap);
2632	return (error);
2633}
2634
2635static int
2636zfs_ioc_inject_fault(zfs_cmd_t *zc)
2637{
2638	int id, error;
2639
2640	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
2641	    &zc->zc_inject_record);
2642
2643	if (error == 0)
2644		zc->zc_guid = (uint64_t)id;
2645
2646	return (error);
2647}
2648
2649static int
2650zfs_ioc_clear_fault(zfs_cmd_t *zc)
2651{
2652	return (zio_clear_fault((int)zc->zc_guid));
2653}
2654
2655static int
2656zfs_ioc_inject_list_next(zfs_cmd_t *zc)
2657{
2658	int id = (int)zc->zc_guid;
2659	int error;
2660
2661	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
2662	    &zc->zc_inject_record);
2663
2664	zc->zc_guid = id;
2665
2666	return (error);
2667}
2668
2669static int
2670zfs_ioc_error_log(zfs_cmd_t *zc)
2671{
2672	spa_t *spa;
2673	int error;
2674	size_t count = (size_t)zc->zc_nvlist_dst_size;
2675
2676	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2677		return (error);
2678
2679	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
2680	    &count);
2681	if (error == 0)
2682		zc->zc_nvlist_dst_size = count;
2683	else
2684		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
2685
2686	spa_close(spa, FTAG);
2687
2688	return (error);
2689}
2690
2691static int
2692zfs_ioc_clear(zfs_cmd_t *zc)
2693{
2694	spa_t *spa;
2695	vdev_t *vd;
2696	int error;
2697
2698	/*
2699	 * On zpool clear we also fix up missing slogs
2700	 */
2701	mutex_enter(&spa_namespace_lock);
2702	spa = spa_lookup(zc->zc_name);
2703	if (spa == NULL) {
2704		mutex_exit(&spa_namespace_lock);
2705		return (EIO);
2706	}
2707	if (spa->spa_log_state == SPA_LOG_MISSING) {
2708		/* we need to let spa_open/spa_load clear the chains */
2709		spa->spa_log_state = SPA_LOG_CLEAR;
2710	}
2711	mutex_exit(&spa_namespace_lock);
2712
2713	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2714		return (error);
2715
2716	spa_vdev_state_enter(spa);
2717
2718	if (zc->zc_guid == 0) {
2719		vd = NULL;
2720	} else {
2721		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
2722		if (vd == NULL) {
2723			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
2724			spa_close(spa, FTAG);
2725			return (ENODEV);
2726		}
2727	}
2728
2729	vdev_clear(spa, vd);
2730
2731	(void) spa_vdev_state_exit(spa, NULL, 0);
2732
2733	/*
2734	 * Resume any suspended I/Os.
2735	 */
2736	zio_resume(spa);
2737
2738	spa_close(spa, FTAG);
2739
2740	return (0);
2741}
2742
2743/*
2744 * inputs:
2745 * zc_name	name of filesystem
2746 * zc_value	name of origin snapshot
2747 *
2748 * outputs:	none
2749 */
2750static int
2751zfs_ioc_promote(zfs_cmd_t *zc)
2752{
2753	char *cp;
2754
2755	/*
2756	 * We don't need to unmount *all* the origin fs's snapshots, but
2757	 * it's easier.
2758	 */
2759	cp = strchr(zc->zc_value, '@');
2760	if (cp)
2761		*cp = '\0';
2762	(void) dmu_objset_find(zc->zc_value,
2763	    zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
2764	return (dsl_dataset_promote(zc->zc_name));
2765}
2766
#ifdef TODO
/*
 * We don't want to have a hard dependency against some special symbols
 * in sharefs, nfs, and smbsrv.  They are resolved on demand, when the
 * first file system is shared (see zfs_ioc_share() and
 * zfs_init_sharefs()).
 * Neither sharefs, nfs or smbsrv are unloadable modules.
 */
/* Entry points resolved with ddi_modsym(); NULL until first use. */
int (*znfsexport_fs)(void *arg);
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/* Set to 1 once the NFS / SMB entry points have been resolved. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Module handles obtained from ddi_modopen(); NULL until first use. */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
#endif
/*
 * Lock taken around the lazy share initialization above.  It is defined
 * unconditionally because zfs_modevent() initializes and destroys it even
 * when the TODO code is compiled out.
 */
kmutex_t zfs_share_lock;
2787
#ifdef TODO
/*
 * Load the sharefs module and resolve its "sharefs_impl" entry point into
 * zshare_fs, unless that has already been done.  Both NFS and SMB shares
 * require this sharetab support.
 *
 * Must be called with zfs_share_lock held.  Returns 0 on success or ENOSYS
 * if the module or the symbol cannot be found.
 */
static int
zfs_init_sharefs()
{
	int error;

	ASSERT(MUTEX_HELD(&zfs_share_lock));

	if (sharefs_mod == NULL) {
		sharefs_mod = ddi_modopen("fs/sharefs",
		    KRTLD_MODE_FIRST, &error);
		if (sharefs_mod == NULL)
			return (ENOSYS);
	}

	if (zshare_fs == NULL) {
		zshare_fs = (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
		    ddi_modsym(sharefs_mod, "sharefs_impl", &error);
		if (zshare_fs == NULL)
			return (ENOSYS);
	}

	return (0);
}
#endif
2809
/*
 * Share or unshare a filesystem over NFS or SMB.
 *
 * In this build the implementation is compiled out: unless TODO is
 * defined the function simply returns ENOSYS.
 *
 * The disabled implementation reads zc_share.z_sharetype to choose the
 * protocol and direction, zc_share.z_exportdata as the argument for the
 * protocol's export routine, and zc_share.z_sharedata / z_sharemax for the
 * sharetab update.  It returns EINVAL for an unknown share type, ENOSYS if
 * a required module or symbol cannot be loaded, and otherwise the error
 * from the export routine or from zshare_fs().
 */
static int
zfs_ioc_share(zfs_cmd_t *zc)
{
#ifdef TODO
	int error;
	int opcode;

	/*
	 * Step 1: make sure the entry points for the requested protocol
	 * (and for sharefs) are resolved.  The *_inited flag is tested
	 * before zfs_share_lock is taken; the individual NULL checks are
	 * repeated under the lock.
	 */
	switch (zc->zc_share.z_sharetype) {
	case ZFS_SHARE_NFS:
	case ZFS_UNSHARE_NFS:
		if (zfs_nfsshare_inited == 0) {
			mutex_enter(&zfs_share_lock);
			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
			    KRTLD_MODE_FIRST, &error)) == NULL)) {
				mutex_exit(&zfs_share_lock);
				return (ENOSYS);
			}
			if (znfsexport_fs == NULL &&
			    ((znfsexport_fs = (int (*)(void *))
			    ddi_modsym(nfs_mod,
			    "nfs_export", &error)) == NULL)) {
				mutex_exit(&zfs_share_lock);
				return (ENOSYS);
			}
			error = zfs_init_sharefs();
			if (error) {
				mutex_exit(&zfs_share_lock);
				return (ENOSYS);
			}
			zfs_nfsshare_inited = 1;
			mutex_exit(&zfs_share_lock);
		}
		break;
	case ZFS_SHARE_SMB:
	case ZFS_UNSHARE_SMB:
		if (zfs_smbshare_inited == 0) {
			mutex_enter(&zfs_share_lock);
			if (smbsrv_mod == NULL && ((smbsrv_mod =
			    ddi_modopen("drv/smbsrv",
			    KRTLD_MODE_FIRST, &error)) == NULL)) {
				mutex_exit(&zfs_share_lock);
				return (ENOSYS);
			}
			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
			    "smb_server_share", &error)) == NULL)) {
				mutex_exit(&zfs_share_lock);
				return (ENOSYS);
			}
			error = zfs_init_sharefs();
			if (error) {
				mutex_exit(&zfs_share_lock);
				return (ENOSYS);
			}
			zfs_smbshare_inited = 1;
			mutex_exit(&zfs_share_lock);
		}
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Step 2: hand the export data to the protocol's export routine.
	 * No default case is needed; unknown types were rejected above.
	 */
	switch (zc->zc_share.z_sharetype) {
	case ZFS_SHARE_NFS:
	case ZFS_UNSHARE_NFS:
		if (error =
		    znfsexport_fs((void *)
		    (uintptr_t)zc->zc_share.z_exportdata))
			return (error);
		break;
	case ZFS_SHARE_SMB:
	case ZFS_UNSHARE_SMB:
		if (error = zsmbexport_fs((void *)
		    (uintptr_t)zc->zc_share.z_exportdata,
		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
		    B_TRUE : B_FALSE)) {
			return (error);
		}
		break;
	}

	/* Step 3: the two SHARE types add an entry, the UNSHARE types remove. */
	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
	    SHAREFS_ADD : SHAREFS_REMOVE;

	/*
	 * Add or remove share from sharetab
	 */
	error = zshare_fs(opcode,
	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
	    zc->zc_share.z_sharemax);

	return (error);
#else
	return (ENOSYS);
#endif
}
2907
/*
 * Note on the history-logging flag in the zfs_ioc_vec table further below:
 * pool create, destroy, and export are not logged to the pool history by
 * zfsdev_ioctl; instead zfs_ioc_pool_create and zfs_ioc_pool_export do the
 * logging of those commands themselves.
 */
2913static int
2914zfs_ioc_jail(zfs_cmd_t *zc)
2915{
2916
2917	return (zone_dataset_attach(curthread->td_ucred, zc->zc_name,
2918	    (int)zc->zc_jailid));
2919}
2920
2921static int
2922zfs_ioc_unjail(zfs_cmd_t *zc)
2923{
2924
2925	return (zone_dataset_detach(curthread->td_ucred, zc->zc_name,
2926	    (int)zc->zc_jailid));
2927}
2928
/*
 * ioctl dispatch table.  zfsdev_ioctl() indexes it with ZFS_IOC(cmd), so
 * the position of each entry is significant: entries must stay in ioctl
 * number order and new ioctls must be appended.  (Presumably this mirrors
 * the ZFS_IOC_* numbering in sys/fs/zfs.h - confirm before reordering.)
 *
 * Each entry supplies, in order:
 *   - the handler (zvec_func),
 *   - the security policy check run before it (zvec_secpolicy),
 *   - how zc_name is validated: POOL_NAME, DATASET_NAME or NO_NAME
 *     (zvec_namecheck),
 *   - whether zfsdev_ioctl() calls zfs_log_history() for the command
 *     (zvec_his_log).
 */
static zfs_ioc_vec_t zfs_ioc_vec[] = {
	{ zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE },
	{ zfs_ioc_pool_destroy,	zfs_secpolicy_config, POOL_NAME, B_FALSE },
	{ zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE },
	{ zfs_ioc_pool_configs,	zfs_secpolicy_none, NO_NAME, B_FALSE },
	{ zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE },
	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE },
	{ zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE },
	{ zfs_ioc_pool_upgrade,	zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE },
	{ zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_vdev_set_state, zfs_secpolicy_config,	POOL_NAME, B_TRUE },
	{ zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_vdev_setpath,	zfs_secpolicy_config, POOL_NAME, B_FALSE },
	{ zfs_ioc_objset_stats,	zfs_secpolicy_read, DATASET_NAME, B_FALSE },
	{ zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE },
	{ zfs_ioc_dataset_list_next, zfs_secpolicy_read,
	    DATASET_NAME, B_FALSE },
	{ zfs_ioc_snapshot_list_next, zfs_secpolicy_read,
	    DATASET_NAME, B_FALSE },
	/* set_prop has no table-level policy check (zfs_secpolicy_none). */
	{ zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE },
	{ zfs_ioc_create_minor,	zfs_secpolicy_minor, DATASET_NAME, B_FALSE },
	{ zfs_ioc_remove_minor,	zfs_secpolicy_minor, DATASET_NAME, B_FALSE },
	{ zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE },
	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE },
	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE },
	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE },
	{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE },
	{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE },
	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE },
	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE },
	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE },
	{ zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE },
	{ zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE },
	{ zfs_ioc_destroy_snaps, zfs_secpolicy_destroy,	DATASET_NAME, B_TRUE },
	{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE },
	{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_config, POOL_NAME, B_FALSE },
	{ zfs_ioc_obj_to_path, zfs_secpolicy_config, NO_NAME, B_FALSE },
	{ zfs_ioc_pool_set_props, zfs_secpolicy_config,	POOL_NAME, B_TRUE },
	{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE },
	{ zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE },
	{ zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE },
	{ zfs_ioc_iscsi_perm_check, zfs_secpolicy_iscsi,
	    DATASET_NAME, B_FALSE },
	{ zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE },
	{ zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE },
	/* jail / unjail are the last two entries of the table. */
	{ zfs_ioc_jail, zfs_secpolicy_config, DATASET_NAME, B_TRUE },
	{ zfs_ioc_unjail, zfs_secpolicy_config, DATASET_NAME, B_TRUE }
};
2983
2984static int
2985zfsdev_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
2986    struct thread *td)
2987{
2988	zfs_cmd_t *zc = (void *)addr;
2989	uint_t vec;
2990	int error;
2991
2992	vec = ZFS_IOC(cmd);
2993
2994	if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
2995		return (EINVAL);
2996
2997	error = zfs_ioc_vec[vec].zvec_secpolicy(zc, td->td_ucred);
2998
2999	/*
3000	 * Ensure that all pool/dataset names are valid before we pass down to
3001	 * the lower layers.
3002	 */
3003	if (error == 0) {
3004		zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
3005		switch (zfs_ioc_vec[vec].zvec_namecheck) {
3006		case POOL_NAME:
3007			if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
3008				error = EINVAL;
3009			break;
3010
3011		case DATASET_NAME:
3012			if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
3013				error = EINVAL;
3014			break;
3015
3016		case NO_NAME:
3017			break;
3018		}
3019	}
3020
3021	if (error == 0)
3022		error = zfs_ioc_vec[vec].zvec_func(zc);
3023
3024	if (zfs_ioc_vec[vec].zvec_his_log == B_TRUE)
3025		zfs_log_history(zc);
3026
3027	return (error);
3028}
3029
3030/*
3031 * OK, so this is a little weird.
3032 *
3033 * /dev/zfs is the control node, i.e. minor 0.
3034 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
3035 *
3036 * /dev/zfs has basically nothing to do except serve up ioctls,
3037 * so most of the standard driver entry points are in zvol.c.
3038 */
/*
 * Character device switch for the control node.  Only the ioctl entry
 * point is provided; every request is handled by zfsdev_ioctl().
 */
static struct cdevsw zfs_cdevsw = {
	.d_version =	D_VERSION,
	.d_ioctl =	zfsdev_ioctl,
	.d_name =	ZFS_DEV_NAME
};
3044
3045static void
3046zfsdev_init(void)
3047{
3048	zfsdev = make_dev(&zfs_cdevsw, 0x0, UID_ROOT, GID_OPERATOR, 0666,
3049	    ZFS_DEV_NAME);
3050}
3051
3052static void
3053zfsdev_fini(void)
3054{
3055	if (zfsdev != NULL)
3056		destroy_dev(zfsdev);
3057}
3058
/* Task that runs zfs_start(); set up and enqueued by zfs_modevent(). */
static struct task zfs_start_task;
/* Root mount hold taken at MOD_LOAD and released at the end of zfs_start(). */
static struct root_hold_token *zfs_root_token;


/*
 * Thread-specific-data keys, created in zfs_start() and destroyed at
 * MOD_UNLOAD.  rrw_tsd_key is defined in another file.
 */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
3065
/*
 * Deferred module initialization.  This is the handler of zfs_start_task,
 * which zfs_modevent() enqueues on taskqueue_thread at MOD_LOAD; both
 * arguments are unused.
 *
 * It creates the control device, initializes the spa, zfs and zvol
 * layers, creates the TSD keys, announces the pool version and finally
 * releases the root mount hold taken at MOD_LOAD.
 */
static void
zfs_start(void *context __unused, int pending __unused)
{

	zfsdev_init();
	spa_init(FREAD | FWRITE);
	zfs_init();
	zvol_init();

	tsd_create(&zfs_fsyncer_key, NULL);
	tsd_create(&rrw_tsd_key, NULL);

	printf("ZFS storage pool version " SPA_VERSION_STRING "\n");
	/* Initialization is done; stop holding back the root mount. */
	root_mount_rel(zfs_root_token);
}
3081
3082static int
3083zfs_modevent(module_t mod, int type, void *unused __unused)
3084{
3085	int error;
3086
3087	error = EOPNOTSUPP;
3088	switch (type) {
3089	case MOD_LOAD:
3090		zfs_root_token = root_mount_hold("ZFS");
3091		printf("WARNING: ZFS is considered to be an experimental "
3092		    "feature in FreeBSD.\n");
3093		TASK_INIT(&zfs_start_task, 0, zfs_start, NULL);
3094		taskqueue_enqueue(taskqueue_thread, &zfs_start_task);
3095		mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
3096		error = 0;
3097		break;
3098	case MOD_UNLOAD:
3099		if (spa_busy() || zfs_busy() || zvol_busy() ||
3100		    zio_injection_enabled) {
3101			error = EBUSY;
3102			break;
3103		}
3104		zvol_fini();
3105		zfs_fini();
3106		spa_fini();
3107		zfsdev_fini();
3108		tsd_destroy(&zfs_fsyncer_key);
3109		tsd_destroy(&rrw_tsd_key);
3110		mutex_destroy(&zfs_share_lock);
3111		error = 0;
3112		break;
3113	}
3114	return (error);
3115}
3116
/*
 * Module glue: registers the module under the name "zfsctrl" with
 * zfs_modevent() as its event handler and no private data, at
 * SI_SUB_VFS / SI_ORDER_ANY, and declares a dependency on version 1 of
 * the "opensolaris" module.
 */
static moduledata_t zfs_mod = {
	"zfsctrl",
	zfs_modevent,
	0
};
DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_VFS, SI_ORDER_ANY);
MODULE_DEPEND(zfsctrl, opensolaris, 1, 1, 1);
3124