zfs_ioctl.c revision 7656:2621e50fdf4a
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26
27#include <sys/types.h>
28#include <sys/param.h>
29#include <sys/errno.h>
30#include <sys/uio.h>
31#include <sys/buf.h>
32#include <sys/modctl.h>
33#include <sys/open.h>
34#include <sys/file.h>
35#include <sys/kmem.h>
36#include <sys/conf.h>
37#include <sys/cmn_err.h>
38#include <sys/stat.h>
39#include <sys/zfs_ioctl.h>
40#include <sys/zfs_znode.h>
41#include <sys/zap.h>
42#include <sys/spa.h>
43#include <sys/spa_impl.h>
44#include <sys/vdev.h>
45#include <sys/vdev_impl.h>
46#include <sys/dmu.h>
47#include <sys/dsl_dir.h>
48#include <sys/dsl_dataset.h>
49#include <sys/dsl_prop.h>
50#include <sys/dsl_deleg.h>
51#include <sys/dmu_objset.h>
52#include <sys/ddi.h>
53#include <sys/sunddi.h>
54#include <sys/sunldi.h>
55#include <sys/policy.h>
56#include <sys/zone.h>
57#include <sys/nvpair.h>
58#include <sys/pathname.h>
59#include <sys/mount.h>
60#include <sys/sdt.h>
61#include <sys/fs/zfs.h>
62#include <sys/zfs_ctldir.h>
63#include <sys/zfs_dir.h>
64#include <sys/zvol.h>
65#include <sharefs/share.h>
66#include <sys/dmu_objset.h>
67
68#include "zfs_namecheck.h"
69#include "zfs_prop.h"
70#include "zfs_deleg.h"
71
72extern struct modlfs zfs_modlfs;
73
74extern void zfs_init(void);
75extern void zfs_fini(void);
76
77ldi_ident_t zfs_li = NULL;
78dev_info_t *zfs_dip;
79
80typedef int zfs_ioc_func_t(zfs_cmd_t *);
81typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);
82
83typedef struct zfs_ioc_vec {
84	zfs_ioc_func_t		*zvec_func;
85	zfs_secpolicy_func_t	*zvec_secpolicy;
86	enum {
87		NO_NAME,
88		POOL_NAME,
89		DATASET_NAME
90	} zvec_namecheck;
91	boolean_t		zvec_his_log;
92} zfs_ioc_vec_t;
93
94static void clear_props(char *dataset, nvlist_t *props);
95static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
96    boolean_t *);
97int zfs_set_prop_nvlist(const char *, nvlist_t *);
98
99/* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
100void
101__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
102{
103	const char *newfile;
104	char buf[256];
105	va_list adx;
106
107	/*
108	 * Get rid of annoying "../common/" prefix to filename.
109	 */
110	newfile = strrchr(file, '/');
111	if (newfile != NULL) {
112		newfile = newfile + 1; /* Get rid of leading / */
113	} else {
114		newfile = file;
115	}
116
117	va_start(adx, fmt);
118	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
119	va_end(adx);
120
121	/*
122	 * To get this data, use the zfs-dprintf probe as so:
123	 * dtrace -q -n 'zfs-dprintf \
124	 *	/stringof(arg0) == "dbuf.c"/ \
125	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
126	 * arg0 = file name
127	 * arg1 = function name
128	 * arg2 = line number
129	 * arg3 = message
130	 */
131	DTRACE_PROBE4(zfs__dprintf,
132	    char *, newfile, char *, func, int, line, char *, buf);
133}
134
135static void
136history_str_free(char *buf)
137{
138	kmem_free(buf, HIS_MAX_RECORD_LEN);
139}
140
141static char *
142history_str_get(zfs_cmd_t *zc)
143{
144	char *buf;
145
146	if (zc->zc_history == NULL)
147		return (NULL);
148
149	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
150	if (copyinstr((void *)(uintptr_t)zc->zc_history,
151	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
152		history_str_free(buf);
153		return (NULL);
154	}
155
156	buf[HIS_MAX_RECORD_LEN -1] = '\0';
157
158	return (buf);
159}
160
161/*
162 * Check to see if the named dataset is currently defined as bootable
163 */
164static boolean_t
165zfs_is_bootfs(const char *name)
166{
167	spa_t *spa;
168	boolean_t ret = B_FALSE;
169
170	if (spa_open(name, &spa, FTAG) == 0) {
171		if (spa->spa_bootfs) {
172			objset_t *os;
173
174			if (dmu_objset_open(name, DMU_OST_ZFS,
175			    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
176				ret = (dmu_objset_id(os) == spa->spa_bootfs);
177				dmu_objset_close(os);
178			}
179		}
180		spa_close(spa, FTAG);
181	}
182	return (ret);
183}
184
185/*
186 * zfs_earlier_version
187 *
188 *	Return non-zero if the spa version is less than requested version.
189 */
190static int
191zfs_earlier_version(const char *name, int version)
192{
193	spa_t *spa;
194
195	if (spa_open(name, &spa, FTAG) == 0) {
196		if (spa_version(spa) < version) {
197			spa_close(spa, FTAG);
198			return (1);
199		}
200		spa_close(spa, FTAG);
201	}
202	return (0);
203}
204
205/*
206 * zpl_earlier_version
207 *
208 * Return TRUE if the ZPL version is less than requested version.
209 */
210static boolean_t
211zpl_earlier_version(const char *name, int version)
212{
213	objset_t *os;
214	boolean_t rc = B_TRUE;
215
216	if (dmu_objset_open(name, DMU_OST_ANY,
217	    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
218		uint64_t zplversion;
219
220		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
221			rc = zplversion < version;
222		dmu_objset_close(os);
223	}
224	return (rc);
225}
226
227static void
228zfs_log_history(zfs_cmd_t *zc)
229{
230	spa_t *spa;
231	char *buf;
232
233	if ((buf = history_str_get(zc)) == NULL)
234		return;
235
236	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
237		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
238			(void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
239		spa_close(spa, FTAG);
240	}
241	history_str_free(buf);
242}
243
244/*
245 * Policy for top-level read operations (list pools).  Requires no privileges,
246 * and can be used in the local zone, as there is no associated dataset.
247 */
248/* ARGSUSED */
249static int
250zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
251{
252	return (0);
253}
254
255/*
256 * Policy for dataset read operations (list children, get statistics).  Requires
257 * no privileges, but must be visible in the local zone.
258 */
259/* ARGSUSED */
260static int
261zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
262{
263	if (INGLOBALZONE(curproc) ||
264	    zone_dataset_visible(zc->zc_name, NULL))
265		return (0);
266
267	return (ENOENT);
268}
269
270static int
271zfs_dozonecheck(const char *dataset, cred_t *cr)
272{
273	uint64_t zoned;
274	int writable = 1;
275
276	/*
277	 * The dataset must be visible by this zone -- check this first
278	 * so they don't see EPERM on something they shouldn't know about.
279	 */
280	if (!INGLOBALZONE(curproc) &&
281	    !zone_dataset_visible(dataset, &writable))
282		return (ENOENT);
283
284	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
285		return (ENOENT);
286
287	if (INGLOBALZONE(curproc)) {
288		/*
289		 * If the fs is zoned, only root can access it from the
290		 * global zone.
291		 */
292		if (secpolicy_zfs(cr) && zoned)
293			return (EPERM);
294	} else {
295		/*
296		 * If we are in a local zone, the 'zoned' property must be set.
297		 */
298		if (!zoned)
299			return (EPERM);
300
301		/* must be writable by this zone */
302		if (!writable)
303			return (EPERM);
304	}
305	return (0);
306}
307
308int
309zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
310{
311	int error;
312
313	error = zfs_dozonecheck(name, cr);
314	if (error == 0) {
315		error = secpolicy_zfs(cr);
316		if (error)
317			error = dsl_deleg_access(name, perm, cr);
318	}
319	return (error);
320}
321
322static int
323zfs_secpolicy_setprop(const char *name, zfs_prop_t prop, cred_t *cr)
324{
325	/*
326	 * Check permissions for special properties.
327	 */
328	switch (prop) {
329	case ZFS_PROP_ZONED:
330		/*
331		 * Disallow setting of 'zoned' from within a local zone.
332		 */
333		if (!INGLOBALZONE(curproc))
334			return (EPERM);
335		break;
336
337	case ZFS_PROP_QUOTA:
338		if (!INGLOBALZONE(curproc)) {
339			uint64_t zoned;
340			char setpoint[MAXNAMELEN];
341			/*
342			 * Unprivileged users are allowed to modify the
343			 * quota on things *under* (ie. contained by)
344			 * the thing they own.
345			 */
346			if (dsl_prop_get_integer(name, "zoned", &zoned,
347			    setpoint))
348				return (EPERM);
349			if (!zoned || strlen(name) <= strlen(setpoint))
350				return (EPERM);
351		}
352		break;
353	}
354
355	return (zfs_secpolicy_write_perms(name, zfs_prop_to_name(prop), cr));
356}
357
358int
359zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
360{
361	int error;
362
363	error = zfs_dozonecheck(zc->zc_name, cr);
364	if (error)
365		return (error);
366
367	/*
368	 * permission to set permissions will be evaluated later in
369	 * dsl_deleg_can_allow()
370	 */
371	return (0);
372}
373
374int
375zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
376{
377	int error;
378	error = zfs_secpolicy_write_perms(zc->zc_name,
379	    ZFS_DELEG_PERM_ROLLBACK, cr);
380	if (error == 0)
381		error = zfs_secpolicy_write_perms(zc->zc_name,
382		    ZFS_DELEG_PERM_MOUNT, cr);
383	return (error);
384}
385
386int
387zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
388{
389	return (zfs_secpolicy_write_perms(zc->zc_name,
390	    ZFS_DELEG_PERM_SEND, cr));
391}
392
393int
394zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
395{
396	if (!INGLOBALZONE(curproc))
397		return (EPERM);
398
399	if (secpolicy_nfs(cr) == 0) {
400		return (0);
401	} else {
402		vnode_t *vp;
403		int error;
404
405		if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
406		    NO_FOLLOW, NULL, &vp)) != 0)
407			return (error);
408
409		/* Now make sure mntpnt and dataset are ZFS */
410
411		if (vp->v_vfsp->vfs_fstype != zfsfstype ||
412		    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
413		    zc->zc_name) != 0)) {
414			VN_RELE(vp);
415			return (EPERM);
416		}
417
418		VN_RELE(vp);
419		return (dsl_deleg_access(zc->zc_name,
420		    ZFS_DELEG_PERM_SHARE, cr));
421	}
422}
423
/*
 * Compute the parent of a dataset or snapshot name.
 *
 * datasetname	name to examine, e.g. "pool/fs" or "pool/fs@snap"
 * parent	output buffer that receives the parent name
 * parentsize	size of 'parent' in bytes
 *
 * The name is copied into 'parent' (truncated to fit if necessary) and cut
 * at the last '@' if there is one, otherwise at the last '/'.
 *
 * Returns 0 on success, or ENOENT if the (possibly truncated) name has
 * neither separator or the output buffer has no room at all.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0)
		return (ENOENT);

	/*
	 * strncpy() does not NUL-terminate when the source is at least
	 * parentsize bytes long, so terminate explicitly; otherwise the
	 * strrchr() calls below would scan past the end of the buffer.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	parent[parentsize - 1] = '\0';

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (ENOENT);

	cp[0] = '\0';
	return (0);
}
445
446int
447zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
448{
449	int error;
450
451	if ((error = zfs_secpolicy_write_perms(name,
452	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
453		return (error);
454
455	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
456}
457
458static int
459zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
460{
461	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
462}
463
464/*
465 * Must have sys_config privilege to check the iscsi permission
466 */
467/* ARGSUSED */
468static int
469zfs_secpolicy_iscsi(zfs_cmd_t *zc, cred_t *cr)
470{
471	return (secpolicy_zfs(cr));
472}
473
474int
475zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
476{
477	char 	parentname[MAXNAMELEN];
478	int	error;
479
480	if ((error = zfs_secpolicy_write_perms(from,
481	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
482		return (error);
483
484	if ((error = zfs_secpolicy_write_perms(from,
485	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
486		return (error);
487
488	if ((error = zfs_get_parent(to, parentname,
489	    sizeof (parentname))) != 0)
490		return (error);
491
492	if ((error = zfs_secpolicy_write_perms(parentname,
493	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
494		return (error);
495
496	if ((error = zfs_secpolicy_write_perms(parentname,
497	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
498		return (error);
499
500	return (error);
501}
502
503static int
504zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
505{
506	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
507}
508
509static int
510zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
511{
512	char 	parentname[MAXNAMELEN];
513	objset_t *clone;
514	int error;
515
516	error = zfs_secpolicy_write_perms(zc->zc_name,
517	    ZFS_DELEG_PERM_PROMOTE, cr);
518	if (error)
519		return (error);
520
521	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
522	    DS_MODE_USER | DS_MODE_READONLY, &clone);
523
524	if (error == 0) {
525		dsl_dataset_t *pclone = NULL;
526		dsl_dir_t *dd;
527		dd = clone->os->os_dsl_dataset->ds_dir;
528
529		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
530		error = dsl_dataset_hold_obj(dd->dd_pool,
531		    dd->dd_phys->dd_origin_obj, FTAG, &pclone);
532		rw_exit(&dd->dd_pool->dp_config_rwlock);
533		if (error) {
534			dmu_objset_close(clone);
535			return (error);
536		}
537
538		error = zfs_secpolicy_write_perms(zc->zc_name,
539		    ZFS_DELEG_PERM_MOUNT, cr);
540
541		dsl_dataset_name(pclone, parentname);
542		dmu_objset_close(clone);
543		dsl_dataset_rele(pclone, FTAG);
544		if (error == 0)
545			error = zfs_secpolicy_write_perms(parentname,
546			    ZFS_DELEG_PERM_PROMOTE, cr);
547	}
548	return (error);
549}
550
551static int
552zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
553{
554	int error;
555
556	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
557	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
558		return (error);
559
560	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
561	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
562		return (error);
563
564	return (zfs_secpolicy_write_perms(zc->zc_name,
565	    ZFS_DELEG_PERM_CREATE, cr));
566}
567
568int
569zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
570{
571	int error;
572
573	if ((error = zfs_secpolicy_write_perms(name,
574	    ZFS_DELEG_PERM_SNAPSHOT, cr)) != 0)
575		return (error);
576
577	error = zfs_secpolicy_write_perms(name,
578	    ZFS_DELEG_PERM_MOUNT, cr);
579
580	return (error);
581}
582
583static int
584zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
585{
586
587	return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
588}
589
590static int
591zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
592{
593	char 	parentname[MAXNAMELEN];
594	int 	error;
595
596	if ((error = zfs_get_parent(zc->zc_name, parentname,
597	    sizeof (parentname))) != 0)
598		return (error);
599
600	if (zc->zc_value[0] != '\0') {
601		if ((error = zfs_secpolicy_write_perms(zc->zc_value,
602		    ZFS_DELEG_PERM_CLONE, cr)) != 0)
603			return (error);
604	}
605
606	if ((error = zfs_secpolicy_write_perms(parentname,
607	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
608		return (error);
609
610	error = zfs_secpolicy_write_perms(parentname,
611	    ZFS_DELEG_PERM_MOUNT, cr);
612
613	return (error);
614}
615
616static int
617zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
618{
619	int error;
620
621	error = secpolicy_fs_unmount(cr, NULL);
622	if (error) {
623		error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
624	}
625	return (error);
626}
627
628/*
629 * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
630 * SYS_CONFIG privilege, which is not available in a local zone.
631 */
632/* ARGSUSED */
633static int
634zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
635{
636	if (secpolicy_sys_config(cr, B_FALSE) != 0)
637		return (EPERM);
638
639	return (0);
640}
641
642/*
643 * Just like zfs_secpolicy_config, except that we will check for
644 * mount permission on the dataset for permission to create/remove
645 * the minor nodes.
646 */
647static int
648zfs_secpolicy_minor(zfs_cmd_t *zc, cred_t *cr)
649{
650	if (secpolicy_sys_config(cr, B_FALSE) != 0) {
651		return (dsl_deleg_access(zc->zc_name,
652		    ZFS_DELEG_PERM_MOUNT, cr));
653	}
654
655	return (0);
656}
657
658/*
659 * Policy for fault injection.  Requires all privileges.
660 */
661/* ARGSUSED */
662static int
663zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
664{
665	return (secpolicy_zinject(cr));
666}
667
668static int
669zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
670{
671	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
672
673	if (prop == ZPROP_INVAL) {
674		if (!zfs_prop_user(zc->zc_value))
675			return (EINVAL);
676		return (zfs_secpolicy_write_perms(zc->zc_name,
677		    ZFS_DELEG_PERM_USERPROP, cr));
678	} else {
679		if (!zfs_prop_inheritable(prop))
680			return (EINVAL);
681		return (zfs_secpolicy_setprop(zc->zc_name, prop, cr));
682	}
683}
684
685/*
686 * Returns the nvlist as specified by the user in the zfs_cmd_t.
687 */
688static int
689get_nvlist(uint64_t nvl, uint64_t size, nvlist_t **nvp)
690{
691	char *packed;
692	int error;
693	nvlist_t *list = NULL;
694
695	/*
696	 * Read in and unpack the user-supplied nvlist.
697	 */
698	if (size == 0)
699		return (EINVAL);
700
701	packed = kmem_alloc(size, KM_SLEEP);
702
703	if ((error = xcopyin((void *)(uintptr_t)nvl, packed, size)) != 0) {
704		kmem_free(packed, size);
705		return (error);
706	}
707
708	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
709		kmem_free(packed, size);
710		return (error);
711	}
712
713	kmem_free(packed, size);
714
715	*nvp = list;
716	return (0);
717}
718
719static int
720put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
721{
722	char *packed = NULL;
723	size_t size;
724	int error;
725
726	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
727
728	if (size > zc->zc_nvlist_dst_size) {
729		error = ENOMEM;
730	} else {
731		packed = kmem_alloc(size, KM_SLEEP);
732		VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
733		    KM_SLEEP) == 0);
734		error = xcopyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
735		    size);
736		kmem_free(packed, size);
737	}
738
739	zc->zc_nvlist_dst_size = size;
740	return (error);
741}
742
743static int
744zfs_ioc_pool_create(zfs_cmd_t *zc)
745{
746	int error;
747	nvlist_t *config, *props = NULL;
748	nvlist_t *rootprops = NULL;
749	nvlist_t *zplprops = NULL;
750	char *buf;
751
752	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
753	    &config))
754		return (error);
755
756	if (zc->zc_nvlist_src_size != 0 && (error =
757	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, &props))) {
758		nvlist_free(config);
759		return (error);
760	}
761
762	if (props) {
763		nvlist_t *nvl = NULL;
764		uint64_t version = SPA_VERSION;
765
766		(void) nvlist_lookup_uint64(props,
767		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
768		if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) {
769			error = EINVAL;
770			goto pool_props_bad;
771		}
772		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
773		if (nvl) {
774			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
775			if (error != 0) {
776				nvlist_free(config);
777				nvlist_free(props);
778				return (error);
779			}
780			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
781		}
782		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
783		error = zfs_fill_zplprops_root(version, rootprops,
784		    zplprops, NULL);
785		if (error)
786			goto pool_props_bad;
787	}
788
789	buf = history_str_get(zc);
790
791	error = spa_create(zc->zc_name, config, props, buf, zplprops);
792
793	/*
794	 * Set the remaining root properties
795	 */
796	if (!error &&
797	    (error = zfs_set_prop_nvlist(zc->zc_name, rootprops)) != 0)
798		(void) spa_destroy(zc->zc_name);
799
800	if (buf != NULL)
801		history_str_free(buf);
802
803pool_props_bad:
804	nvlist_free(rootprops);
805	nvlist_free(zplprops);
806	nvlist_free(config);
807	nvlist_free(props);
808
809	return (error);
810}
811
812static int
813zfs_ioc_pool_destroy(zfs_cmd_t *zc)
814{
815	int error;
816	zfs_log_history(zc);
817	error = spa_destroy(zc->zc_name);
818	return (error);
819}
820
821static int
822zfs_ioc_pool_import(zfs_cmd_t *zc)
823{
824	int error;
825	nvlist_t *config, *props = NULL;
826	uint64_t guid;
827
828	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
829	    &config)) != 0)
830		return (error);
831
832	if (zc->zc_nvlist_src_size != 0 && (error =
833	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, &props))) {
834		nvlist_free(config);
835		return (error);
836	}
837
838	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
839	    guid != zc->zc_guid)
840		error = EINVAL;
841	else if (zc->zc_cookie)
842		error = spa_import_faulted(zc->zc_name, config,
843		    props);
844	else
845		error = spa_import(zc->zc_name, config, props);
846
847	nvlist_free(config);
848
849	if (props)
850		nvlist_free(props);
851
852	return (error);
853}
854
855static int
856zfs_ioc_pool_export(zfs_cmd_t *zc)
857{
858	int error;
859	boolean_t force = (boolean_t)zc->zc_cookie;
860
861	zfs_log_history(zc);
862	error = spa_export(zc->zc_name, NULL, force);
863	return (error);
864}
865
866static int
867zfs_ioc_pool_configs(zfs_cmd_t *zc)
868{
869	nvlist_t *configs;
870	int error;
871
872	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
873		return (EEXIST);
874
875	error = put_nvlist(zc, configs);
876
877	nvlist_free(configs);
878
879	return (error);
880}
881
882static int
883zfs_ioc_pool_stats(zfs_cmd_t *zc)
884{
885	nvlist_t *config;
886	int error;
887	int ret = 0;
888
889	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
890	    sizeof (zc->zc_value));
891
892	if (config != NULL) {
893		ret = put_nvlist(zc, config);
894		nvlist_free(config);
895
896		/*
897		 * The config may be present even if 'error' is non-zero.
898		 * In this case we return success, and preserve the real errno
899		 * in 'zc_cookie'.
900		 */
901		zc->zc_cookie = error;
902	} else {
903		ret = error;
904	}
905
906	return (ret);
907}
908
909/*
910 * Try to import the given pool, returning pool stats as appropriate so that
911 * user land knows which devices are available and overall pool health.
912 */
913static int
914zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
915{
916	nvlist_t *tryconfig, *config;
917	int error;
918
919	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
920	    &tryconfig)) != 0)
921		return (error);
922
923	config = spa_tryimport(tryconfig);
924
925	nvlist_free(tryconfig);
926
927	if (config == NULL)
928		return (EINVAL);
929
930	error = put_nvlist(zc, config);
931	nvlist_free(config);
932
933	return (error);
934}
935
936static int
937zfs_ioc_pool_scrub(zfs_cmd_t *zc)
938{
939	spa_t *spa;
940	int error;
941
942	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
943		return (error);
944
945	error = spa_scrub(spa, zc->zc_cookie);
946
947	spa_close(spa, FTAG);
948
949	return (error);
950}
951
952static int
953zfs_ioc_pool_freeze(zfs_cmd_t *zc)
954{
955	spa_t *spa;
956	int error;
957
958	error = spa_open(zc->zc_name, &spa, FTAG);
959	if (error == 0) {
960		spa_freeze(spa);
961		spa_close(spa, FTAG);
962	}
963	return (error);
964}
965
966static int
967zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
968{
969	spa_t *spa;
970	int error;
971
972	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
973		return (error);
974
975	if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
976		spa_close(spa, FTAG);
977		return (EINVAL);
978	}
979
980	spa_upgrade(spa, zc->zc_cookie);
981	spa_close(spa, FTAG);
982
983	return (error);
984}
985
986static int
987zfs_ioc_pool_get_history(zfs_cmd_t *zc)
988{
989	spa_t *spa;
990	char *hist_buf;
991	uint64_t size;
992	int error;
993
994	if ((size = zc->zc_history_len) == 0)
995		return (EINVAL);
996
997	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
998		return (error);
999
1000	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1001		spa_close(spa, FTAG);
1002		return (ENOTSUP);
1003	}
1004
1005	hist_buf = kmem_alloc(size, KM_SLEEP);
1006	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1007	    &zc->zc_history_len, hist_buf)) == 0) {
1008		error = xcopyout(hist_buf,
1009		    (char *)(uintptr_t)zc->zc_history,
1010		    zc->zc_history_len);
1011	}
1012
1013	spa_close(spa, FTAG);
1014	kmem_free(hist_buf, size);
1015	return (error);
1016}
1017
1018static int
1019zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1020{
1021	int error;
1022
1023	if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
1024		return (error);
1025
1026	return (0);
1027}
1028
1029static int
1030zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1031{
1032	objset_t *osp;
1033	int error;
1034
1035	if ((error = dmu_objset_open(zc->zc_name, DMU_OST_ZFS,
1036	    DS_MODE_USER | DS_MODE_READONLY, &osp)) != 0)
1037		return (error);
1038	error = zfs_obj_to_path(osp, zc->zc_obj, zc->zc_value,
1039	    sizeof (zc->zc_value));
1040	dmu_objset_close(osp);
1041
1042	return (error);
1043}
1044
1045static int
1046zfs_ioc_vdev_add(zfs_cmd_t *zc)
1047{
1048	spa_t *spa;
1049	int error;
1050	nvlist_t *config, **l2cache, **spares;
1051	uint_t nl2cache = 0, nspares = 0;
1052
1053	error = spa_open(zc->zc_name, &spa, FTAG);
1054	if (error != 0)
1055		return (error);
1056
1057	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1058	    &config);
1059	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1060	    &l2cache, &nl2cache);
1061
1062	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1063	    &spares, &nspares);
1064
1065	/*
1066	 * A root pool with concatenated devices is not supported.
1067	 * Thus, can not add a device to a root pool.
1068	 *
1069	 * Intent log device can not be added to a rootpool because
1070	 * during mountroot, zil is replayed, a seperated log device
1071	 * can not be accessed during the mountroot time.
1072	 *
1073	 * l2cache and spare devices are ok to be added to a rootpool.
1074	 */
1075	if (spa->spa_bootfs != 0 && nl2cache == 0 && nspares == 0) {
1076		spa_close(spa, FTAG);
1077		return (EDOM);
1078	}
1079
1080	if (error == 0) {
1081		error = spa_vdev_add(spa, config);
1082		nvlist_free(config);
1083	}
1084	spa_close(spa, FTAG);
1085	return (error);
1086}
1087
1088static int
1089zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1090{
1091	spa_t *spa;
1092	int error;
1093
1094	error = spa_open(zc->zc_name, &spa, FTAG);
1095	if (error != 0)
1096		return (error);
1097	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1098	spa_close(spa, FTAG);
1099	return (error);
1100}
1101
1102static int
1103zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1104{
1105	spa_t *spa;
1106	int error;
1107	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1108
1109	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1110		return (error);
1111	switch (zc->zc_cookie) {
1112	case VDEV_STATE_ONLINE:
1113		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1114		break;
1115
1116	case VDEV_STATE_OFFLINE:
1117		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1118		break;
1119
1120	case VDEV_STATE_FAULTED:
1121		error = vdev_fault(spa, zc->zc_guid);
1122		break;
1123
1124	case VDEV_STATE_DEGRADED:
1125		error = vdev_degrade(spa, zc->zc_guid);
1126		break;
1127
1128	default:
1129		error = EINVAL;
1130	}
1131	zc->zc_cookie = newstate;
1132	spa_close(spa, FTAG);
1133	return (error);
1134}
1135
1136static int
1137zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1138{
1139	spa_t *spa;
1140	int replacing = zc->zc_cookie;
1141	nvlist_t *config;
1142	int error;
1143
1144	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1145		return (error);
1146
1147	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1148	    &config)) == 0) {
1149		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1150		nvlist_free(config);
1151	}
1152
1153	spa_close(spa, FTAG);
1154	return (error);
1155}
1156
1157static int
1158zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1159{
1160	spa_t *spa;
1161	int error;
1162
1163	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1164		return (error);
1165
1166	error = spa_vdev_detach(spa, zc->zc_guid, B_FALSE);
1167
1168	spa_close(spa, FTAG);
1169	return (error);
1170}
1171
1172static int
1173zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1174{
1175	spa_t *spa;
1176	char *path = zc->zc_value;
1177	uint64_t guid = zc->zc_guid;
1178	int error;
1179
1180	error = spa_open(zc->zc_name, &spa, FTAG);
1181	if (error != 0)
1182		return (error);
1183
1184	error = spa_vdev_setpath(spa, guid, path);
1185	spa_close(spa, FTAG);
1186	return (error);
1187}
1188
1189/*
1190 * inputs:
1191 * zc_name		name of filesystem
1192 * zc_nvlist_dst_size	size of buffer for property nvlist
1193 *
1194 * outputs:
1195 * zc_objset_stats	stats
1196 * zc_nvlist_dst	property nvlist
1197 * zc_nvlist_dst_size	size of property nvlist
1198 */
1199static int
1200zfs_ioc_objset_stats(zfs_cmd_t *zc)
1201{
1202	objset_t *os = NULL;
1203	int error;
1204	nvlist_t *nv;
1205
1206	if (error = dmu_objset_open(zc->zc_name,
1207	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os))
1208		return (error);
1209
1210	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1211
1212	if (zc->zc_nvlist_dst != 0 &&
1213	    (error = dsl_prop_get_all(os, &nv, FALSE)) == 0) {
1214		dmu_objset_stats(os, nv);
1215		/*
1216		 * NB: zvol_get_stats() will read the objset contents,
1217		 * which we aren't supposed to do with a
1218		 * DS_MODE_USER hold, because it could be
1219		 * inconsistent.  So this is a bit of a workaround...
1220		 */
1221		if (!zc->zc_objset_stats.dds_inconsistent) {
1222			if (dmu_objset_type(os) == DMU_OST_ZVOL)
1223				VERIFY(zvol_get_stats(os, nv) == 0);
1224		}
1225		error = put_nvlist(zc, nv);
1226		nvlist_free(nv);
1227	}
1228
1229	dmu_objset_close(os);
1230	return (error);
1231}
1232
1233static int
1234nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
1235{
1236	uint64_t value;
1237	int error;
1238
1239	/*
1240	 * zfs_get_zplprop() will either find a value or give us
1241	 * the default value (if there is one).
1242	 */
1243	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
1244		return (error);
1245	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
1246	return (0);
1247}
1248
1249/*
1250 * inputs:
1251 * zc_name		name of filesystem
1252 * zc_nvlist_dst_size	size of buffer for zpl property nvlist
1253 *
1254 * outputs:
1255 * zc_nvlist_dst	zpl property nvlist
1256 * zc_nvlist_dst_size	size of zpl property nvlist
1257 */
1258static int
1259zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
1260{
1261	objset_t *os;
1262	int err;
1263
1264	if (err = dmu_objset_open(zc->zc_name,
1265	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os))
1266		return (err);
1267
1268	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1269
1270	/*
1271	 * NB: nvl_add_zplprop() will read the objset contents,
1272	 * which we aren't supposed to do with a DS_MODE_USER
1273	 * hold, because it could be inconsistent.
1274	 */
1275	if (zc->zc_nvlist_dst != NULL &&
1276	    !zc->zc_objset_stats.dds_inconsistent &&
1277	    dmu_objset_type(os) == DMU_OST_ZFS) {
1278		nvlist_t *nv;
1279
1280		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1281		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
1282		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
1283		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
1284		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
1285			err = put_nvlist(zc, nv);
1286		nvlist_free(nv);
1287	} else {
1288		err = ENOENT;
1289	}
1290	dmu_objset_close(os);
1291	return (err);
1292}
1293
1294/*
1295 * inputs:
1296 * zc_name		name of filesystem
1297 * zc_cookie		zap cursor
1298 * zc_nvlist_dst_size	size of buffer for property nvlist
1299 *
1300 * outputs:
1301 * zc_name		name of next filesystem
1302 * zc_objset_stats	stats
1303 * zc_nvlist_dst	property nvlist
1304 * zc_nvlist_dst_size	size of property nvlist
1305 */
1306static int
1307zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1308{
1309	objset_t *os;
1310	int error;
1311	char *p;
1312
1313	if (error = dmu_objset_open(zc->zc_name,
1314	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os)) {
1315		if (error == ENOENT)
1316			error = ESRCH;
1317		return (error);
1318	}
1319
1320	p = strrchr(zc->zc_name, '/');
1321	if (p == NULL || p[1] != '\0')
1322		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1323	p = zc->zc_name + strlen(zc->zc_name);
1324
1325	do {
1326		error = dmu_dir_list_next(os,
1327		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
1328		    NULL, &zc->zc_cookie);
1329		if (error == ENOENT)
1330			error = ESRCH;
1331	} while (error == 0 && !INGLOBALZONE(curproc) &&
1332	    !zone_dataset_visible(zc->zc_name, NULL));
1333	dmu_objset_close(os);
1334
1335	/*
1336	 * If it's a hidden dataset (ie. with a '$' in its name), don't
1337	 * try to get stats for it.  Userland will skip over it.
1338	 */
1339	if (error == 0 && strchr(zc->zc_name, '$') == NULL)
1340		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1341
1342	return (error);
1343}
1344
1345/*
1346 * inputs:
1347 * zc_name		name of filesystem
1348 * zc_cookie		zap cursor
1349 * zc_nvlist_dst_size	size of buffer for property nvlist
1350 *
1351 * outputs:
1352 * zc_name		name of next snapshot
1353 * zc_objset_stats	stats
1354 * zc_nvlist_dst	property nvlist
1355 * zc_nvlist_dst_size	size of property nvlist
1356 */
1357static int
1358zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
1359{
1360	objset_t *os;
1361	int error;
1362
1363	error = dmu_objset_open(zc->zc_name,
1364	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os);
1365	if (error)
1366		return (error == ENOENT ? ESRCH : error);
1367
1368	/*
1369	 * A dataset name of maximum length cannot have any snapshots,
1370	 * so exit immediately.
1371	 */
1372	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
1373		dmu_objset_close(os);
1374		return (ESRCH);
1375	}
1376
1377	error = dmu_snapshot_list_next(os,
1378	    sizeof (zc->zc_name) - strlen(zc->zc_name),
1379	    zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie, NULL);
1380	dmu_objset_close(os);
1381	if (error == 0)
1382		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1383	else if (error == ENOENT)
1384		error = ESRCH;
1385
1386	/* if we failed, undo the @ that we tacked on to zc_name */
1387	if (error)
1388		*strchr(zc->zc_name, '@') = '\0';
1389	return (error);
1390}
1391
1392int
1393zfs_set_prop_nvlist(const char *name, nvlist_t *nvl)
1394{
1395	nvpair_t *elem;
1396	int error;
1397	uint64_t intval;
1398	char *strval;
1399
1400	/*
1401	 * First validate permission to set all of the properties
1402	 */
1403	elem = NULL;
1404	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
1405		const char *propname = nvpair_name(elem);
1406		zfs_prop_t prop = zfs_name_to_prop(propname);
1407
1408		if (prop == ZPROP_INVAL) {
1409			/*
1410			 * If this is a user-defined property, it must be a
1411			 * string, and there is no further validation to do.
1412			 */
1413			if (!zfs_prop_user(propname) ||
1414			    nvpair_type(elem) != DATA_TYPE_STRING)
1415				return (EINVAL);
1416
1417			if (error = zfs_secpolicy_write_perms(name,
1418			    ZFS_DELEG_PERM_USERPROP, CRED()))
1419				return (error);
1420			continue;
1421		}
1422
1423		if ((error = zfs_secpolicy_setprop(name, prop, CRED())) != 0)
1424			return (error);
1425
1426		/*
1427		 * Check that this value is valid for this pool version
1428		 */
1429		switch (prop) {
1430		case ZFS_PROP_COMPRESSION:
1431			/*
1432			 * If the user specified gzip compression, make sure
1433			 * the SPA supports it. We ignore any errors here since
1434			 * we'll catch them later.
1435			 */
1436			if (nvpair_type(elem) == DATA_TYPE_UINT64 &&
1437			    nvpair_value_uint64(elem, &intval) == 0) {
1438				if (intval >= ZIO_COMPRESS_GZIP_1 &&
1439				    intval <= ZIO_COMPRESS_GZIP_9 &&
1440				    zfs_earlier_version(name,
1441				    SPA_VERSION_GZIP_COMPRESSION))
1442					return (ENOTSUP);
1443
1444				/*
1445				 * If this is a bootable dataset then
1446				 * verify that the compression algorithm
1447				 * is supported for booting. We must return
1448				 * something other than ENOTSUP since it
1449				 * implies a downrev pool version.
1450				 */
1451				if (zfs_is_bootfs(name) &&
1452				    !BOOTFS_COMPRESS_VALID(intval))
1453					return (ERANGE);
1454			}
1455			break;
1456
1457		case ZFS_PROP_COPIES:
1458			if (zfs_earlier_version(name,
1459			    SPA_VERSION_DITTO_BLOCKS))
1460				return (ENOTSUP);
1461			break;
1462
1463		case ZFS_PROP_SHARESMB:
1464			if (zpl_earlier_version(name, ZPL_VERSION_FUID))
1465				return (ENOTSUP);
1466			break;
1467		}
1468	}
1469
1470	elem = NULL;
1471	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
1472		const char *propname = nvpair_name(elem);
1473		zfs_prop_t prop = zfs_name_to_prop(propname);
1474
1475		if (prop == ZPROP_INVAL) {
1476			VERIFY(nvpair_value_string(elem, &strval) == 0);
1477			error = dsl_prop_set(name, propname, 1,
1478			    strlen(strval) + 1, strval);
1479			if (error == 0)
1480				continue;
1481			else
1482				return (error);
1483		}
1484
1485		switch (prop) {
1486		case ZFS_PROP_QUOTA:
1487			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1488			    (error = dsl_dir_set_quota(name, intval)) != 0)
1489				return (error);
1490			break;
1491
1492		case ZFS_PROP_REFQUOTA:
1493			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1494			    (error = dsl_dataset_set_quota(name, intval)) != 0)
1495				return (error);
1496			break;
1497
1498		case ZFS_PROP_RESERVATION:
1499			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1500			    (error = dsl_dir_set_reservation(name,
1501			    intval)) != 0)
1502				return (error);
1503			break;
1504
1505		case ZFS_PROP_REFRESERVATION:
1506			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1507			    (error = dsl_dataset_set_reservation(name,
1508			    intval)) != 0)
1509				return (error);
1510			break;
1511
1512		case ZFS_PROP_VOLSIZE:
1513			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1514			    (error = zvol_set_volsize(name,
1515			    ddi_driver_major(zfs_dip), intval)) != 0)
1516				return (error);
1517			break;
1518
1519		case ZFS_PROP_VOLBLOCKSIZE:
1520			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1521			    (error = zvol_set_volblocksize(name, intval)) != 0)
1522				return (error);
1523			break;
1524
1525		case ZFS_PROP_VERSION:
1526			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1527			    (error = zfs_set_version(name, intval)) != 0)
1528				return (error);
1529			break;
1530
1531		default:
1532			if (nvpair_type(elem) == DATA_TYPE_STRING) {
1533				if (zfs_prop_get_type(prop) !=
1534				    PROP_TYPE_STRING)
1535					return (EINVAL);
1536				VERIFY(nvpair_value_string(elem, &strval) == 0);
1537				if ((error = dsl_prop_set(name,
1538				    nvpair_name(elem), 1, strlen(strval) + 1,
1539				    strval)) != 0)
1540					return (error);
1541			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
1542				const char *unused;
1543
1544				VERIFY(nvpair_value_uint64(elem, &intval) == 0);
1545
1546				switch (zfs_prop_get_type(prop)) {
1547				case PROP_TYPE_NUMBER:
1548					break;
1549				case PROP_TYPE_STRING:
1550					return (EINVAL);
1551				case PROP_TYPE_INDEX:
1552					if (zfs_prop_index_to_string(prop,
1553					    intval, &unused) != 0)
1554						return (EINVAL);
1555					break;
1556				default:
1557					cmn_err(CE_PANIC,
1558					    "unknown property type");
1559					break;
1560				}
1561
1562				if ((error = dsl_prop_set(name, propname,
1563				    8, 1, &intval)) != 0)
1564					return (error);
1565			} else {
1566				return (EINVAL);
1567			}
1568			break;
1569		}
1570	}
1571
1572	return (0);
1573}
1574
1575/*
1576 * inputs:
1577 * zc_name		name of filesystem
1578 * zc_value		name of property to inherit
1579 * zc_nvlist_src{_size}	nvlist of properties to apply
1580 * zc_cookie		clear existing local props?
1581 *
1582 * outputs:		none
1583 */
1584static int
1585zfs_ioc_set_prop(zfs_cmd_t *zc)
1586{
1587	nvlist_t *nvl;
1588	int error;
1589
1590	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1591	    &nvl)) != 0)
1592		return (error);
1593
1594	if (zc->zc_cookie) {
1595		nvlist_t *origprops;
1596		objset_t *os;
1597
1598		if (dmu_objset_open(zc->zc_name, DMU_OST_ANY,
1599		    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
1600			if (dsl_prop_get_all(os, &origprops, TRUE) == 0) {
1601				clear_props(zc->zc_name, origprops);
1602				nvlist_free(origprops);
1603			}
1604			dmu_objset_close(os);
1605		}
1606
1607	}
1608
1609	error = zfs_set_prop_nvlist(zc->zc_name, nvl);
1610
1611	nvlist_free(nvl);
1612	return (error);
1613}
1614
1615/*
1616 * inputs:
1617 * zc_name		name of filesystem
1618 * zc_value		name of property to inherit
1619 *
1620 * outputs:		none
1621 */
1622static int
1623zfs_ioc_inherit_prop(zfs_cmd_t *zc)
1624{
1625	/* the property name has been validated by zfs_secpolicy_inherit() */
1626	return (dsl_prop_set(zc->zc_name, zc->zc_value, 0, 0, NULL));
1627}
1628
1629static int
1630zfs_ioc_pool_set_props(zfs_cmd_t *zc)
1631{
1632	nvlist_t *props;
1633	spa_t *spa;
1634	int error;
1635
1636	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1637	    &props)))
1638		return (error);
1639
1640	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
1641		nvlist_free(props);
1642		return (error);
1643	}
1644
1645	error = spa_prop_set(spa, props);
1646
1647	nvlist_free(props);
1648	spa_close(spa, FTAG);
1649
1650	return (error);
1651}
1652
1653static int
1654zfs_ioc_pool_get_props(zfs_cmd_t *zc)
1655{
1656	spa_t *spa;
1657	int error;
1658	nvlist_t *nvp = NULL;
1659
1660	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1661		return (error);
1662
1663	error = spa_prop_get(spa, &nvp);
1664
1665	if (error == 0 && zc->zc_nvlist_dst != NULL)
1666		error = put_nvlist(zc, nvp);
1667	else
1668		error = EFAULT;
1669
1670	spa_close(spa, FTAG);
1671
1672	if (nvp)
1673		nvlist_free(nvp);
1674	return (error);
1675}
1676
1677static int
1678zfs_ioc_iscsi_perm_check(zfs_cmd_t *zc)
1679{
1680	nvlist_t *nvp;
1681	int error;
1682	uint32_t uid;
1683	uint32_t gid;
1684	uint32_t *groups;
1685	uint_t group_cnt;
1686	cred_t	*usercred;
1687
1688	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1689	    &nvp)) != 0) {
1690		return (error);
1691	}
1692
1693	if ((error = nvlist_lookup_uint32(nvp,
1694	    ZFS_DELEG_PERM_UID, &uid)) != 0) {
1695		nvlist_free(nvp);
1696		return (EPERM);
1697	}
1698
1699	if ((error = nvlist_lookup_uint32(nvp,
1700	    ZFS_DELEG_PERM_GID, &gid)) != 0) {
1701		nvlist_free(nvp);
1702		return (EPERM);
1703	}
1704
1705	if ((error = nvlist_lookup_uint32_array(nvp, ZFS_DELEG_PERM_GROUPS,
1706	    &groups, &group_cnt)) != 0) {
1707		nvlist_free(nvp);
1708		return (EPERM);
1709	}
1710	usercred = cralloc();
1711	if ((crsetugid(usercred, uid, gid) != 0) ||
1712	    (crsetgroups(usercred, group_cnt, (gid_t *)groups) != 0)) {
1713		nvlist_free(nvp);
1714		crfree(usercred);
1715		return (EPERM);
1716	}
1717	nvlist_free(nvp);
1718	error = dsl_deleg_access(zc->zc_name,
1719	    zfs_prop_to_name(ZFS_PROP_SHAREISCSI), usercred);
1720	crfree(usercred);
1721	return (error);
1722}
1723
1724/*
1725 * inputs:
1726 * zc_name		name of filesystem
1727 * zc_nvlist_src{_size}	nvlist of delegated permissions
1728 * zc_perm_action	allow/unallow flag
1729 *
1730 * outputs:		none
1731 */
1732static int
1733zfs_ioc_set_fsacl(zfs_cmd_t *zc)
1734{
1735	int error;
1736	nvlist_t *fsaclnv = NULL;
1737
1738	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1739	    &fsaclnv)) != 0)
1740		return (error);
1741
1742	/*
1743	 * Verify nvlist is constructed correctly
1744	 */
1745	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
1746		nvlist_free(fsaclnv);
1747		return (EINVAL);
1748	}
1749
1750	/*
1751	 * If we don't have PRIV_SYS_MOUNT, then validate
1752	 * that user is allowed to hand out each permission in
1753	 * the nvlist(s)
1754	 */
1755
1756	error = secpolicy_zfs(CRED());
1757	if (error) {
1758		if (zc->zc_perm_action == B_FALSE) {
1759			error = dsl_deleg_can_allow(zc->zc_name,
1760			    fsaclnv, CRED());
1761		} else {
1762			error = dsl_deleg_can_unallow(zc->zc_name,
1763			    fsaclnv, CRED());
1764		}
1765	}
1766
1767	if (error == 0)
1768		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
1769
1770	nvlist_free(fsaclnv);
1771	return (error);
1772}
1773
1774/*
1775 * inputs:
1776 * zc_name		name of filesystem
1777 *
1778 * outputs:
1779 * zc_nvlist_src{_size}	nvlist of delegated permissions
1780 */
1781static int
1782zfs_ioc_get_fsacl(zfs_cmd_t *zc)
1783{
1784	nvlist_t *nvp;
1785	int error;
1786
1787	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
1788		error = put_nvlist(zc, nvp);
1789		nvlist_free(nvp);
1790	}
1791
1792	return (error);
1793}
1794
1795/*
1796 * inputs:
1797 * zc_name		name of volume
1798 *
1799 * outputs:		none
1800 */
1801static int
1802zfs_ioc_create_minor(zfs_cmd_t *zc)
1803{
1804	return (zvol_create_minor(zc->zc_name, ddi_driver_major(zfs_dip)));
1805}
1806
1807/*
1808 * inputs:
1809 * zc_name		name of volume
1810 *
1811 * outputs:		none
1812 */
1813static int
1814zfs_ioc_remove_minor(zfs_cmd_t *zc)
1815{
1816	return (zvol_remove_minor(zc->zc_name));
1817}
1818
1819/*
1820 * Search the vfs list for a specified resource.  Returns a pointer to it
1821 * or NULL if no suitable entry is found. The caller of this routine
1822 * is responsible for releasing the returned vfs pointer.
1823 */
1824static vfs_t *
1825zfs_get_vfs(const char *resource)
1826{
1827	struct vfs *vfsp;
1828	struct vfs *vfs_found = NULL;
1829
1830	vfs_list_read_lock();
1831	vfsp = rootvfs;
1832	do {
1833		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
1834			VFS_HOLD(vfsp);
1835			vfs_found = vfsp;
1836			break;
1837		}
1838		vfsp = vfsp->vfs_next;
1839	} while (vfsp != rootvfs);
1840	vfs_list_unlock();
1841	return (vfs_found);
1842}
1843
1844/* ARGSUSED */
1845static void
1846zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
1847{
1848	zfs_creat_t *zct = arg;
1849
1850	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
1851}
1852
1853#define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
1854
1855/*
1856 * inputs:
1857 * createprops		list of properties requested by creator
1858 * default_zplver	zpl version to use if unspecified in createprops
1859 * fuids_ok		fuids allowed in this version of the spa?
1860 * os			parent objset pointer (NULL if root fs)
1861 *
1862 * outputs:
1863 * zplprops	values for the zplprops we attach to the master node object
1864 * is_ci	true if requested file system will be purely case-insensitive
1865 *
1866 * Determine the settings for utf8only, normalization and
1867 * casesensitivity.  Specific values may have been requested by the
1868 * creator and/or we can inherit values from the parent dataset.  If
1869 * the file system is of too early a vintage, a creator can not
1870 * request settings for these properties, even if the requested
1871 * setting is the default value.  We don't actually want to create dsl
1872 * properties for these, so remove them from the source nvlist after
1873 * processing.
1874 */
1875static int
1876zfs_fill_zplprops_impl(objset_t *os, uint64_t default_zplver,
1877    boolean_t fuids_ok, nvlist_t *createprops, nvlist_t *zplprops,
1878    boolean_t *is_ci)
1879{
1880	uint64_t zplver = default_zplver;
1881	uint64_t sense = ZFS_PROP_UNDEFINED;
1882	uint64_t norm = ZFS_PROP_UNDEFINED;
1883	uint64_t u8 = ZFS_PROP_UNDEFINED;
1884
1885	ASSERT(zplprops != NULL);
1886
1887	/*
1888	 * Pull out creator prop choices, if any.
1889	 */
1890	if (createprops) {
1891		(void) nvlist_lookup_uint64(createprops,
1892		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
1893		(void) nvlist_lookup_uint64(createprops,
1894		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
1895		(void) nvlist_remove_all(createprops,
1896		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
1897		(void) nvlist_lookup_uint64(createprops,
1898		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
1899		(void) nvlist_remove_all(createprops,
1900		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
1901		(void) nvlist_lookup_uint64(createprops,
1902		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
1903		(void) nvlist_remove_all(createprops,
1904		    zfs_prop_to_name(ZFS_PROP_CASE));
1905	}
1906
1907	/*
1908	 * If the zpl version requested is whacky or the file system
1909	 * or pool is version is too "young" to support normalization
1910	 * and the creator tried to set a value for one of the props,
1911	 * error out.
1912	 */
1913	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
1914	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
1915	    (zplver < ZPL_VERSION_NORMALIZATION &&
1916	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
1917	    sense != ZFS_PROP_UNDEFINED)))
1918		return (ENOTSUP);
1919
1920	/*
1921	 * Put the version in the zplprops
1922	 */
1923	VERIFY(nvlist_add_uint64(zplprops,
1924	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
1925
1926	if (norm == ZFS_PROP_UNDEFINED)
1927		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
1928	VERIFY(nvlist_add_uint64(zplprops,
1929	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
1930
1931	/*
1932	 * If we're normalizing, names must always be valid UTF-8 strings.
1933	 */
1934	if (norm)
1935		u8 = 1;
1936	if (u8 == ZFS_PROP_UNDEFINED)
1937		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
1938	VERIFY(nvlist_add_uint64(zplprops,
1939	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
1940
1941	if (sense == ZFS_PROP_UNDEFINED)
1942		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
1943	VERIFY(nvlist_add_uint64(zplprops,
1944	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
1945
1946	if (is_ci)
1947		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
1948
1949	return (0);
1950}
1951
1952static int
1953zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
1954    nvlist_t *zplprops, boolean_t *is_ci)
1955{
1956	boolean_t fuids_ok = B_TRUE;
1957	uint64_t zplver = ZPL_VERSION;
1958	objset_t *os = NULL;
1959	char parentname[MAXNAMELEN];
1960	char *cp;
1961	int error;
1962
1963	(void) strlcpy(parentname, dataset, sizeof (parentname));
1964	cp = strrchr(parentname, '/');
1965	ASSERT(cp != NULL);
1966	cp[0] = '\0';
1967
1968	if (zfs_earlier_version(dataset, SPA_VERSION_FUID)) {
1969		zplver = ZPL_VERSION_FUID - 1;
1970		fuids_ok = B_FALSE;
1971	}
1972
1973	/*
1974	 * Open parent object set so we can inherit zplprop values.
1975	 */
1976	if ((error = dmu_objset_open(parentname, DMU_OST_ANY,
1977	    DS_MODE_USER | DS_MODE_READONLY, &os)) != 0)
1978		return (error);
1979
1980	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, createprops,
1981	    zplprops, is_ci);
1982	dmu_objset_close(os);
1983	return (error);
1984}
1985
1986static int
1987zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
1988    nvlist_t *zplprops, boolean_t *is_ci)
1989{
1990	boolean_t fuids_ok = B_TRUE;
1991	uint64_t zplver = ZPL_VERSION;
1992	int error;
1993
1994	if (spa_vers < SPA_VERSION_FUID) {
1995		zplver = ZPL_VERSION_FUID - 1;
1996		fuids_ok = B_FALSE;
1997	}
1998
1999	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, createprops,
2000	    zplprops, is_ci);
2001	return (error);
2002}
2003
2004/*
2005 * inputs:
2006 * zc_objset_type	type of objset to create (fs vs zvol)
2007 * zc_name		name of new objset
2008 * zc_value		name of snapshot to clone from (may be empty)
2009 * zc_nvlist_src{_size}	nvlist of properties to apply
2010 *
2011 * outputs: none
2012 */
2013static int
2014zfs_ioc_create(zfs_cmd_t *zc)
2015{
2016	objset_t *clone;
2017	int error = 0;
2018	zfs_creat_t zct;
2019	nvlist_t *nvprops = NULL;
2020	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
2021	dmu_objset_type_t type = zc->zc_objset_type;
2022
2023	switch (type) {
2024
2025	case DMU_OST_ZFS:
2026		cbfunc = zfs_create_cb;
2027		break;
2028
2029	case DMU_OST_ZVOL:
2030		cbfunc = zvol_create_cb;
2031		break;
2032
2033	default:
2034		cbfunc = NULL;
2035		break;
2036	}
2037	if (strchr(zc->zc_name, '@') ||
2038	    strchr(zc->zc_name, '%'))
2039		return (EINVAL);
2040
2041	if (zc->zc_nvlist_src != NULL &&
2042	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2043	    &nvprops)) != 0)
2044		return (error);
2045
2046	zct.zct_zplprops = NULL;
2047	zct.zct_props = nvprops;
2048
2049	if (zc->zc_value[0] != '\0') {
2050		/*
2051		 * We're creating a clone of an existing snapshot.
2052		 */
2053		zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2054		if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
2055			nvlist_free(nvprops);
2056			return (EINVAL);
2057		}
2058
2059		error = dmu_objset_open(zc->zc_value, type,
2060		    DS_MODE_USER | DS_MODE_READONLY, &clone);
2061		if (error) {
2062			nvlist_free(nvprops);
2063			return (error);
2064		}
2065
2066		error = dmu_objset_create(zc->zc_name, type, clone, 0,
2067		    NULL, NULL);
2068		if (error) {
2069			dmu_objset_close(clone);
2070			nvlist_free(nvprops);
2071			return (error);
2072		}
2073		dmu_objset_close(clone);
2074	} else {
2075		boolean_t is_insensitive = B_FALSE;
2076
2077		if (cbfunc == NULL) {
2078			nvlist_free(nvprops);
2079			return (EINVAL);
2080		}
2081
2082		if (type == DMU_OST_ZVOL) {
2083			uint64_t volsize, volblocksize;
2084
2085			if (nvprops == NULL ||
2086			    nvlist_lookup_uint64(nvprops,
2087			    zfs_prop_to_name(ZFS_PROP_VOLSIZE),
2088			    &volsize) != 0) {
2089				nvlist_free(nvprops);
2090				return (EINVAL);
2091			}
2092
2093			if ((error = nvlist_lookup_uint64(nvprops,
2094			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
2095			    &volblocksize)) != 0 && error != ENOENT) {
2096				nvlist_free(nvprops);
2097				return (EINVAL);
2098			}
2099
2100			if (error != 0)
2101				volblocksize = zfs_prop_default_numeric(
2102				    ZFS_PROP_VOLBLOCKSIZE);
2103
2104			if ((error = zvol_check_volblocksize(
2105			    volblocksize)) != 0 ||
2106			    (error = zvol_check_volsize(volsize,
2107			    volblocksize)) != 0) {
2108				nvlist_free(nvprops);
2109				return (error);
2110			}
2111		} else if (type == DMU_OST_ZFS) {
2112			int error;
2113
2114			/*
2115			 * We have to have normalization and
2116			 * case-folding flags correct when we do the
2117			 * file system creation, so go figure them out
2118			 * now.
2119			 */
2120			VERIFY(nvlist_alloc(&zct.zct_zplprops,
2121			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
2122			error = zfs_fill_zplprops(zc->zc_name, nvprops,
2123			    zct.zct_zplprops, &is_insensitive);
2124			if (error != 0) {
2125				nvlist_free(nvprops);
2126				nvlist_free(zct.zct_zplprops);
2127				return (error);
2128			}
2129		}
2130		error = dmu_objset_create(zc->zc_name, type, NULL,
2131		    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
2132		nvlist_free(zct.zct_zplprops);
2133	}
2134
2135	/*
2136	 * It would be nice to do this atomically.
2137	 */
2138	if (error == 0) {
2139		if ((error = zfs_set_prop_nvlist(zc->zc_name, nvprops)) != 0)
2140			(void) dmu_objset_destroy(zc->zc_name);
2141	}
2142	nvlist_free(nvprops);
2143	return (error);
2144}
2145
2146struct snap_prop_arg {
2147	nvlist_t *nvprops;
2148	const char *snapname;
2149};
2150
2151static int
2152set_snap_props(char *name, void *arg)
2153{
2154	struct snap_prop_arg *snpa = arg;
2155	int len = strlen(name) + strlen(snpa->snapname) + 2;
2156	char *buf = kmem_alloc(len, KM_SLEEP);
2157	int err;
2158
2159	(void) snprintf(buf, len, "%s@%s", name, snpa->snapname);
2160	err = zfs_set_prop_nvlist(buf, snpa->nvprops);
2161	if (err)
2162		(void) dmu_objset_destroy(buf);
2163	kmem_free(buf, len);
2164	return (err);
2165}
2166
2167/*
2168 * inputs:
2169 * zc_name	name of filesystem
2170 * zc_value	short name of snapshot
2171 * zc_cookie	recursive flag
2172 *
2173 * outputs:	none
2174 */
2175static int
2176zfs_ioc_snapshot(zfs_cmd_t *zc)
2177{
2178	nvlist_t *nvprops = NULL;
2179	int error;
2180	boolean_t recursive = zc->zc_cookie;
2181
2182	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2183		return (EINVAL);
2184
2185	if (zc->zc_nvlist_src != NULL &&
2186	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2187	    &nvprops)) != 0)
2188		return (error);
2189
2190	error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, recursive);
2191
2192	/*
2193	 * It would be nice to do this atomically.
2194	 */
2195	if (error == 0) {
2196		struct snap_prop_arg snpa;
2197		snpa.nvprops = nvprops;
2198		snpa.snapname = zc->zc_value;
2199		if (recursive) {
2200			error = dmu_objset_find(zc->zc_name,
2201			    set_snap_props, &snpa, DS_FIND_CHILDREN);
2202			if (error) {
2203				(void) dmu_snapshots_destroy(zc->zc_name,
2204				    zc->zc_value);
2205			}
2206		} else {
2207			error = set_snap_props(zc->zc_name, &snpa);
2208		}
2209	}
2210	nvlist_free(nvprops);
2211	return (error);
2212}
2213
2214int
2215zfs_unmount_snap(char *name, void *arg)
2216{
2217	vfs_t *vfsp = NULL;
2218
2219	if (arg) {
2220		char *snapname = arg;
2221		int len = strlen(name) + strlen(snapname) + 2;
2222		char *buf = kmem_alloc(len, KM_SLEEP);
2223
2224		(void) strcpy(buf, name);
2225		(void) strcat(buf, "@");
2226		(void) strcat(buf, snapname);
2227		vfsp = zfs_get_vfs(buf);
2228		kmem_free(buf, len);
2229	} else if (strchr(name, '@')) {
2230		vfsp = zfs_get_vfs(name);
2231	}
2232
2233	if (vfsp) {
2234		/*
2235		 * Always force the unmount for snapshots.
2236		 */
2237		int flag = MS_FORCE;
2238		int err;
2239
2240		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
2241			VFS_RELE(vfsp);
2242			return (err);
2243		}
2244		VFS_RELE(vfsp);
2245		if ((err = dounmount(vfsp, flag, kcred)) != 0)
2246			return (err);
2247	}
2248	return (0);
2249}
2250
2251/*
2252 * inputs:
2253 * zc_name	name of filesystem
2254 * zc_value	short name of snapshot
2255 *
2256 * outputs:	none
2257 */
2258static int
2259zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
2260{
2261	int err;
2262
2263	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2264		return (EINVAL);
2265	err = dmu_objset_find(zc->zc_name,
2266	    zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
2267	if (err)
2268		return (err);
2269	return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value));
2270}
2271
2272/*
2273 * inputs:
2274 * zc_name		name of dataset to destroy
2275 * zc_objset_type	type of objset
2276 *
2277 * outputs:		none
2278 */
2279static int
2280zfs_ioc_destroy(zfs_cmd_t *zc)
2281{
2282	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
2283		int err = zfs_unmount_snap(zc->zc_name, NULL);
2284		if (err)
2285			return (err);
2286	}
2287
2288	return (dmu_objset_destroy(zc->zc_name));
2289}
2290
2291/*
2292 * inputs:
2293 * zc_name	name of dataset to rollback (to most recent snapshot)
2294 *
2295 * outputs:	none
2296 */
2297static int
2298zfs_ioc_rollback(zfs_cmd_t *zc)
2299{
2300	objset_t *os;
2301	int error;
2302	zfsvfs_t *zfsvfs = NULL;
2303
2304	/*
2305	 * Get the zfsvfs for the receiving objset. There
2306	 * won't be one if we're operating on a zvol, if the
2307	 * objset doesn't exist yet, or is not mounted.
2308	 */
2309	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, DS_MODE_USER, &os);
2310	if (error)
2311		return (error);
2312
2313	if (dmu_objset_type(os) == DMU_OST_ZFS) {
2314		mutex_enter(&os->os->os_user_ptr_lock);
2315		zfsvfs = dmu_objset_get_user(os);
2316		if (zfsvfs != NULL)
2317			VFS_HOLD(zfsvfs->z_vfs);
2318		mutex_exit(&os->os->os_user_ptr_lock);
2319	}
2320
2321	if (zfsvfs != NULL) {
2322		char osname[MAXNAMELEN];
2323		int mode;
2324
2325		error = zfs_suspend_fs(zfsvfs, osname, &mode);
2326		if (error == 0) {
2327			int resume_err;
2328
2329			ASSERT(strcmp(osname, zc->zc_name) == 0);
2330			error = dmu_objset_rollback(os);
2331			resume_err = zfs_resume_fs(zfsvfs, osname, mode);
2332			error = error ? error : resume_err;
2333		} else {
2334			dmu_objset_close(os);
2335		}
2336		VFS_RELE(zfsvfs->z_vfs);
2337	} else {
2338		error = dmu_objset_rollback(os);
2339	}
2340	/* Note, the dmu_objset_rollback() releases the objset for us. */
2341
2342	return (error);
2343}
2344
2345/*
2346 * inputs:
2347 * zc_name	old name of dataset
2348 * zc_value	new name of dataset
2349 * zc_cookie	recursive flag (only valid for snapshots)
2350 *
2351 * outputs:	none
2352 */
2353static int
2354zfs_ioc_rename(zfs_cmd_t *zc)
2355{
2356	boolean_t recursive = zc->zc_cookie & 1;
2357
2358	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2359	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
2360	    strchr(zc->zc_value, '%'))
2361		return (EINVAL);
2362
2363	/*
2364	 * Unmount snapshot unless we're doing a recursive rename,
2365	 * in which case the dataset code figures out which snapshots
2366	 * to unmount.
2367	 */
2368	if (!recursive && strchr(zc->zc_name, '@') != NULL &&
2369	    zc->zc_objset_type == DMU_OST_ZFS) {
2370		int err = zfs_unmount_snap(zc->zc_name, NULL);
2371		if (err)
2372			return (err);
2373	}
2374	return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
2375}
2376
2377static void
2378clear_props(char *dataset, nvlist_t *props)
2379{
2380	zfs_cmd_t *zc;
2381	nvpair_t *prop;
2382
2383	if (props == NULL)
2384		return;
2385	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
2386	(void) strcpy(zc->zc_name, dataset);
2387	for (prop = nvlist_next_nvpair(props, NULL); prop;
2388	    prop = nvlist_next_nvpair(props, prop)) {
2389		(void) strcpy(zc->zc_value, nvpair_name(prop));
2390		if (zfs_secpolicy_inherit(zc, CRED()) == 0)
2391			(void) zfs_ioc_inherit_prop(zc);
2392	}
2393	kmem_free(zc, sizeof (zfs_cmd_t));
2394}
2395
2396/*
2397 * inputs:
2398 * zc_name		name of containing filesystem
2399 * zc_nvlist_src{_size}	nvlist of properties to apply
2400 * zc_value		name of snapshot to create
2401 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
2402 * zc_cookie		file descriptor to recv from
2403 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
2404 * zc_guid		force flag
2405 *
2406 * outputs:
2407 * zc_cookie		number of bytes read
2408 */
2409static int
2410zfs_ioc_recv(zfs_cmd_t *zc)
2411{
2412	file_t *fp;
2413	objset_t *os;
2414	dmu_recv_cookie_t drc;
2415	zfsvfs_t *zfsvfs = NULL;
2416	boolean_t force = (boolean_t)zc->zc_guid;
2417	int error, fd;
2418	offset_t off;
2419	nvlist_t *props = NULL;
2420	nvlist_t *origprops = NULL;
2421	objset_t *origin = NULL;
2422	char *tosnap;
2423	char tofs[ZFS_MAXNAMELEN];
2424
2425	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
2426	    strchr(zc->zc_value, '@') == NULL ||
2427	    strchr(zc->zc_value, '%'))
2428		return (EINVAL);
2429
2430	(void) strcpy(tofs, zc->zc_value);
2431	tosnap = strchr(tofs, '@');
2432	*tosnap = '\0';
2433	tosnap++;
2434
2435	if (zc->zc_nvlist_src != NULL &&
2436	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2437	    &props)) != 0)
2438		return (error);
2439
2440	fd = zc->zc_cookie;
2441	fp = getf(fd);
2442	if (fp == NULL) {
2443		nvlist_free(props);
2444		return (EBADF);
2445	}
2446
2447	if (dmu_objset_open(tofs, DMU_OST_ANY,
2448	    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
2449		/*
2450		 * Try to get the zfsvfs for the receiving objset.
2451		 * There won't be one if we're operating on a zvol,
2452		 * if the objset doesn't exist yet, or is not mounted.
2453		 */
2454		mutex_enter(&os->os->os_user_ptr_lock);
2455		if (zfsvfs = dmu_objset_get_user(os)) {
2456			if (!mutex_tryenter(&zfsvfs->z_online_recv_lock)) {
2457				mutex_exit(&os->os->os_user_ptr_lock);
2458				dmu_objset_close(os);
2459				zfsvfs = NULL;
2460				error = EBUSY;
2461				goto out;
2462			}
2463			VFS_HOLD(zfsvfs->z_vfs);
2464		}
2465		mutex_exit(&os->os->os_user_ptr_lock);
2466
2467		/*
2468		 * If new properties are supplied, they are to completely
2469		 * replace the existing ones, so stash away the existing ones.
2470		 */
2471		if (props)
2472			(void) dsl_prop_get_all(os, &origprops, TRUE);
2473
2474		dmu_objset_close(os);
2475	}
2476
2477	if (zc->zc_string[0]) {
2478		error = dmu_objset_open(zc->zc_string, DMU_OST_ANY,
2479		    DS_MODE_USER | DS_MODE_READONLY, &origin);
2480		if (error)
2481			goto out;
2482	}
2483
2484	error = dmu_recv_begin(tofs, tosnap, &zc->zc_begin_record,
2485	    force, origin, zfsvfs != NULL, &drc);
2486	if (origin)
2487		dmu_objset_close(origin);
2488	if (error)
2489		goto out;
2490
2491	/*
2492	 * Reset properties.  We do this before we receive the stream
2493	 * so that the properties are applied to the new data.
2494	 */
2495	if (props) {
2496		clear_props(tofs, origprops);
2497		/*
2498		 * XXX - Note, this is all-or-nothing; should be best-effort.
2499		 */
2500		(void) zfs_set_prop_nvlist(tofs, props);
2501	}
2502
2503	off = fp->f_offset;
2504	error = dmu_recv_stream(&drc, fp->f_vnode, &off);
2505
2506	if (error == 0 && zfsvfs) {
2507		char osname[MAXNAMELEN];
2508		int mode;
2509
2510		/* online recv */
2511		error = zfs_suspend_fs(zfsvfs, osname, &mode);
2512		if (error == 0) {
2513			int resume_err;
2514
2515			error = dmu_recv_end(&drc);
2516			resume_err = zfs_resume_fs(zfsvfs, osname, mode);
2517			error = error ? error : resume_err;
2518		} else {
2519			dmu_recv_abort_cleanup(&drc);
2520		}
2521	} else if (error == 0) {
2522		error = dmu_recv_end(&drc);
2523	}
2524
2525	zc->zc_cookie = off - fp->f_offset;
2526	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
2527		fp->f_offset = off;
2528
2529	/*
2530	 * On error, restore the original props.
2531	 */
2532	if (error && props) {
2533		clear_props(tofs, props);
2534		(void) zfs_set_prop_nvlist(tofs, origprops);
2535	}
2536out:
2537	if (zfsvfs) {
2538		mutex_exit(&zfsvfs->z_online_recv_lock);
2539		VFS_RELE(zfsvfs->z_vfs);
2540	}
2541	nvlist_free(props);
2542	nvlist_free(origprops);
2543	releasef(fd);
2544	return (error);
2545}
2546
2547/*
2548 * inputs:
2549 * zc_name	name of snapshot to send
2550 * zc_value	short name of incremental fromsnap (may be empty)
2551 * zc_cookie	file descriptor to send stream to
2552 * zc_obj	fromorigin flag (mutually exclusive with zc_value)
2553 *
2554 * outputs: none
2555 */
2556static int
2557zfs_ioc_send(zfs_cmd_t *zc)
2558{
2559	objset_t *fromsnap = NULL;
2560	objset_t *tosnap;
2561	file_t *fp;
2562	int error;
2563	offset_t off;
2564
2565	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
2566	    DS_MODE_USER | DS_MODE_READONLY, &tosnap);
2567	if (error)
2568		return (error);
2569
2570	if (zc->zc_value[0] != '\0') {
2571		char buf[MAXPATHLEN];
2572		char *cp;
2573
2574		(void) strncpy(buf, zc->zc_name, sizeof (buf));
2575		cp = strchr(buf, '@');
2576		if (cp)
2577			*(cp+1) = 0;
2578		(void) strncat(buf, zc->zc_value, sizeof (buf));
2579		error = dmu_objset_open(buf, DMU_OST_ANY,
2580		    DS_MODE_USER | DS_MODE_READONLY, &fromsnap);
2581		if (error) {
2582			dmu_objset_close(tosnap);
2583			return (error);
2584		}
2585	}
2586
2587	fp = getf(zc->zc_cookie);
2588	if (fp == NULL) {
2589		dmu_objset_close(tosnap);
2590		if (fromsnap)
2591			dmu_objset_close(fromsnap);
2592		return (EBADF);
2593	}
2594
2595	off = fp->f_offset;
2596	error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off);
2597
2598	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
2599		fp->f_offset = off;
2600	releasef(zc->zc_cookie);
2601	if (fromsnap)
2602		dmu_objset_close(fromsnap);
2603	dmu_objset_close(tosnap);
2604	return (error);
2605}
2606
2607static int
2608zfs_ioc_inject_fault(zfs_cmd_t *zc)
2609{
2610	int id, error;
2611
2612	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
2613	    &zc->zc_inject_record);
2614
2615	if (error == 0)
2616		zc->zc_guid = (uint64_t)id;
2617
2618	return (error);
2619}
2620
2621static int
2622zfs_ioc_clear_fault(zfs_cmd_t *zc)
2623{
2624	return (zio_clear_fault((int)zc->zc_guid));
2625}
2626
2627static int
2628zfs_ioc_inject_list_next(zfs_cmd_t *zc)
2629{
2630	int id = (int)zc->zc_guid;
2631	int error;
2632
2633	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
2634	    &zc->zc_inject_record);
2635
2636	zc->zc_guid = id;
2637
2638	return (error);
2639}
2640
2641static int
2642zfs_ioc_error_log(zfs_cmd_t *zc)
2643{
2644	spa_t *spa;
2645	int error;
2646	size_t count = (size_t)zc->zc_nvlist_dst_size;
2647
2648	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2649		return (error);
2650
2651	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
2652	    &count);
2653	if (error == 0)
2654		zc->zc_nvlist_dst_size = count;
2655	else
2656		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
2657
2658	spa_close(spa, FTAG);
2659
2660	return (error);
2661}
2662
2663static int
2664zfs_ioc_clear(zfs_cmd_t *zc)
2665{
2666	spa_t *spa;
2667	vdev_t *vd;
2668	uint64_t txg;
2669	int error;
2670
2671	/*
2672	 * On zpool clear we also fix up missing slogs
2673	 */
2674	mutex_enter(&spa_namespace_lock);
2675	spa = spa_lookup(zc->zc_name);
2676	if (spa == NULL) {
2677		mutex_exit(&spa_namespace_lock);
2678		return (EIO);
2679	}
2680	if (spa->spa_log_state == SPA_LOG_MISSING) {
2681		/* we need to let spa_open/spa_load clear the chains */
2682		spa->spa_log_state = SPA_LOG_CLEAR;
2683	}
2684	mutex_exit(&spa_namespace_lock);
2685
2686	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2687		return (error);
2688
2689	/*
2690	 * Try to resume any I/Os which may have been suspended
2691	 * as a result of a complete pool failure.
2692	 */
2693	if (!list_is_empty(&spa->spa_zio_list)) {
2694		if (zio_vdev_resume_io(spa) != 0) {
2695			spa_close(spa, FTAG);
2696			return (EIO);
2697		}
2698	}
2699
2700	txg = spa_vdev_enter(spa);
2701
2702	if (zc->zc_guid == 0) {
2703		vd = NULL;
2704	} else {
2705		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
2706		if (vd == NULL) {
2707			(void) spa_vdev_exit(spa, NULL, txg, ENODEV);
2708			spa_close(spa, FTAG);
2709			return (ENODEV);
2710		}
2711	}
2712
2713	vdev_clear(spa, vd, B_TRUE);
2714
2715	(void) spa_vdev_exit(spa, NULL, txg, 0);
2716
2717	spa_close(spa, FTAG);
2718
2719	return (0);
2720}
2721
2722/*
2723 * inputs:
2724 * zc_name	name of filesystem
2725 * zc_value	name of origin snapshot
2726 *
2727 * outputs:	none
2728 */
2729static int
2730zfs_ioc_promote(zfs_cmd_t *zc)
2731{
2732	char *cp;
2733
2734	/*
2735	 * We don't need to unmount *all* the origin fs's snapshots, but
2736	 * it's easier.
2737	 */
2738	cp = strchr(zc->zc_value, '@');
2739	if (cp)
2740		*cp = '\0';
2741	(void) dmu_objset_find(zc->zc_value,
2742	    zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
2743	return (dsl_dataset_promote(zc->zc_name));
2744}
2745
2746/*
2747 * We don't want to have a hard dependency
2748 * against some special symbols in sharefs
2749 * nfs, and smbsrv.  Determine them if needed when
2750 * the first file system is shared.
2751 * Neither sharefs, nfs or smbsrv are unloadable modules.
2752 */
2753int (*znfsexport_fs)(void *arg);
2754int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
2755int (*zsmbexport_fs)(void *arg, boolean_t add_share);
2756
2757int zfs_nfsshare_inited;
2758int zfs_smbshare_inited;
2759
2760ddi_modhandle_t nfs_mod;
2761ddi_modhandle_t sharefs_mod;
2762ddi_modhandle_t smbsrv_mod;
2763kmutex_t zfs_share_lock;
2764
2765static int
2766zfs_init_sharefs()
2767{
2768	int error;
2769
2770	ASSERT(MUTEX_HELD(&zfs_share_lock));
2771	/* Both NFS and SMB shares also require sharetab support. */
2772	if (sharefs_mod == NULL && ((sharefs_mod =
2773	    ddi_modopen("fs/sharefs",
2774	    KRTLD_MODE_FIRST, &error)) == NULL)) {
2775		return (ENOSYS);
2776	}
2777	if (zshare_fs == NULL && ((zshare_fs =
2778	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
2779	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
2780		return (ENOSYS);
2781	}
2782	return (0);
2783}
2784
2785static int
2786zfs_ioc_share(zfs_cmd_t *zc)
2787{
2788	int error;
2789	int opcode;
2790
2791	switch (zc->zc_share.z_sharetype) {
2792	case ZFS_SHARE_NFS:
2793	case ZFS_UNSHARE_NFS:
2794		if (zfs_nfsshare_inited == 0) {
2795			mutex_enter(&zfs_share_lock);
2796			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
2797			    KRTLD_MODE_FIRST, &error)) == NULL)) {
2798				mutex_exit(&zfs_share_lock);
2799				return (ENOSYS);
2800			}
2801			if (znfsexport_fs == NULL &&
2802			    ((znfsexport_fs = (int (*)(void *))
2803			    ddi_modsym(nfs_mod,
2804			    "nfs_export", &error)) == NULL)) {
2805				mutex_exit(&zfs_share_lock);
2806				return (ENOSYS);
2807			}
2808			error = zfs_init_sharefs();
2809			if (error) {
2810				mutex_exit(&zfs_share_lock);
2811				return (ENOSYS);
2812			}
2813			zfs_nfsshare_inited = 1;
2814			mutex_exit(&zfs_share_lock);
2815		}
2816		break;
2817	case ZFS_SHARE_SMB:
2818	case ZFS_UNSHARE_SMB:
2819		if (zfs_smbshare_inited == 0) {
2820			mutex_enter(&zfs_share_lock);
2821			if (smbsrv_mod == NULL && ((smbsrv_mod =
2822			    ddi_modopen("drv/smbsrv",
2823			    KRTLD_MODE_FIRST, &error)) == NULL)) {
2824				mutex_exit(&zfs_share_lock);
2825				return (ENOSYS);
2826			}
2827			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
2828			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
2829			    "smb_server_share", &error)) == NULL)) {
2830				mutex_exit(&zfs_share_lock);
2831				return (ENOSYS);
2832			}
2833			error = zfs_init_sharefs();
2834			if (error) {
2835				mutex_exit(&zfs_share_lock);
2836				return (ENOSYS);
2837			}
2838			zfs_smbshare_inited = 1;
2839			mutex_exit(&zfs_share_lock);
2840		}
2841		break;
2842	default:
2843		return (EINVAL);
2844	}
2845
2846	switch (zc->zc_share.z_sharetype) {
2847	case ZFS_SHARE_NFS:
2848	case ZFS_UNSHARE_NFS:
2849		if (error =
2850		    znfsexport_fs((void *)
2851		    (uintptr_t)zc->zc_share.z_exportdata))
2852			return (error);
2853		break;
2854	case ZFS_SHARE_SMB:
2855	case ZFS_UNSHARE_SMB:
2856		if (error = zsmbexport_fs((void *)
2857		    (uintptr_t)zc->zc_share.z_exportdata,
2858		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
2859		    B_TRUE : B_FALSE)) {
2860			return (error);
2861		}
2862		break;
2863	}
2864
2865	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
2866	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
2867	    SHAREFS_ADD : SHAREFS_REMOVE;
2868
2869	/*
2870	 * Add or remove share from sharetab
2871	 */
2872	error = zshare_fs(opcode,
2873	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
2874	    zc->zc_share.z_sharemax);
2875
2876	return (error);
2877
2878}
2879
2880/*
2881 * pool create, destroy, and export don't log the history as part of
2882 * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
2883 * do the logging of those commands.
2884 */
2885static zfs_ioc_vec_t zfs_ioc_vec[] = {
2886	{ zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE },
2887	{ zfs_ioc_pool_destroy,	zfs_secpolicy_config, POOL_NAME, B_FALSE },
2888	{ zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE },
2889	{ zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE },
2890	{ zfs_ioc_pool_configs,	zfs_secpolicy_none, NO_NAME, B_FALSE },
2891	{ zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE },
2892	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE },
2893	{ zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME, B_TRUE },
2894	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE },
2895	{ zfs_ioc_pool_upgrade,	zfs_secpolicy_config, POOL_NAME, B_TRUE },
2896	{ zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE },
2897	{ zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE },
2898	{ zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE },
2899	{ zfs_ioc_vdev_set_state, zfs_secpolicy_config,	POOL_NAME, B_TRUE },
2900	{ zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE },
2901	{ zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE },
2902	{ zfs_ioc_vdev_setpath,	zfs_secpolicy_config, POOL_NAME, B_FALSE },
2903	{ zfs_ioc_objset_stats,	zfs_secpolicy_read, DATASET_NAME, B_FALSE },
2904	{ zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE },
2905	{ zfs_ioc_dataset_list_next, zfs_secpolicy_read,
2906	    DATASET_NAME, B_FALSE },
2907	{ zfs_ioc_snapshot_list_next, zfs_secpolicy_read,
2908	    DATASET_NAME, B_FALSE },
2909	{ zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE },
2910	{ zfs_ioc_create_minor,	zfs_secpolicy_minor, DATASET_NAME, B_FALSE },
2911	{ zfs_ioc_remove_minor,	zfs_secpolicy_minor, DATASET_NAME, B_FALSE },
2912	{ zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE },
2913	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE },
2914	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE },
2915	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE },
2916	{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE },
2917	{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE },
2918	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE },
2919	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE },
2920	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE },
2921	{ zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE },
2922	{ zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE },
2923	{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE },
2924	{ zfs_ioc_destroy_snaps, zfs_secpolicy_destroy,	DATASET_NAME, B_TRUE },
2925	{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE },
2926	{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_config, POOL_NAME, B_FALSE },
2927	{ zfs_ioc_obj_to_path, zfs_secpolicy_config, NO_NAME, B_FALSE },
2928	{ zfs_ioc_pool_set_props, zfs_secpolicy_config,	POOL_NAME, B_TRUE },
2929	{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE },
2930	{ zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE },
2931	{ zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE },
2932	{ zfs_ioc_iscsi_perm_check, zfs_secpolicy_iscsi,
2933	    DATASET_NAME, B_FALSE },
2934	{ zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE },
2935	{ zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE },
2936};
2937
2938static int
2939zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
2940{
2941	zfs_cmd_t *zc;
2942	uint_t vec;
2943	int error, rc;
2944
2945	if (getminor(dev) != 0)
2946		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
2947
2948	vec = cmd - ZFS_IOC;
2949	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
2950
2951	if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
2952		return (EINVAL);
2953
2954	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2955
2956	error = xcopyin((void *)arg, zc, sizeof (zfs_cmd_t));
2957
2958	if (error == 0)
2959		error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr);
2960
2961	/*
2962	 * Ensure that all pool/dataset names are valid before we pass down to
2963	 * the lower layers.
2964	 */
2965	if (error == 0) {
2966		zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
2967		switch (zfs_ioc_vec[vec].zvec_namecheck) {
2968		case POOL_NAME:
2969			if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
2970				error = EINVAL;
2971			break;
2972
2973		case DATASET_NAME:
2974			if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
2975				error = EINVAL;
2976			break;
2977
2978		case NO_NAME:
2979			break;
2980		}
2981	}
2982
2983	if (error == 0)
2984		error = zfs_ioc_vec[vec].zvec_func(zc);
2985
2986	rc = xcopyout(zc, (void *)arg, sizeof (zfs_cmd_t));
2987	if (error == 0) {
2988		error = rc;
2989		if (zfs_ioc_vec[vec].zvec_his_log == B_TRUE)
2990			zfs_log_history(zc);
2991	}
2992
2993	kmem_free(zc, sizeof (zfs_cmd_t));
2994	return (error);
2995}
2996
2997static int
2998zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
2999{
3000	if (cmd != DDI_ATTACH)
3001		return (DDI_FAILURE);
3002
3003	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
3004	    DDI_PSEUDO, 0) == DDI_FAILURE)
3005		return (DDI_FAILURE);
3006
3007	zfs_dip = dip;
3008
3009	ddi_report_dev(dip);
3010
3011	return (DDI_SUCCESS);
3012}
3013
3014static int
3015zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
3016{
3017	if (spa_busy() || zfs_busy() || zvol_busy())
3018		return (DDI_FAILURE);
3019
3020	if (cmd != DDI_DETACH)
3021		return (DDI_FAILURE);
3022
3023	zfs_dip = NULL;
3024
3025	ddi_prop_remove_all(dip);
3026	ddi_remove_minor_node(dip, NULL);
3027
3028	return (DDI_SUCCESS);
3029}
3030
3031/*ARGSUSED*/
3032static int
3033zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
3034{
3035	switch (infocmd) {
3036	case DDI_INFO_DEVT2DEVINFO:
3037		*result = zfs_dip;
3038		return (DDI_SUCCESS);
3039
3040	case DDI_INFO_DEVT2INSTANCE:
3041		*result = (void *)0;
3042		return (DDI_SUCCESS);
3043	}
3044
3045	return (DDI_FAILURE);
3046}
3047
3048/*
3049 * OK, so this is a little weird.
3050 *
3051 * /dev/zfs is the control node, i.e. minor 0.
3052 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
3053 *
3054 * /dev/zfs has basically nothing to do except serve up ioctls,
3055 * so most of the standard driver entry points are in zvol.c.
3056 */
3057static struct cb_ops zfs_cb_ops = {
3058	zvol_open,	/* open */
3059	zvol_close,	/* close */
3060	zvol_strategy,	/* strategy */
3061	nodev,		/* print */
3062	zvol_dump,	/* dump */
3063	zvol_read,	/* read */
3064	zvol_write,	/* write */
3065	zfsdev_ioctl,	/* ioctl */
3066	nodev,		/* devmap */
3067	nodev,		/* mmap */
3068	nodev,		/* segmap */
3069	nochpoll,	/* poll */
3070	ddi_prop_op,	/* prop_op */
3071	NULL,		/* streamtab */
3072	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
3073	CB_REV,		/* version */
3074	nodev,		/* async read */
3075	nodev,		/* async write */
3076};
3077
3078static struct dev_ops zfs_dev_ops = {
3079	DEVO_REV,	/* version */
3080	0,		/* refcnt */
3081	zfs_info,	/* info */
3082	nulldev,	/* identify */
3083	nulldev,	/* probe */
3084	zfs_attach,	/* attach */
3085	zfs_detach,	/* detach */
3086	nodev,		/* reset */
3087	&zfs_cb_ops,	/* driver operations */
3088	NULL,		/* no bus operations */
3089	NULL,		/* power */
3090	ddi_quiesce_not_needed,	/* quiesce */
3091};
3092
3093static struct modldrv zfs_modldrv = {
3094	&mod_driverops,
3095	"ZFS storage pool",
3096	&zfs_dev_ops
3097};
3098
3099static struct modlinkage modlinkage = {
3100	MODREV_1,
3101	(void *)&zfs_modlfs,
3102	(void *)&zfs_modldrv,
3103	NULL
3104};
3105
3106
3107uint_t zfs_fsyncer_key;
3108extern uint_t rrw_tsd_key;
3109
3110int
3111_init(void)
3112{
3113	int error;
3114
3115	spa_init(FREAD | FWRITE);
3116	zfs_init();
3117	zvol_init();
3118
3119	if ((error = mod_install(&modlinkage)) != 0) {
3120		zvol_fini();
3121		zfs_fini();
3122		spa_fini();
3123		return (error);
3124	}
3125
3126	tsd_create(&zfs_fsyncer_key, NULL);
3127	tsd_create(&rrw_tsd_key, NULL);
3128
3129	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
3130	ASSERT(error == 0);
3131	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
3132
3133	return (0);
3134}
3135
3136int
3137_fini(void)
3138{
3139	int error;
3140
3141	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
3142		return (EBUSY);
3143
3144	if ((error = mod_remove(&modlinkage)) != 0)
3145		return (error);
3146
3147	zvol_fini();
3148	zfs_fini();
3149	spa_fini();
3150	if (zfs_nfsshare_inited)
3151		(void) ddi_modclose(nfs_mod);
3152	if (zfs_smbshare_inited)
3153		(void) ddi_modclose(smbsrv_mod);
3154	if (zfs_nfsshare_inited || zfs_smbshare_inited)
3155		(void) ddi_modclose(sharefs_mod);
3156
3157	tsd_destroy(&zfs_fsyncer_key);
3158	ldi_ident_release(zfs_li);
3159	zfs_li = NULL;
3160	mutex_destroy(&zfs_share_lock);
3161
3162	return (error);
3163}
3164
3165int
3166_info(struct modinfo *modinfop)
3167{
3168	return (mod_info(&modlinkage, modinfop));
3169}
3170