zfs_ioctl.c revision 230397
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
24 * All rights reserved.
25 * Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org>
26 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
27 * Copyright (c) 2011 by Delphix. All rights reserved.
28 */
29
30#include <sys/types.h>
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/conf.h>
34#include <sys/kernel.h>
35#include <sys/lock.h>
36#include <sys/malloc.h>
37#include <sys/mutex.h>
38#include <sys/proc.h>
39#include <sys/errno.h>
40#include <sys/uio.h>
41#include <sys/buf.h>
42#include <sys/file.h>
43#include <sys/kmem.h>
44#include <sys/conf.h>
45#include <sys/cmn_err.h>
46#include <sys/stat.h>
47#include <sys/zfs_ioctl.h>
48#include <sys/zfs_vfsops.h>
49#include <sys/zfs_znode.h>
50#include <sys/zap.h>
51#include <sys/spa.h>
52#include <sys/spa_impl.h>
53#include <sys/vdev.h>
54#include <sys/dmu.h>
55#include <sys/dsl_dir.h>
56#include <sys/dsl_dataset.h>
57#include <sys/dsl_prop.h>
58#include <sys/dsl_deleg.h>
59#include <sys/dmu_objset.h>
60#include <sys/sunddi.h>
61#include <sys/policy.h>
62#include <sys/zone.h>
63#include <sys/nvpair.h>
64#include <sys/mount.h>
65#include <sys/taskqueue.h>
66#include <sys/sdt.h>
67#include <sys/varargs.h>
68#include <sys/fs/zfs.h>
69#include <sys/zfs_ctldir.h>
70#include <sys/zfs_dir.h>
71#include <sys/zfs_onexit.h>
72#include <sys/zvol.h>
73#include <sys/dsl_scan.h>
74#include <sys/dmu_objset.h>
75
76#include "zfs_namecheck.h"
77#include "zfs_prop.h"
78#include "zfs_deleg.h"
79#include "zfs_comutil.h"
80#include "zfs_ioctl_compat.h"
81
/* Break the build unless zfs_cmd_t is strictly smaller than IOCPARM_MAX. */
CTASSERT(sizeof(zfs_cmd_t) < IOCPARM_MAX);

/*
 * Integer knob registered both as the loader tunable
 * "vfs.zfs.snapshot_list_prefetch" and as a read-write sysctl of the same
 * name under the vfs.zfs node.  Its consumers are not in this part of the
 * file.
 */
static int snapshot_list_prefetch;
SYSCTL_DECL(_vfs_zfs);
TUNABLE_INT("vfs.zfs.snapshot_list_prefetch", &snapshot_list_prefetch);
SYSCTL_INT(_vfs_zfs, OID_AUTO, snapshot_list_prefetch, CTLFLAG_RW,
    &snapshot_list_prefetch, 0, "Prefetch data when listing snapshots");

/*
 * Character device handle.
 * NOTE(review): presumably the ZFS control device; it is neither created
 * nor used in this part of the file -- confirm.
 */
static struct cdev *zfsdev;

/* Defined outside this file. */
extern void zfs_init(void);
extern void zfs_fini(void);

/* Shape of an ioctl handler: receives the command, returns an int. */
typedef int zfs_ioc_func_t(zfs_cmd_t *);
/* Shape of a security-policy check: command plus the caller's credential. */
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);
97
/*
 * Kind of name an ioctl carries.  Stored per ioctl in
 * zfs_ioc_vec_t.zvec_namecheck.
 * NOTE(review): presumably selects the validation applied to zc_name; the
 * code that consumes it is not visible here -- confirm.
 */
typedef enum {
	NO_NAME,
	POOL_NAME,
	DATASET_NAME
} zfs_ioc_namecheck_t;
103
/*
 * Per-ioctl descriptor: the handler, its security-policy check and three
 * attributes.
 * NOTE(review): the table of these and the dispatcher that reads them are
 * not visible here; the per-field notes follow the field names only.
 */
typedef struct zfs_ioc_vec {
	zfs_ioc_func_t		*zvec_func;	/* ioctl handler */
	zfs_secpolicy_func_t	*zvec_secpolicy; /* permission check */
	zfs_ioc_namecheck_t	zvec_namecheck;	/* kind of name carried */
	boolean_t		zvec_his_log;	/* presumably: log to history */
	boolean_t		zvec_pool_check; /* presumably: check pool state */
} zfs_ioc_vec_t;
111
/*
 * Delegated-permission names, indexed by zfs_userquota_prop_t.  The entries
 * must stay in the same order as that enumeration, because the userspace
 * policy functions index this table directly with zc_objset_type after
 * checking it against ZFS_NUM_USERQUOTA_PROPS.
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
};
119
/*
 * Forward declarations.  The definitions are not in this part of the file.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
/* Not static: visible to other translation units. */
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **);

static void zfsdev_close(void *data);
130
131/* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
132void
133__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
134{
135	const char *newfile;
136	char buf[512];
137	va_list adx;
138
139	/*
140	 * Get rid of annoying "../common/" prefix to filename.
141	 */
142	newfile = strrchr(file, '/');
143	if (newfile != NULL) {
144		newfile = newfile + 1; /* Get rid of leading / */
145	} else {
146		newfile = file;
147	}
148
149	va_start(adx, fmt);
150	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
151	va_end(adx);
152
153	/*
154	 * To get this data, use the zfs-dprintf probe as so:
155	 * dtrace -q -n 'zfs-dprintf \
156	 *	/stringof(arg0) == "dbuf.c"/ \
157	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
158	 * arg0 = file name
159	 * arg1 = function name
160	 * arg2 = line number
161	 * arg3 = message
162	 */
163	DTRACE_PROBE4(zfs__dprintf,
164	    char *, newfile, char *, func, int, line, char *, buf);
165}
166
167static void
168history_str_free(char *buf)
169{
170	kmem_free(buf, HIS_MAX_RECORD_LEN);
171}
172
173static char *
174history_str_get(zfs_cmd_t *zc)
175{
176	char *buf;
177
178	if (zc->zc_history == 0)
179		return (NULL);
180
181	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
182	if (copyinstr((void *)(uintptr_t)zc->zc_history,
183	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
184		history_str_free(buf);
185		return (NULL);
186	}
187
188	buf[HIS_MAX_RECORD_LEN -1] = '\0';
189
190	return (buf);
191}
192
193/*
194 * Check to see if the named dataset is currently defined as bootable
195 */
196static boolean_t
197zfs_is_bootfs(const char *name)
198{
199	objset_t *os;
200
201	if (dmu_objset_hold(name, FTAG, &os) == 0) {
202		boolean_t ret;
203		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
204		dmu_objset_rele(os, FTAG);
205		return (ret);
206	}
207	return (B_FALSE);
208}
209
210/*
211 * zfs_earlier_version
212 *
213 *	Return non-zero if the spa version is less than requested version.
214 */
215static int
216zfs_earlier_version(const char *name, int version)
217{
218	spa_t *spa;
219
220	if (spa_open(name, &spa, FTAG) == 0) {
221		if (spa_version(spa) < version) {
222			spa_close(spa, FTAG);
223			return (1);
224		}
225		spa_close(spa, FTAG);
226	}
227	return (0);
228}
229
230/*
231 * zpl_earlier_version
232 *
233 * Return TRUE if the ZPL version is less than requested version.
234 */
235static boolean_t
236zpl_earlier_version(const char *name, int version)
237{
238	objset_t *os;
239	boolean_t rc = B_TRUE;
240
241	if (dmu_objset_hold(name, FTAG, &os) == 0) {
242		uint64_t zplversion;
243
244		if (dmu_objset_type(os) != DMU_OST_ZFS) {
245			dmu_objset_rele(os, FTAG);
246			return (B_TRUE);
247		}
248		/* XXX reading from non-owned objset */
249		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
250			rc = zplversion < version;
251		dmu_objset_rele(os, FTAG);
252	}
253	return (rc);
254}
255
256static void
257zfs_log_history(zfs_cmd_t *zc)
258{
259	spa_t *spa;
260	char *buf;
261
262	if ((buf = history_str_get(zc)) == NULL)
263		return;
264
265	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
266		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
267			(void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
268		spa_close(spa, FTAG);
269	}
270	history_str_free(buf);
271}
272
/*
 * Policy for top-level read operations (list pools).  Requires no privileges,
 * and can be used in the local zone, as there is no associated dataset.
 *
 * Both arguments are ignored; the function always returns 0.
 */
/* ARGSUSED */
static int
zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
{
	return (0);
}
283
284/*
285 * Policy for dataset read operations (list children, get statistics).  Requires
286 * no privileges, but must be visible in the local zone.
287 */
288/* ARGSUSED */
289static int
290zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
291{
292	if (INGLOBALZONE(curthread) ||
293	    zone_dataset_visible(zc->zc_name, NULL))
294		return (0);
295
296	return (ENOENT);
297}
298
299static int
300zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
301{
302	int writable = 1;
303
304	/*
305	 * The dataset must be visible by this zone -- check this first
306	 * so they don't see EPERM on something they shouldn't know about.
307	 */
308	if (!INGLOBALZONE(curthread) &&
309	    !zone_dataset_visible(dataset, &writable))
310		return (ENOENT);
311
312	if (INGLOBALZONE(curthread)) {
313		/*
314		 * If the fs is zoned, only root can access it from the
315		 * global zone.
316		 */
317		if (secpolicy_zfs(cr) && zoned)
318			return (EPERM);
319	} else {
320		/*
321		 * If we are in a local zone, the 'zoned' property must be set.
322		 */
323		if (!zoned)
324			return (EPERM);
325
326		/* must be writable by this zone */
327		if (!writable)
328			return (EPERM);
329	}
330	return (0);
331}
332
333static int
334zfs_dozonecheck(const char *dataset, cred_t *cr)
335{
336	uint64_t zoned;
337
338	if (dsl_prop_get_integer(dataset, "jailed", &zoned, NULL))
339		return (ENOENT);
340
341	return (zfs_dozonecheck_impl(dataset, zoned, cr));
342}
343
344static int
345zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
346{
347	uint64_t zoned;
348
349	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
350	if (dsl_prop_get_ds(ds, "jailed", 8, 1, &zoned, NULL)) {
351		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
352		return (ENOENT);
353	}
354	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
355
356	return (zfs_dozonecheck_impl(dataset, zoned, cr));
357}
358
359/*
360 * If name ends in a '@', then require recursive permissions.
361 */
362int
363zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
364{
365	int error;
366	boolean_t descendent = B_FALSE;
367	dsl_dataset_t *ds;
368	char *at;
369
370	at = strchr(name, '@');
371	if (at != NULL && at[1] == '\0') {
372		*at = '\0';
373		descendent = B_TRUE;
374	}
375
376	error = dsl_dataset_hold(name, FTAG, &ds);
377	if (at != NULL)
378		*at = '@';
379	if (error != 0)
380		return (error);
381
382	error = zfs_dozonecheck_ds(name, ds, cr);
383	if (error == 0) {
384		error = secpolicy_zfs(cr);
385		if (error)
386			error = dsl_deleg_access_impl(ds, descendent, perm, cr);
387	}
388
389	dsl_dataset_rele(ds, FTAG);
390	return (error);
391}
392
393int
394zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
395    const char *perm, cred_t *cr)
396{
397	int error;
398
399	error = zfs_dozonecheck_ds(name, ds, cr);
400	if (error == 0) {
401		error = secpolicy_zfs(cr);
402		if (error)
403			error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr);
404	}
405	return (error);
406}
407
#ifdef SECLABEL
/*
 * Policy for setting the security label property.
 *
 *	name	dataset whose label is being changed
 *	strval	new label, either ZFS_MLSLABEL_DEFAULT or a hex label string
 *	cr	credential checked for the up/downgrade privilege
 *
 * Returns 0 for success, non-zero for access and other errors (EINVAL when
 * the new label cannot be translated, EPERM for everything else decided
 * here, or whatever PRIV_POLICY() yields).
 */
static int
zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
{
	char		ds_hexsl[MAXNAMELEN];
	bslabel_t	ds_sl, new_sl;
	boolean_t	new_default = FALSE;
	uint64_t	zoned;
	int		needed_priv = -1;
	int		error;

	/* First get the existing dataset label. */
	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
	if (error)
		return (EPERM);

	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
		new_default = TRUE;

	/* The label must be translatable */
	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
		return (EINVAL);

	/*
	 * In a non-global zone, disallow attempts to set a label that
	 * doesn't match that of the zone; otherwise no other checks
	 * are needed.
	 */
	if (!INGLOBALZONE(curproc)) {
		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
			return (EPERM);
		return (0);
	}

	/*
	 * For global-zone datasets (i.e., those whose zoned property is
	 * "off"), verify that the specified new label is valid for the
	 * global zone.
	 */
	if (dsl_prop_get_integer(name,
	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
		return (EPERM);
	if (!zoned) {
		if (zfs_check_global_label(name, strval) != 0)
			return (EPERM);
	}

	/*
	 * If the existing dataset label is nondefault, check if the
	 * dataset is mounted (label cannot be changed while mounted).
	 * Get the zfsvfs; if there isn't one, then the dataset isn't
	 * mounted (or isn't a dataset, doesn't exist, ...).
	 */
	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
		objset_t *os;
		static char *setsl_tag = "setsl_tag";

		/*
		 * Try to own the dataset; abort if there is any error,
		 * (e.g., already mounted, in use, or other error).
		 */
		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
		    setsl_tag, &os);
		if (error)
			return (EPERM);

		dmu_objset_disown(os, setsl_tag);

		if (new_default) {
			needed_priv = PRIV_FILE_DOWNGRADE_SL;
			goto out_check;
		}

		if (hexstr_to_label(strval, &new_sl) != 0)
			return (EPERM);

		/*
		 * Translate the existing label as well.  Without this,
		 * ds_sl would be compared below while still uninitialized.
		 */
		if (hexstr_to_label(ds_hexsl, &ds_sl) != 0)
			return (EPERM);

		if (blstrictdom(&ds_sl, &new_sl))
			needed_priv = PRIV_FILE_DOWNGRADE_SL;
		else if (blstrictdom(&new_sl, &ds_sl))
			needed_priv = PRIV_FILE_UPGRADE_SL;
	} else {
		/* dataset currently has a default label */
		if (!new_default)
			needed_priv = PRIV_FILE_UPGRADE_SL;
	}

out_check:
	if (needed_priv != -1)
		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
	return (0);
}
#endif	/* SECLABEL */
506
507static int
508zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
509    cred_t *cr)
510{
511	char *strval;
512
513	/*
514	 * Check permissions for special properties.
515	 */
516	switch (prop) {
517	case ZFS_PROP_ZONED:
518		/*
519		 * Disallow setting of 'zoned' from within a local zone.
520		 */
521		if (!INGLOBALZONE(curthread))
522			return (EPERM);
523		break;
524
525	case ZFS_PROP_QUOTA:
526		if (!INGLOBALZONE(curthread)) {
527			uint64_t zoned;
528			char setpoint[MAXNAMELEN];
529			/*
530			 * Unprivileged users are allowed to modify the
531			 * quota on things *under* (ie. contained by)
532			 * the thing they own.
533			 */
534			if (dsl_prop_get_integer(dsname, "jailed", &zoned,
535			    setpoint))
536				return (EPERM);
537			if (!zoned || strlen(dsname) <= strlen(setpoint))
538				return (EPERM);
539		}
540		break;
541
542	case ZFS_PROP_MLSLABEL:
543#ifdef SECLABEL
544		if (!is_system_labeled())
545			return (EPERM);
546
547		if (nvpair_value_string(propval, &strval) == 0) {
548			int err;
549
550			err = zfs_set_slabel_policy(dsname, strval, CRED());
551			if (err != 0)
552				return (err);
553		}
554#else
555		return (EOPNOTSUPP);
556#endif
557		break;
558	}
559
560	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
561}
562
563int
564zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
565{
566	int error;
567
568	error = zfs_dozonecheck(zc->zc_name, cr);
569	if (error)
570		return (error);
571
572	/*
573	 * permission to set permissions will be evaluated later in
574	 * dsl_deleg_can_allow()
575	 */
576	return (0);
577}
578
579int
580zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
581{
582	return (zfs_secpolicy_write_perms(zc->zc_name,
583	    ZFS_DELEG_PERM_ROLLBACK, cr));
584}
585
586int
587zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
588{
589	spa_t *spa;
590	dsl_pool_t *dp;
591	dsl_dataset_t *ds;
592	char *cp;
593	int error;
594
595	/*
596	 * Generate the current snapshot name from the given objsetid, then
597	 * use that name for the secpolicy/zone checks.
598	 */
599	cp = strchr(zc->zc_name, '@');
600	if (cp == NULL)
601		return (EINVAL);
602	error = spa_open(zc->zc_name, &spa, FTAG);
603	if (error)
604		return (error);
605
606	dp = spa_get_dsl(spa);
607	rw_enter(&dp->dp_config_rwlock, RW_READER);
608	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
609	rw_exit(&dp->dp_config_rwlock);
610	spa_close(spa, FTAG);
611	if (error)
612		return (error);
613
614	dsl_dataset_name(ds, zc->zc_name);
615
616	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
617	    ZFS_DELEG_PERM_SEND, cr);
618	dsl_dataset_rele(ds, FTAG);
619
620	return (error);
621}
622
623static int
624zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
625{
626	vnode_t *vp;
627	int error;
628
629	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
630	    NO_FOLLOW, NULL, &vp)) != 0)
631		return (error);
632
633	/* Now make sure mntpnt and dataset are ZFS */
634
635	if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
636	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
637	    zc->zc_name) != 0)) {
638		VN_RELE(vp);
639		return (EPERM);
640	}
641
642	VN_RELE(vp);
643	return (dsl_deleg_access(zc->zc_name,
644	    ZFS_DELEG_PERM_SHARE, cr));
645}
646
647int
648zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
649{
650	if (!INGLOBALZONE(curthread))
651		return (EPERM);
652
653	if (secpolicy_nfs(cr) == 0) {
654		return (0);
655	} else {
656		return (zfs_secpolicy_deleg_share(zc, cr));
657	}
658}
659
660int
661zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
662{
663	if (!INGLOBALZONE(curthread))
664		return (EPERM);
665
666	if (secpolicy_smb(cr) == 0) {
667		return (0);
668	} else {
669		return (zfs_secpolicy_deleg_share(zc, cr));
670	}
671}
672
/*
 * Compute the parent of a dataset or snapshot name.
 *
 *	datasetname	name to examine
 *	parent		output buffer for the parent name
 *	parentsize	size of 'parent' in bytes
 *
 * The parent is the name with its "@bla" suffix removed (cut at the last
 * '@') or, when there is no '@', with its final "/bla" component removed.
 *
 * Returns 0 on success, ENOENT when the name contains neither '@' nor '/',
 * and ENAMETOOLONG when the name (with its terminator) does not fit in
 * 'parent'.  The length is checked first because strncpy() would leave the
 * buffer unterminated and the searches below would run past its end.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0 || strlen(datasetname) >= (size_t)parentsize)
		return (ENAMETOOLONG);

	/* The name is known to fit, so the copy is NUL-terminated. */
	(void) strncpy(parent, datasetname, parentsize);

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (ENOENT);

	cp[0] = '\0';
	return (0);
}
694
695int
696zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
697{
698	int error;
699
700	if ((error = zfs_secpolicy_write_perms(name,
701	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
702		return (error);
703
704	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
705}
706
707static int
708zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
709{
710	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
711}
712
713/*
714 * Destroying snapshots with delegated permissions requires
715 * descendent mount and destroy permissions.
716 */
717static int
718zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr)
719{
720	int error;
721	char *dsname;
722
723	dsname = kmem_asprintf("%s@", zc->zc_name);
724
725	error = zfs_secpolicy_destroy_perms(dsname, cr);
726
727 	if (error == ENOENT)
728 		error = zfs_secpolicy_destroy_perms(zc->zc_name, cr);
729
730	strfree(dsname);
731	return (error);
732}
733
734int
735zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
736{
737	char	parentname[MAXNAMELEN];
738	int	error;
739
740	if ((error = zfs_secpolicy_write_perms(from,
741	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
742		return (error);
743
744	if ((error = zfs_secpolicy_write_perms(from,
745	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
746		return (error);
747
748	if ((error = zfs_get_parent(to, parentname,
749	    sizeof (parentname))) != 0)
750		return (error);
751
752	if ((error = zfs_secpolicy_write_perms(parentname,
753	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
754		return (error);
755
756	if ((error = zfs_secpolicy_write_perms(parentname,
757	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
758		return (error);
759
760	return (error);
761}
762
763static int
764zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
765{
766	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
767}
768
769static int
770zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
771{
772	char	parentname[MAXNAMELEN];
773	objset_t *clone;
774	int error;
775
776	error = zfs_secpolicy_write_perms(zc->zc_name,
777	    ZFS_DELEG_PERM_PROMOTE, cr);
778	if (error)
779		return (error);
780
781	error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
782
783	if (error == 0) {
784		dsl_dataset_t *pclone = NULL;
785		dsl_dir_t *dd;
786		dd = clone->os_dsl_dataset->ds_dir;
787
788		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
789		error = dsl_dataset_hold_obj(dd->dd_pool,
790		    dd->dd_phys->dd_origin_obj, FTAG, &pclone);
791		rw_exit(&dd->dd_pool->dp_config_rwlock);
792		if (error) {
793			dmu_objset_rele(clone, FTAG);
794			return (error);
795		}
796
797		error = zfs_secpolicy_write_perms(zc->zc_name,
798		    ZFS_DELEG_PERM_MOUNT, cr);
799
800		dsl_dataset_name(pclone, parentname);
801		dmu_objset_rele(clone, FTAG);
802		dsl_dataset_rele(pclone, FTAG);
803		if (error == 0)
804			error = zfs_secpolicy_write_perms(parentname,
805			    ZFS_DELEG_PERM_PROMOTE, cr);
806	}
807	return (error);
808}
809
810static int
811zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
812{
813	int error;
814
815	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
816	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
817		return (error);
818
819	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
820	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
821		return (error);
822
823	return (zfs_secpolicy_write_perms(zc->zc_name,
824	    ZFS_DELEG_PERM_CREATE, cr));
825}
826
827int
828zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
829{
830	return (zfs_secpolicy_write_perms(name,
831	    ZFS_DELEG_PERM_SNAPSHOT, cr));
832}
833
834static int
835zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
836{
837
838	return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
839}
840
841static int
842zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
843{
844	char	parentname[MAXNAMELEN];
845	int	error;
846
847	if ((error = zfs_get_parent(zc->zc_name, parentname,
848	    sizeof (parentname))) != 0)
849		return (error);
850
851	if (zc->zc_value[0] != '\0') {
852		if ((error = zfs_secpolicy_write_perms(zc->zc_value,
853		    ZFS_DELEG_PERM_CLONE, cr)) != 0)
854			return (error);
855	}
856
857	if ((error = zfs_secpolicy_write_perms(parentname,
858	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
859		return (error);
860
861	error = zfs_secpolicy_write_perms(parentname,
862	    ZFS_DELEG_PERM_MOUNT, cr);
863
864	return (error);
865}
866
867static int
868zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
869{
870	int error;
871
872	error = secpolicy_fs_unmount(cr, NULL);
873	if (error) {
874		error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
875	}
876	return (error);
877}
878
879/*
880 * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
881 * SYS_CONFIG privilege, which is not available in a local zone.
882 */
883/* ARGSUSED */
884static int
885zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
886{
887	if (secpolicy_sys_config(cr, B_FALSE) != 0)
888		return (EPERM);
889
890	return (0);
891}
892
893/*
894 * Policy for object to name lookups.
895 */
896/* ARGSUSED */
897static int
898zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr)
899{
900	int error;
901
902	if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
903		return (0);
904
905	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
906	return (error);
907}
908
909/*
910 * Policy for fault injection.  Requires all privileges.
911 */
912/* ARGSUSED */
913static int
914zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
915{
916	return (secpolicy_zinject(cr));
917}
918
919static int
920zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
921{
922	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
923
924	if (prop == ZPROP_INVAL) {
925		if (!zfs_prop_user(zc->zc_value))
926			return (EINVAL);
927		return (zfs_secpolicy_write_perms(zc->zc_name,
928		    ZFS_DELEG_PERM_USERPROP, cr));
929	} else {
930		return (zfs_secpolicy_setprop(zc->zc_name, prop,
931		    NULL, cr));
932	}
933}
934
935static int
936zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)
937{
938	int err = zfs_secpolicy_read(zc, cr);
939	if (err)
940		return (err);
941
942	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
943		return (EINVAL);
944
945	if (zc->zc_value[0] == 0) {
946		/*
947		 * They are asking about a posix uid/gid.  If it's
948		 * themself, allow it.
949		 */
950		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
951		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
952			if (zc->zc_guid == crgetuid(cr))
953				return (0);
954		} else {
955			if (groupmember(zc->zc_guid, cr))
956				return (0);
957		}
958	}
959
960	return (zfs_secpolicy_write_perms(zc->zc_name,
961	    userquota_perms[zc->zc_objset_type], cr));
962}
963
964static int
965zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)
966{
967	int err = zfs_secpolicy_read(zc, cr);
968	if (err)
969		return (err);
970
971	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
972		return (EINVAL);
973
974	return (zfs_secpolicy_write_perms(zc->zc_name,
975	    userquota_perms[zc->zc_objset_type], cr));
976}
977
978static int
979zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
980{
981	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
982	    NULL, cr));
983}
984
985static int
986zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
987{
988	return (zfs_secpolicy_write_perms(zc->zc_name,
989	    ZFS_DELEG_PERM_HOLD, cr));
990}
991
992static int
993zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
994{
995	return (zfs_secpolicy_write_perms(zc->zc_name,
996	    ZFS_DELEG_PERM_RELEASE, cr));
997}
998
999/*
1000 * Policy for allowing temporary snapshots to be taken or released
1001 */
1002static int
1003zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr)
1004{
1005	/*
1006	 * A temporary snapshot is the same as a snapshot,
1007	 * hold, destroy and release all rolled into one.
1008	 * Delegated diff alone is sufficient that we allow this.
1009	 */
1010	int error;
1011
1012	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1013	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
1014		return (0);
1015
1016	error = zfs_secpolicy_snapshot(zc, cr);
1017	if (!error)
1018		error = zfs_secpolicy_hold(zc, cr);
1019	if (!error)
1020		error = zfs_secpolicy_release(zc, cr);
1021	if (!error)
1022		error = zfs_secpolicy_destroy(zc, cr);
1023	return (error);
1024}
1025
1026/*
1027 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1028 */
1029static int
1030get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1031{
1032	char *packed;
1033	int error;
1034	nvlist_t *list = NULL;
1035
1036	/*
1037	 * Read in and unpack the user-supplied nvlist.
1038	 */
1039	if (size == 0)
1040		return (EINVAL);
1041
1042	packed = kmem_alloc(size, KM_SLEEP);
1043
1044	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1045	    iflag)) != 0) {
1046		kmem_free(packed, size);
1047		return (error);
1048	}
1049
1050	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1051		kmem_free(packed, size);
1052		return (error);
1053	}
1054
1055	kmem_free(packed, size);
1056
1057	*nvp = list;
1058	return (0);
1059}
1060
1061static int
1062fit_error_list(zfs_cmd_t *zc, nvlist_t **errors)
1063{
1064	size_t size;
1065
1066	VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
1067
1068	if (size > zc->zc_nvlist_dst_size) {
1069		nvpair_t *more_errors;
1070		int n = 0;
1071
1072		if (zc->zc_nvlist_dst_size < 1024)
1073			return (ENOMEM);
1074
1075		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0);
1076		more_errors = nvlist_prev_nvpair(*errors, NULL);
1077
1078		do {
1079			nvpair_t *pair = nvlist_prev_nvpair(*errors,
1080			    more_errors);
1081			VERIFY(nvlist_remove_nvpair(*errors, pair) == 0);
1082			n++;
1083			VERIFY(nvlist_size(*errors, &size,
1084			    NV_ENCODE_NATIVE) == 0);
1085		} while (size > zc->zc_nvlist_dst_size);
1086
1087		VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0);
1088		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0);
1089		ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
1090		ASSERT(size <= zc->zc_nvlist_dst_size);
1091	}
1092
1093	return (0);
1094}
1095
1096static int
1097put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1098{
1099	char *packed = NULL;
1100	int error = 0;
1101	size_t size;
1102
1103	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
1104
1105	if (size > zc->zc_nvlist_dst_size) {
1106		/*
1107		 * Solaris returns ENOMEM here, because even if an error is
1108		 * returned from an ioctl(2), new zc_nvlist_dst_size will be
1109		 * passed to the userland. This is not the case for FreeBSD.
1110		 * We need to return 0, so the kernel will copy the
1111		 * zc_nvlist_dst_size back and the userland can discover that a
1112		 * bigger buffer is needed.
1113		 */
1114		error = 0;
1115	} else {
1116		packed = kmem_alloc(size, KM_SLEEP);
1117		VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
1118		    KM_SLEEP) == 0);
1119		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1120		    size, zc->zc_iflags) != 0)
1121			error = EFAULT;
1122		kmem_free(packed, size);
1123	}
1124
1125	zc->zc_nvlist_dst_size = size;
1126	return (error);
1127}
1128
1129static int
1130getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1131{
1132	objset_t *os;
1133	int error;
1134
1135	error = dmu_objset_hold(dsname, FTAG, &os);
1136	if (error)
1137		return (error);
1138	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1139		dmu_objset_rele(os, FTAG);
1140		return (EINVAL);
1141	}
1142
1143	mutex_enter(&os->os_user_ptr_lock);
1144	*zfvp = dmu_objset_get_user(os);
1145	if (*zfvp) {
1146		VFS_HOLD((*zfvp)->z_vfs);
1147	} else {
1148		error = ESRCH;
1149	}
1150	mutex_exit(&os->os_user_ptr_lock);
1151	dmu_objset_rele(os, FTAG);
1152	return (error);
1153}
1154
1155/*
1156 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1157 * case its z_vfs will be NULL, and it will be opened as the owner.
1158 */
1159static int
1160zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1161{
1162	int error = 0;
1163
1164	if (getzfsvfs(name, zfvp) != 0)
1165		error = zfsvfs_create(name, zfvp);
1166	if (error == 0) {
1167		rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1168		    RW_READER, tag);
1169		if ((*zfvp)->z_unmounted) {
1170			/*
1171			 * XXX we could probably try again, since the unmounting
1172			 * thread should be just about to disassociate the
1173			 * objset from the zfsvfs.
1174			 */
1175			rrw_exit(&(*zfvp)->z_teardown_lock, tag);
1176			return (EBUSY);
1177		}
1178	}
1179	return (error);
1180}
1181
1182static void
1183zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1184{
1185	rrw_exit(&zfsvfs->z_teardown_lock, tag);
1186
1187	if (zfsvfs->z_vfs) {
1188		VFS_RELE(zfsvfs->z_vfs);
1189	} else {
1190		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1191		zfsvfs_free(zfsvfs);
1192	}
1193}
1194
1195static int
1196zfs_ioc_pool_create(zfs_cmd_t *zc)
1197{
1198	int error;
1199	nvlist_t *config, *props = NULL;
1200	nvlist_t *rootprops = NULL;
1201	nvlist_t *zplprops = NULL;
1202	char *buf;
1203
1204	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1205	    zc->zc_iflags, &config))
1206		return (error);
1207
1208	if (zc->zc_nvlist_src_size != 0 && (error =
1209	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1210	    zc->zc_iflags, &props))) {
1211		nvlist_free(config);
1212		return (error);
1213	}
1214
1215	if (props) {
1216		nvlist_t *nvl = NULL;
1217		uint64_t version = SPA_VERSION;
1218
1219		(void) nvlist_lookup_uint64(props,
1220		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1221		if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) {
1222			error = EINVAL;
1223			goto pool_props_bad;
1224		}
1225		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1226		if (nvl) {
1227			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1228			if (error != 0) {
1229				nvlist_free(config);
1230				nvlist_free(props);
1231				return (error);
1232			}
1233			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1234		}
1235		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1236		error = zfs_fill_zplprops_root(version, rootprops,
1237		    zplprops, NULL);
1238		if (error)
1239			goto pool_props_bad;
1240	}
1241
1242	buf = history_str_get(zc);
1243
1244	error = spa_create(zc->zc_name, config, props, buf, zplprops);
1245
1246	/*
1247	 * Set the remaining root properties
1248	 */
1249	if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1250	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1251		(void) spa_destroy(zc->zc_name);
1252
1253	if (buf != NULL)
1254		history_str_free(buf);
1255
1256pool_props_bad:
1257	nvlist_free(rootprops);
1258	nvlist_free(zplprops);
1259	nvlist_free(config);
1260	nvlist_free(props);
1261
1262	return (error);
1263}
1264
1265static int
1266zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1267{
1268	int error;
1269	zfs_log_history(zc);
1270	error = spa_destroy(zc->zc_name);
1271	if (error == 0)
1272		zvol_remove_minors(zc->zc_name);
1273	return (error);
1274}
1275
1276static int
1277zfs_ioc_pool_import(zfs_cmd_t *zc)
1278{
1279	nvlist_t *config, *props = NULL;
1280	uint64_t guid;
1281	int error;
1282
1283	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1284	    zc->zc_iflags, &config)) != 0)
1285		return (error);
1286
1287	if (zc->zc_nvlist_src_size != 0 && (error =
1288	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1289	    zc->zc_iflags, &props))) {
1290		nvlist_free(config);
1291		return (error);
1292	}
1293
1294	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1295	    guid != zc->zc_guid)
1296		error = EINVAL;
1297	else
1298		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1299
1300	if (zc->zc_nvlist_dst != 0) {
1301		int err;
1302
1303		if ((err = put_nvlist(zc, config)) != 0)
1304			error = err;
1305	}
1306
1307	nvlist_free(config);
1308
1309	if (props)
1310		nvlist_free(props);
1311
1312	return (error);
1313}
1314
1315static int
1316zfs_ioc_pool_export(zfs_cmd_t *zc)
1317{
1318	int error;
1319	boolean_t force = (boolean_t)zc->zc_cookie;
1320	boolean_t hardforce = (boolean_t)zc->zc_guid;
1321
1322	zfs_log_history(zc);
1323	error = spa_export(zc->zc_name, NULL, force, hardforce);
1324	if (error == 0)
1325		zvol_remove_minors(zc->zc_name);
1326	return (error);
1327}
1328
1329static int
1330zfs_ioc_pool_configs(zfs_cmd_t *zc)
1331{
1332	nvlist_t *configs;
1333	int error;
1334
1335	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1336		return (EEXIST);
1337
1338	error = put_nvlist(zc, configs);
1339
1340	nvlist_free(configs);
1341
1342	return (error);
1343}
1344
1345static int
1346zfs_ioc_pool_stats(zfs_cmd_t *zc)
1347{
1348	nvlist_t *config;
1349	int error;
1350	int ret = 0;
1351
1352	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1353	    sizeof (zc->zc_value));
1354
1355	if (config != NULL) {
1356		ret = put_nvlist(zc, config);
1357		nvlist_free(config);
1358
1359		/*
1360		 * The config may be present even if 'error' is non-zero.
1361		 * In this case we return success, and preserve the real errno
1362		 * in 'zc_cookie'.
1363		 */
1364		zc->zc_cookie = error;
1365	} else {
1366		ret = error;
1367	}
1368
1369	return (ret);
1370}
1371
1372/*
1373 * Try to import the given pool, returning pool stats as appropriate so that
1374 * user land knows which devices are available and overall pool health.
1375 */
1376static int
1377zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1378{
1379	nvlist_t *tryconfig, *config;
1380	int error;
1381
1382	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1383	    zc->zc_iflags, &tryconfig)) != 0)
1384		return (error);
1385
1386	config = spa_tryimport(tryconfig);
1387
1388	nvlist_free(tryconfig);
1389
1390	if (config == NULL)
1391		return (EINVAL);
1392
1393	error = put_nvlist(zc, config);
1394	nvlist_free(config);
1395
1396	return (error);
1397}
1398
1399/*
1400 * inputs:
1401 * zc_name              name of the pool
1402 * zc_cookie            scan func (pool_scan_func_t)
1403 */
1404static int
1405zfs_ioc_pool_scan(zfs_cmd_t *zc)
1406{
1407	spa_t *spa;
1408	int error;
1409
1410	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1411		return (error);
1412
1413	if (zc->zc_cookie == POOL_SCAN_NONE)
1414		error = spa_scan_stop(spa);
1415	else
1416		error = spa_scan(spa, zc->zc_cookie);
1417
1418	spa_close(spa, FTAG);
1419
1420	return (error);
1421}
1422
1423static int
1424zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1425{
1426	spa_t *spa;
1427	int error;
1428
1429	error = spa_open(zc->zc_name, &spa, FTAG);
1430	if (error == 0) {
1431		spa_freeze(spa);
1432		spa_close(spa, FTAG);
1433	}
1434	return (error);
1435}
1436
1437static int
1438zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1439{
1440	spa_t *spa;
1441	int error;
1442
1443	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1444		return (error);
1445
1446	if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
1447		spa_close(spa, FTAG);
1448		return (EINVAL);
1449	}
1450
1451	spa_upgrade(spa, zc->zc_cookie);
1452	spa_close(spa, FTAG);
1453
1454	return (error);
1455}
1456
1457static int
1458zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1459{
1460	spa_t *spa;
1461	char *hist_buf;
1462	uint64_t size;
1463	int error;
1464
1465	if ((size = zc->zc_history_len) == 0)
1466		return (EINVAL);
1467
1468	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1469		return (error);
1470
1471	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1472		spa_close(spa, FTAG);
1473		return (ENOTSUP);
1474	}
1475
1476	hist_buf = kmem_alloc(size, KM_SLEEP);
1477	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1478	    &zc->zc_history_len, hist_buf)) == 0) {
1479		error = ddi_copyout(hist_buf,
1480		    (void *)(uintptr_t)zc->zc_history,
1481		    zc->zc_history_len, zc->zc_iflags);
1482	}
1483
1484	spa_close(spa, FTAG);
1485	kmem_free(hist_buf, size);
1486	return (error);
1487}
1488
1489static int
1490zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1491{
1492	spa_t *spa;
1493	int error;
1494
1495	error = spa_open(zc->zc_name, &spa, FTAG);
1496	if (error == 0) {
1497		error = spa_change_guid(spa);
1498		spa_close(spa, FTAG);
1499	}
1500	return (error);
1501}
1502
1503static int
1504zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1505{
1506	int error;
1507
1508	if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
1509		return (error);
1510
1511	return (0);
1512}
1513
1514/*
1515 * inputs:
1516 * zc_name		name of filesystem
1517 * zc_obj		object to find
1518 *
1519 * outputs:
1520 * zc_value		name of object
1521 */
1522static int
1523zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1524{
1525	objset_t *os;
1526	int error;
1527
1528	/* XXX reading from objset not owned */
1529	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1530		return (error);
1531	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1532		dmu_objset_rele(os, FTAG);
1533		return (EINVAL);
1534	}
1535	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1536	    sizeof (zc->zc_value));
1537	dmu_objset_rele(os, FTAG);
1538
1539	return (error);
1540}
1541
1542/*
1543 * inputs:
1544 * zc_name		name of filesystem
1545 * zc_obj		object to find
1546 *
1547 * outputs:
1548 * zc_stat		stats on object
1549 * zc_value		path to object
1550 */
1551static int
1552zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1553{
1554	objset_t *os;
1555	int error;
1556
1557	/* XXX reading from objset not owned */
1558	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1559		return (error);
1560	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1561		dmu_objset_rele(os, FTAG);
1562		return (EINVAL);
1563	}
1564	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1565	    sizeof (zc->zc_value));
1566	dmu_objset_rele(os, FTAG);
1567
1568	return (error);
1569}
1570
1571static int
1572zfs_ioc_vdev_add(zfs_cmd_t *zc)
1573{
1574	spa_t *spa;
1575	int error;
1576	nvlist_t *config, **l2cache, **spares;
1577	uint_t nl2cache = 0, nspares = 0;
1578
1579	error = spa_open(zc->zc_name, &spa, FTAG);
1580	if (error != 0)
1581		return (error);
1582
1583	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1584	    zc->zc_iflags, &config);
1585	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1586	    &l2cache, &nl2cache);
1587
1588	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1589	    &spares, &nspares);
1590
1591	/*
1592	 * A root pool with concatenated devices is not supported.
1593	 * Thus, can not add a device to a root pool.
1594	 *
1595	 * Intent log device can not be added to a rootpool because
1596	 * during mountroot, zil is replayed, a seperated log device
1597	 * can not be accessed during the mountroot time.
1598	 *
1599	 * l2cache and spare devices are ok to be added to a rootpool.
1600	 */
1601	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1602		nvlist_free(config);
1603		spa_close(spa, FTAG);
1604		return (EDOM);
1605	}
1606
1607	if (error == 0) {
1608		error = spa_vdev_add(spa, config);
1609		nvlist_free(config);
1610	}
1611	spa_close(spa, FTAG);
1612	return (error);
1613}
1614
1615/*
1616 * inputs:
1617 * zc_name		name of the pool
1618 * zc_nvlist_conf	nvlist of devices to remove
1619 * zc_cookie		to stop the remove?
1620 */
1621static int
1622zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1623{
1624	spa_t *spa;
1625	int error;
1626
1627	error = spa_open(zc->zc_name, &spa, FTAG);
1628	if (error != 0)
1629		return (error);
1630	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1631	spa_close(spa, FTAG);
1632	return (error);
1633}
1634
1635static int
1636zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1637{
1638	spa_t *spa;
1639	int error;
1640	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1641
1642	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1643		return (error);
1644	switch (zc->zc_cookie) {
1645	case VDEV_STATE_ONLINE:
1646		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1647		break;
1648
1649	case VDEV_STATE_OFFLINE:
1650		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1651		break;
1652
1653	case VDEV_STATE_FAULTED:
1654		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1655		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1656			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1657
1658		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1659		break;
1660
1661	case VDEV_STATE_DEGRADED:
1662		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1663		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1664			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1665
1666		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1667		break;
1668
1669	default:
1670		error = EINVAL;
1671	}
1672	zc->zc_cookie = newstate;
1673	spa_close(spa, FTAG);
1674	return (error);
1675}
1676
1677static int
1678zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1679{
1680	spa_t *spa;
1681	int replacing = zc->zc_cookie;
1682	nvlist_t *config;
1683	int error;
1684
1685	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1686		return (error);
1687
1688	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1689	    zc->zc_iflags, &config)) == 0) {
1690		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1691		nvlist_free(config);
1692	}
1693
1694	spa_close(spa, FTAG);
1695	return (error);
1696}
1697
1698static int
1699zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1700{
1701	spa_t *spa;
1702	int error;
1703
1704	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1705		return (error);
1706
1707	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1708
1709	spa_close(spa, FTAG);
1710	return (error);
1711}
1712
1713static int
1714zfs_ioc_vdev_split(zfs_cmd_t *zc)
1715{
1716	spa_t *spa;
1717	nvlist_t *config, *props = NULL;
1718	int error;
1719	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1720
1721	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1722		return (error);
1723
1724	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1725	    zc->zc_iflags, &config)) {
1726		spa_close(spa, FTAG);
1727		return (error);
1728	}
1729
1730	if (zc->zc_nvlist_src_size != 0 && (error =
1731	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1732	    zc->zc_iflags, &props))) {
1733		spa_close(spa, FTAG);
1734		nvlist_free(config);
1735		return (error);
1736	}
1737
1738	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1739
1740	spa_close(spa, FTAG);
1741
1742	nvlist_free(config);
1743	nvlist_free(props);
1744
1745	return (error);
1746}
1747
1748static int
1749zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1750{
1751	spa_t *spa;
1752	char *path = zc->zc_value;
1753	uint64_t guid = zc->zc_guid;
1754	int error;
1755
1756	error = spa_open(zc->zc_name, &spa, FTAG);
1757	if (error != 0)
1758		return (error);
1759
1760	error = spa_vdev_setpath(spa, guid, path);
1761	spa_close(spa, FTAG);
1762	return (error);
1763}
1764
1765static int
1766zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
1767{
1768	spa_t *spa;
1769	char *fru = zc->zc_value;
1770	uint64_t guid = zc->zc_guid;
1771	int error;
1772
1773	error = spa_open(zc->zc_name, &spa, FTAG);
1774	if (error != 0)
1775		return (error);
1776
1777	error = spa_vdev_setfru(spa, guid, fru);
1778	spa_close(spa, FTAG);
1779	return (error);
1780}
1781
1782static int
1783zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
1784{
1785	int error = 0;
1786	nvlist_t *nv;
1787
1788	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1789
1790	if (zc->zc_nvlist_dst != 0 &&
1791	    (error = dsl_prop_get_all(os, &nv)) == 0) {
1792		dmu_objset_stats(os, nv);
1793		/*
1794		 * NB: zvol_get_stats() will read the objset contents,
1795		 * which we aren't supposed to do with a
1796		 * DS_MODE_USER hold, because it could be
1797		 * inconsistent.  So this is a bit of a workaround...
1798		 * XXX reading with out owning
1799		 */
1800		if (!zc->zc_objset_stats.dds_inconsistent &&
1801		    dmu_objset_type(os) == DMU_OST_ZVOL) {
1802			error = zvol_get_stats(os, nv);
1803			if (error == EIO)
1804				return (error);
1805			VERIFY3S(error, ==, 0);
1806		}
1807		error = put_nvlist(zc, nv);
1808		nvlist_free(nv);
1809	}
1810
1811	return (error);
1812}
1813
1814/*
1815 * inputs:
1816 * zc_name		name of filesystem
1817 * zc_nvlist_dst_size	size of buffer for property nvlist
1818 *
1819 * outputs:
1820 * zc_objset_stats	stats
1821 * zc_nvlist_dst	property nvlist
1822 * zc_nvlist_dst_size	size of property nvlist
1823 */
1824static int
1825zfs_ioc_objset_stats(zfs_cmd_t *zc)
1826{
1827	objset_t *os = NULL;
1828	int error;
1829
1830	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1831		return (error);
1832
1833	error = zfs_ioc_objset_stats_impl(zc, os);
1834
1835	dmu_objset_rele(os, FTAG);
1836
1837	if (error == ENOMEM)
1838		error = 0;
1839	return (error);
1840}
1841
1842/*
1843 * inputs:
1844 * zc_name		name of filesystem
1845 * zc_nvlist_dst_size	size of buffer for property nvlist
1846 *
1847 * outputs:
1848 * zc_nvlist_dst	received property nvlist
1849 * zc_nvlist_dst_size	size of received property nvlist
1850 *
1851 * Gets received properties (distinct from local properties on or after
1852 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
1853 * local property values.
1854 */
1855static int
1856zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
1857{
1858	objset_t *os = NULL;
1859	int error;
1860	nvlist_t *nv;
1861
1862	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1863		return (error);
1864
1865	/*
1866	 * Without this check, we would return local property values if the
1867	 * caller has not already received properties on or after
1868	 * SPA_VERSION_RECVD_PROPS.
1869	 */
1870	if (!dsl_prop_get_hasrecvd(os)) {
1871		dmu_objset_rele(os, FTAG);
1872		return (ENOTSUP);
1873	}
1874
1875	if (zc->zc_nvlist_dst != 0 &&
1876	    (error = dsl_prop_get_received(os, &nv)) == 0) {
1877		error = put_nvlist(zc, nv);
1878		nvlist_free(nv);
1879	}
1880
1881	dmu_objset_rele(os, FTAG);
1882	return (error);
1883}
1884
1885static int
1886nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
1887{
1888	uint64_t value;
1889	int error;
1890
1891	/*
1892	 * zfs_get_zplprop() will either find a value or give us
1893	 * the default value (if there is one).
1894	 */
1895	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
1896		return (error);
1897	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
1898	return (0);
1899}
1900
1901/*
1902 * inputs:
1903 * zc_name		name of filesystem
1904 * zc_nvlist_dst_size	size of buffer for zpl property nvlist
1905 *
1906 * outputs:
1907 * zc_nvlist_dst	zpl property nvlist
1908 * zc_nvlist_dst_size	size of zpl property nvlist
1909 */
1910static int
1911zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
1912{
1913	objset_t *os;
1914	int err;
1915
1916	/* XXX reading without owning */
1917	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
1918		return (err);
1919
1920	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1921
1922	/*
1923	 * NB: nvl_add_zplprop() will read the objset contents,
1924	 * which we aren't supposed to do with a DS_MODE_USER
1925	 * hold, because it could be inconsistent.
1926	 */
1927	if (zc->zc_nvlist_dst != 0 &&
1928	    !zc->zc_objset_stats.dds_inconsistent &&
1929	    dmu_objset_type(os) == DMU_OST_ZFS) {
1930		nvlist_t *nv;
1931
1932		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1933		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
1934		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
1935		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
1936		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
1937			err = put_nvlist(zc, nv);
1938		nvlist_free(nv);
1939	} else {
1940		err = ENOENT;
1941	}
1942	dmu_objset_rele(os, FTAG);
1943	return (err);
1944}
1945
1946boolean_t
1947dataset_name_hidden(const char *name)
1948{
1949	/*
1950	 * Skip over datasets that are not visible in this zone,
1951	 * internal datasets (which have a $ in their name), and
1952	 * temporary datasets (which have a % in their name).
1953	 */
1954	if (strchr(name, '$') != NULL)
1955		return (B_TRUE);
1956	if (strchr(name, '%') != NULL)
1957		return (B_TRUE);
1958	if (!INGLOBALZONE(curthread) && !zone_dataset_visible(name, NULL))
1959		return (B_TRUE);
1960	return (B_FALSE);
1961}
1962
1963/*
1964 * inputs:
1965 * zc_name		name of filesystem
1966 * zc_cookie		zap cursor
1967 * zc_nvlist_dst_size	size of buffer for property nvlist
1968 *
1969 * outputs:
1970 * zc_name		name of next filesystem
1971 * zc_cookie		zap cursor
1972 * zc_objset_stats	stats
1973 * zc_nvlist_dst	property nvlist
1974 * zc_nvlist_dst_size	size of property nvlist
1975 */
1976static int
1977zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1978{
1979	objset_t *os;
1980	int error;
1981	char *p;
1982	size_t orig_len = strlen(zc->zc_name);
1983
1984top:
1985	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
1986		if (error == ENOENT)
1987			error = ESRCH;
1988		return (error);
1989	}
1990
1991	p = strrchr(zc->zc_name, '/');
1992	if (p == NULL || p[1] != '\0')
1993		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1994	p = zc->zc_name + strlen(zc->zc_name);
1995
1996	/*
1997	 * Pre-fetch the datasets.  dmu_objset_prefetch() always returns 0
1998	 * but is not declared void because its called by dmu_objset_find().
1999	 */
2000	if (zc->zc_cookie == 0) {
2001		uint64_t cookie = 0;
2002		int len = sizeof (zc->zc_name) - (p - zc->zc_name);
2003
2004		while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
2005			if (!dataset_name_hidden(zc->zc_name))
2006				(void) dmu_objset_prefetch(zc->zc_name, NULL);
2007		}
2008	}
2009
2010	do {
2011		error = dmu_dir_list_next(os,
2012		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2013		    NULL, &zc->zc_cookie);
2014		if (error == ENOENT)
2015			error = ESRCH;
2016	} while (error == 0 && dataset_name_hidden(zc->zc_name));
2017	dmu_objset_rele(os, FTAG);
2018
2019	/*
2020	 * If it's an internal dataset (ie. with a '$' in its name),
2021	 * don't try to get stats for it, otherwise we'll return ENOENT.
2022	 */
2023	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2024		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2025		if (error == ENOENT) {
2026			/* We lost a race with destroy, get the next one. */
2027			zc->zc_name[orig_len] = '\0';
2028			goto top;
2029		}
2030	}
2031	return (error);
2032}
2033
2034/*
2035 * inputs:
2036 * zc_name		name of filesystem
2037 * zc_cookie		zap cursor
2038 * zc_nvlist_dst_size	size of buffer for property nvlist
2039 *
2040 * outputs:
2041 * zc_name		name of next snapshot
2042 * zc_objset_stats	stats
2043 * zc_nvlist_dst	property nvlist
2044 * zc_nvlist_dst_size	size of property nvlist
2045 */
2046static int
2047zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2048{
2049	objset_t *os;
2050	int error;
2051
2052top:
2053	if (snapshot_list_prefetch && zc->zc_cookie == 0)
2054		(void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
2055		    NULL, DS_FIND_SNAPSHOTS);
2056
2057	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2058	if (error)
2059		return (error == ENOENT ? ESRCH : error);
2060
2061	/*
2062	 * A dataset name of maximum length cannot have any snapshots,
2063	 * so exit immediately.
2064	 */
2065	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
2066		dmu_objset_rele(os, FTAG);
2067		return (ESRCH);
2068	}
2069
2070	error = dmu_snapshot_list_next(os,
2071	    sizeof (zc->zc_name) - strlen(zc->zc_name),
2072	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2073	    NULL);
2074
2075	if (error == 0) {
2076		dsl_dataset_t *ds;
2077		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2078
2079		/*
2080		 * Since we probably don't have a hold on this snapshot,
2081		 * it's possible that the objsetid could have been destroyed
2082		 * and reused for a new objset. It's OK if this happens during
2083		 * a zfs send operation, since the new createtxg will be
2084		 * beyond the range we're interested in.
2085		 */
2086		rw_enter(&dp->dp_config_rwlock, RW_READER);
2087		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2088		rw_exit(&dp->dp_config_rwlock);
2089		if (error) {
2090			if (error == ENOENT) {
2091				/* Racing with destroy, get the next one. */
2092				*strchr(zc->zc_name, '@') = '\0';
2093				dmu_objset_rele(os, FTAG);
2094				goto top;
2095			}
2096		} else {
2097			objset_t *ossnap;
2098
2099			error = dmu_objset_from_ds(ds, &ossnap);
2100			if (error == 0)
2101				error = zfs_ioc_objset_stats_impl(zc, ossnap);
2102			dsl_dataset_rele(ds, FTAG);
2103		}
2104	} else if (error == ENOENT) {
2105		error = ESRCH;
2106	}
2107
2108	dmu_objset_rele(os, FTAG);
2109	/* if we failed, undo the @ that we tacked on to zc_name */
2110	if (error)
2111		*strchr(zc->zc_name, '@') = '\0';
2112	return (error);
2113}
2114
2115static int
2116zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2117{
2118	const char *propname = nvpair_name(pair);
2119	uint64_t *valary;
2120	unsigned int vallen;
2121	const char *domain;
2122	char *dash;
2123	zfs_userquota_prop_t type;
2124	uint64_t rid;
2125	uint64_t quota;
2126	zfsvfs_t *zfsvfs;
2127	int err;
2128
2129	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2130		nvlist_t *attrs;
2131		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2132		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2133		    &pair) != 0)
2134			return (EINVAL);
2135	}
2136
2137	/*
2138	 * A correctly constructed propname is encoded as
2139	 * userquota@<rid>-<domain>.
2140	 */
2141	if ((dash = strchr(propname, '-')) == NULL ||
2142	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2143	    vallen != 3)
2144		return (EINVAL);
2145
2146	domain = dash + 1;
2147	type = valary[0];
2148	rid = valary[1];
2149	quota = valary[2];
2150
2151	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2152	if (err == 0) {
2153		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2154		zfsvfs_rele(zfsvfs, FTAG);
2155	}
2156
2157	return (err);
2158}
2159
2160/*
2161 * If the named property is one that has a special function to set its value,
2162 * return 0 on success and a positive error code on failure; otherwise if it is
2163 * not one of the special properties handled by this function, return -1.
2164 *
2165 * XXX: It would be better for callers of the property interface if we handled
2166 * these special cases in dsl_prop.c (in the dsl layer).
2167 */
2168static int
2169zfs_prop_set_special(const char *dsname, zprop_source_t source,
2170    nvpair_t *pair)
2171{
2172	const char *propname = nvpair_name(pair);
2173	zfs_prop_t prop = zfs_name_to_prop(propname);
2174	uint64_t intval;
2175	int err;
2176
2177	if (prop == ZPROP_INVAL) {
2178		if (zfs_prop_userquota(propname))
2179			return (zfs_prop_set_userquota(dsname, pair));
2180		return (-1);
2181	}
2182
2183	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2184		nvlist_t *attrs;
2185		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2186		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2187		    &pair) == 0);
2188	}
2189
2190	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2191		return (-1);
2192
2193	VERIFY(0 == nvpair_value_uint64(pair, &intval));
2194
2195	switch (prop) {
2196	case ZFS_PROP_QUOTA:
2197		err = dsl_dir_set_quota(dsname, source, intval);
2198		break;
2199	case ZFS_PROP_REFQUOTA:
2200		err = dsl_dataset_set_quota(dsname, source, intval);
2201		break;
2202	case ZFS_PROP_RESERVATION:
2203		err = dsl_dir_set_reservation(dsname, source, intval);
2204		break;
2205	case ZFS_PROP_REFRESERVATION:
2206		err = dsl_dataset_set_reservation(dsname, source, intval);
2207		break;
2208	case ZFS_PROP_VOLSIZE:
2209		err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip),
2210		    intval);
2211		break;
2212	case ZFS_PROP_VERSION:
2213	{
2214		zfsvfs_t *zfsvfs;
2215
2216		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2217			break;
2218
2219		err = zfs_set_version(zfsvfs, intval);
2220		zfsvfs_rele(zfsvfs, FTAG);
2221
2222		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2223			zfs_cmd_t *zc;
2224
2225			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2226			(void) strcpy(zc->zc_name, dsname);
2227			(void) zfs_ioc_userspace_upgrade(zc);
2228			kmem_free(zc, sizeof (zfs_cmd_t));
2229		}
2230		break;
2231	}
2232
2233	default:
2234		err = -1;
2235	}
2236
2237	return (err);
2238}
2239
/*
 * This function is best effort. If it fails to set any of the given properties,
 * it continues to set as many as it can and returns the first error
 * encountered. If the caller provides a non-NULL errlist, it also gives the
 * complete list of names of all the properties it failed to set along with the
 * corresponding error numbers. The caller is responsible for freeing the
 * returned errlist.
 *
 * If every property is set successfully, zero is returned and the list pointed
 * at by errlist is NULL.
 *
 * dsname	name of the dataset the properties are set on
 * source	property source handed to the individual setters
 * nvl		properties to set; a value may be given directly or wrapped
 *		in an nvlist under ZPROP_VALUE
 * errlist	optional; receives the propname -> errno list (or NULL)
 */
int
zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
    nvlist_t **errlist)
{
	nvpair_t *pair;
	nvpair_t *propval;
	int rv = 0;
	uint64_t intval;
	char *strval;
	nvlist_t *genericnvl;
	nvlist_t *errors;
	nvlist_t *retrynvl;

	/*
	 * genericnvl: properties without a special setter, applied together
	 * errors:     propname -> errno for everything that failed
	 * retrynvl:   special properties that failed on the first pass
	 */
	VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);

retry:
	/* This loop runs over 'nvl' first and, if needed, over retrynvl. */
	pair = NULL;
	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
		const char *propname = nvpair_name(pair);
		zfs_prop_t prop = zfs_name_to_prop(propname);
		int err = 0;

		/* decode the property value */
		propval = pair;
		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
			nvlist_t *attrs;
			VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
			    &propval) != 0)
				err = EINVAL;
		}

		/* Validate value type */
		if (err == 0 && prop == ZPROP_INVAL) {
			/*
			 * Not a native property: only user properties
			 * (string) and userquota properties (uint64 array)
			 * are accepted.
			 */
			if (zfs_prop_user(propname)) {
				if (nvpair_type(propval) != DATA_TYPE_STRING)
					err = EINVAL;
			} else if (zfs_prop_userquota(propname)) {
				if (nvpair_type(propval) !=
				    DATA_TYPE_UINT64_ARRAY)
					err = EINVAL;
			} else {
				err = EINVAL;
			}
		} else if (err == 0) {
			/*
			 * Native property: the nvpair type has to agree
			 * with the property's declared type.
			 */
			if (nvpair_type(propval) == DATA_TYPE_STRING) {
				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
					err = EINVAL;
			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
				const char *unused;

				VERIFY(nvpair_value_uint64(propval,
				    &intval) == 0);

				switch (zfs_prop_get_type(prop)) {
				case PROP_TYPE_NUMBER:
					break;
				case PROP_TYPE_STRING:
					err = EINVAL;
					break;
				case PROP_TYPE_INDEX:
					/* The index must map to a name. */
					if (zfs_prop_index_to_string(prop,
					    intval, &unused) != 0)
						err = EINVAL;
					break;
				default:
					cmn_err(CE_PANIC,
					    "unknown property type");
				}
			} else {
				err = EINVAL;
			}
		}

		/* Validate permissions */
		if (err == 0)
			err = zfs_check_settable(dsname, pair, CRED());

		if (err == 0) {
			/* -1 here means "no special setter for this one". */
			err = zfs_prop_set_special(dsname, source, pair);
			if (err == -1) {
				/*
				 * For better performance we build up a list of
				 * properties to set in a single transaction.
				 */
				err = nvlist_add_nvpair(genericnvl, pair);
			} else if (err != 0 && nvl != retrynvl) {
				/*
				 * This may be a spurious error caused by
				 * receiving quota and reservation out of order.
				 * Try again in a second pass.
				 */
				err = nvlist_add_nvpair(retrynvl, pair);
			}
		}

		/*
		 * On the first pass a special-setter failure was replaced
		 * above by the result of queueing the retry, so it is only
		 * recorded here if it fails again on the second pass.
		 */
		if (err != 0)
			VERIFY(nvlist_add_int32(errors, propname, err) == 0);
	}

	/* Run the second pass, at most once, over the queued retries. */
	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
		nvl = retrynvl;
		goto retry;
	}

	if (!nvlist_empty(genericnvl) &&
	    dsl_props_set(dsname, source, genericnvl) != 0) {
		/*
		 * If this fails, we still want to set as many properties as we
		 * can, so try setting them individually.
		 */
		pair = NULL;
		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
			const char *propname = nvpair_name(pair);
			int err = 0;

			/* Unwrap the value; it was validated above. */
			propval = pair;
			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
				nvlist_t *attrs;
				VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
				VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
				    &propval) == 0);
			}

			if (nvpair_type(propval) == DATA_TYPE_STRING) {
				VERIFY(nvpair_value_string(propval,
				    &strval) == 0);
				err = dsl_prop_set(dsname, propname, source, 1,
				    strlen(strval) + 1, strval);
			} else {
				VERIFY(nvpair_value_uint64(propval,
				    &intval) == 0);
				err = dsl_prop_set(dsname, propname, source, 8,
				    1, &intval);
			}

			if (err != 0) {
				VERIFY(nvlist_add_int32(errors, propname,
				    err) == 0);
			}
		}
	}
	nvlist_free(genericnvl);
	nvlist_free(retrynvl);

	/*
	 * The return value is the errno stored in the first pair of the
	 * error list; an empty list is dropped and reported as NULL.
	 */
	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
		nvlist_free(errors);
		errors = NULL;
	} else {
		VERIFY(nvpair_value_int32(pair, &rv) == 0);
	}

	/* Hand the list to the caller if wanted, otherwise discard it. */
	if (errlist == NULL)
		nvlist_free(errors);
	else
		*errlist = errors;

	return (rv);
}
2412
2413/*
2414 * Check that all the properties are valid user properties.
2415 */
2416static int
2417zfs_check_userprops(char *fsname, nvlist_t *nvl)
2418{
2419	nvpair_t *pair = NULL;
2420	int error = 0;
2421
2422	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2423		const char *propname = nvpair_name(pair);
2424		char *valstr;
2425
2426		if (!zfs_prop_user(propname) ||
2427		    nvpair_type(pair) != DATA_TYPE_STRING)
2428			return (EINVAL);
2429
2430		if (error = zfs_secpolicy_write_perms(fsname,
2431		    ZFS_DELEG_PERM_USERPROP, CRED()))
2432			return (error);
2433
2434		if (strlen(propname) >= ZAP_MAXNAMELEN)
2435			return (ENAMETOOLONG);
2436
2437		VERIFY(nvpair_value_string(pair, &valstr) == 0);
2438		if (strlen(valstr) >= ZAP_MAXVALUELEN)
2439			return (E2BIG);
2440	}
2441	return (0);
2442}
2443
2444static void
2445props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2446{
2447	nvpair_t *pair;
2448
2449	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2450
2451	pair = NULL;
2452	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2453		if (nvlist_exists(skipped, nvpair_name(pair)))
2454			continue;
2455
2456		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2457	}
2458}
2459
2460static int
2461clear_received_props(objset_t *os, const char *fs, nvlist_t *props,
2462    nvlist_t *skipped)
2463{
2464	int err = 0;
2465	nvlist_t *cleared_props = NULL;
2466	props_skip(props, skipped, &cleared_props);
2467	if (!nvlist_empty(cleared_props)) {
2468		/*
2469		 * Acts on local properties until the dataset has received
2470		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2471		 */
2472		zprop_source_t flags = (ZPROP_SRC_NONE |
2473		    (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0));
2474		err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL);
2475	}
2476	nvlist_free(cleared_props);
2477	return (err);
2478}
2479
2480/*
2481 * inputs:
2482 * zc_name		name of filesystem
2483 * zc_value		name of property to set
2484 * zc_nvlist_src{_size}	nvlist of properties to apply
2485 * zc_cookie		received properties flag
2486 *
2487 * outputs:
2488 * zc_nvlist_dst{_size} error for each unapplied received property
2489 */
2490static int
2491zfs_ioc_set_prop(zfs_cmd_t *zc)
2492{
2493	nvlist_t *nvl;
2494	boolean_t received = zc->zc_cookie;
2495	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2496	    ZPROP_SRC_LOCAL);
2497	nvlist_t *errors = NULL;
2498	int error;
2499
2500	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2501	    zc->zc_iflags, &nvl)) != 0)
2502		return (error);
2503
2504	if (received) {
2505		nvlist_t *origprops;
2506		objset_t *os;
2507
2508		if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
2509			if (dsl_prop_get_received(os, &origprops) == 0) {
2510				(void) clear_received_props(os,
2511				    zc->zc_name, origprops, nvl);
2512				nvlist_free(origprops);
2513			}
2514
2515			dsl_prop_set_hasrecvd(os);
2516			dmu_objset_rele(os, FTAG);
2517		}
2518	}
2519
2520	error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors);
2521
2522	if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2523		(void) put_nvlist(zc, errors);
2524	}
2525
2526	nvlist_free(errors);
2527	nvlist_free(nvl);
2528	return (error);
2529}
2530
2531/*
2532 * inputs:
2533 * zc_name		name of filesystem
2534 * zc_value		name of property to inherit
2535 * zc_cookie		revert to received value if TRUE
2536 *
2537 * outputs:		none
2538 */
2539static int
2540zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2541{
2542	const char *propname = zc->zc_value;
2543	zfs_prop_t prop = zfs_name_to_prop(propname);
2544	boolean_t received = zc->zc_cookie;
2545	zprop_source_t source = (received
2546	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2547	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2548
2549	if (received) {
2550		nvlist_t *dummy;
2551		nvpair_t *pair;
2552		zprop_type_t type;
2553		int err;
2554
2555		/*
2556		 * zfs_prop_set_special() expects properties in the form of an
2557		 * nvpair with type info.
2558		 */
2559		if (prop == ZPROP_INVAL) {
2560			if (!zfs_prop_user(propname))
2561				return (EINVAL);
2562
2563			type = PROP_TYPE_STRING;
2564		} else if (prop == ZFS_PROP_VOLSIZE ||
2565		    prop == ZFS_PROP_VERSION) {
2566			return (EINVAL);
2567		} else {
2568			type = zfs_prop_get_type(prop);
2569		}
2570
2571		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2572
2573		switch (type) {
2574		case PROP_TYPE_STRING:
2575			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2576			break;
2577		case PROP_TYPE_NUMBER:
2578		case PROP_TYPE_INDEX:
2579			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2580			break;
2581		default:
2582			nvlist_free(dummy);
2583			return (EINVAL);
2584		}
2585
2586		pair = nvlist_next_nvpair(dummy, NULL);
2587		err = zfs_prop_set_special(zc->zc_name, source, pair);
2588		nvlist_free(dummy);
2589		if (err != -1)
2590			return (err); /* special property already handled */
2591	} else {
2592		/*
2593		 * Only check this in the non-received case. We want to allow
2594		 * 'inherit -S' to revert non-inheritable properties like quota
2595		 * and reservation to the received or default values even though
2596		 * they are not considered inheritable.
2597		 */
2598		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2599			return (EINVAL);
2600	}
2601
2602	/* the property name has been validated by zfs_secpolicy_inherit() */
2603	return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));
2604}
2605
2606static int
2607zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2608{
2609	nvlist_t *props;
2610	spa_t *spa;
2611	int error;
2612	nvpair_t *pair;
2613
2614	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2615	    zc->zc_iflags, &props))
2616		return (error);
2617
2618	/*
2619	 * If the only property is the configfile, then just do a spa_lookup()
2620	 * to handle the faulted case.
2621	 */
2622	pair = nvlist_next_nvpair(props, NULL);
2623	if (pair != NULL && strcmp(nvpair_name(pair),
2624	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2625	    nvlist_next_nvpair(props, pair) == NULL) {
2626		mutex_enter(&spa_namespace_lock);
2627		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2628			spa_configfile_set(spa, props, B_FALSE);
2629			spa_config_sync(spa, B_FALSE, B_TRUE);
2630		}
2631		mutex_exit(&spa_namespace_lock);
2632		if (spa != NULL) {
2633			nvlist_free(props);
2634			return (0);
2635		}
2636	}
2637
2638	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2639		nvlist_free(props);
2640		return (error);
2641	}
2642
2643	error = spa_prop_set(spa, props);
2644
2645	nvlist_free(props);
2646	spa_close(spa, FTAG);
2647
2648	return (error);
2649}
2650
2651static int
2652zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2653{
2654	spa_t *spa;
2655	int error;
2656	nvlist_t *nvp = NULL;
2657
2658	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2659		/*
2660		 * If the pool is faulted, there may be properties we can still
2661		 * get (such as altroot and cachefile), so attempt to get them
2662		 * anyway.
2663		 */
2664		mutex_enter(&spa_namespace_lock);
2665		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2666			error = spa_prop_get(spa, &nvp);
2667		mutex_exit(&spa_namespace_lock);
2668	} else {
2669		error = spa_prop_get(spa, &nvp);
2670		spa_close(spa, FTAG);
2671	}
2672
2673	if (error == 0 && zc->zc_nvlist_dst != 0)
2674		error = put_nvlist(zc, nvp);
2675	else
2676		error = EFAULT;
2677
2678	nvlist_free(nvp);
2679	return (error);
2680}
2681
2682/*
2683 * inputs:
2684 * zc_name		name of filesystem
2685 * zc_nvlist_src{_size}	nvlist of delegated permissions
2686 * zc_perm_action	allow/unallow flag
2687 *
2688 * outputs:		none
2689 */
2690static int
2691zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2692{
2693	int error;
2694	nvlist_t *fsaclnv = NULL;
2695
2696	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2697	    zc->zc_iflags, &fsaclnv)) != 0)
2698		return (error);
2699
2700	/*
2701	 * Verify nvlist is constructed correctly
2702	 */
2703	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2704		nvlist_free(fsaclnv);
2705		return (EINVAL);
2706	}
2707
2708	/*
2709	 * If we don't have PRIV_SYS_MOUNT, then validate
2710	 * that user is allowed to hand out each permission in
2711	 * the nvlist(s)
2712	 */
2713
2714	error = secpolicy_zfs(CRED());
2715	if (error) {
2716		if (zc->zc_perm_action == B_FALSE) {
2717			error = dsl_deleg_can_allow(zc->zc_name,
2718			    fsaclnv, CRED());
2719		} else {
2720			error = dsl_deleg_can_unallow(zc->zc_name,
2721			    fsaclnv, CRED());
2722		}
2723	}
2724
2725	if (error == 0)
2726		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2727
2728	nvlist_free(fsaclnv);
2729	return (error);
2730}
2731
2732/*
2733 * inputs:
2734 * zc_name		name of filesystem
2735 *
2736 * outputs:
2737 * zc_nvlist_src{_size}	nvlist of delegated permissions
2738 */
2739static int
2740zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2741{
2742	nvlist_t *nvp;
2743	int error;
2744
2745	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2746		error = put_nvlist(zc, nvp);
2747		nvlist_free(nvp);
2748	}
2749
2750	return (error);
2751}
2752
2753/*
2754 * Search the vfs list for a specified resource.  Returns a pointer to it
2755 * or NULL if no suitable entry is found. The caller of this routine
2756 * is responsible for releasing the returned vfs pointer.
2757 */
2758static vfs_t *
2759zfs_get_vfs(const char *resource)
2760{
2761	vfs_t *vfsp;
2762
2763	mtx_lock(&mountlist_mtx);
2764	TAILQ_FOREACH(vfsp, &mountlist, mnt_list) {
2765		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
2766			VFS_HOLD(vfsp);
2767			break;
2768		}
2769	}
2770	mtx_unlock(&mountlist_mtx);
2771	return (vfsp);
2772}
2773
2774/* ARGSUSED */
2775static void
2776zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2777{
2778	zfs_creat_t *zct = arg;
2779
2780	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2781}
2782
2783#define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
2784
2785/*
2786 * inputs:
2787 * createprops		list of properties requested by creator
2788 * default_zplver	zpl version to use if unspecified in createprops
2789 * fuids_ok		fuids allowed in this version of the spa?
2790 * os			parent objset pointer (NULL if root fs)
2791 *
2792 * outputs:
2793 * zplprops	values for the zplprops we attach to the master node object
2794 * is_ci	true if requested file system will be purely case-insensitive
2795 *
2796 * Determine the settings for utf8only, normalization and
2797 * casesensitivity.  Specific values may have been requested by the
2798 * creator and/or we can inherit values from the parent dataset.  If
2799 * the file system is of too early a vintage, a creator can not
2800 * request settings for these properties, even if the requested
2801 * setting is the default value.  We don't actually want to create dsl
2802 * properties for these, so remove them from the source nvlist after
2803 * processing.
2804 */
2805static int
2806zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
2807    boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
2808    nvlist_t *zplprops, boolean_t *is_ci)
2809{
2810	uint64_t sense = ZFS_PROP_UNDEFINED;
2811	uint64_t norm = ZFS_PROP_UNDEFINED;
2812	uint64_t u8 = ZFS_PROP_UNDEFINED;
2813
2814	ASSERT(zplprops != NULL);
2815
2816	/*
2817	 * Pull out creator prop choices, if any.
2818	 */
2819	if (createprops) {
2820		(void) nvlist_lookup_uint64(createprops,
2821		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
2822		(void) nvlist_lookup_uint64(createprops,
2823		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
2824		(void) nvlist_remove_all(createprops,
2825		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
2826		(void) nvlist_lookup_uint64(createprops,
2827		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
2828		(void) nvlist_remove_all(createprops,
2829		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
2830		(void) nvlist_lookup_uint64(createprops,
2831		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
2832		(void) nvlist_remove_all(createprops,
2833		    zfs_prop_to_name(ZFS_PROP_CASE));
2834	}
2835
2836	/*
2837	 * If the zpl version requested is whacky or the file system
2838	 * or pool is version is too "young" to support normalization
2839	 * and the creator tried to set a value for one of the props,
2840	 * error out.
2841	 */
2842	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
2843	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
2844	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
2845	    (zplver < ZPL_VERSION_NORMALIZATION &&
2846	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
2847	    sense != ZFS_PROP_UNDEFINED)))
2848		return (ENOTSUP);
2849
2850	/*
2851	 * Put the version in the zplprops
2852	 */
2853	VERIFY(nvlist_add_uint64(zplprops,
2854	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
2855
2856	if (norm == ZFS_PROP_UNDEFINED)
2857		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
2858	VERIFY(nvlist_add_uint64(zplprops,
2859	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
2860
2861	/*
2862	 * If we're normalizing, names must always be valid UTF-8 strings.
2863	 */
2864	if (norm)
2865		u8 = 1;
2866	if (u8 == ZFS_PROP_UNDEFINED)
2867		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
2868	VERIFY(nvlist_add_uint64(zplprops,
2869	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
2870
2871	if (sense == ZFS_PROP_UNDEFINED)
2872		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
2873	VERIFY(nvlist_add_uint64(zplprops,
2874	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
2875
2876	if (is_ci)
2877		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
2878
2879	return (0);
2880}
2881
2882static int
2883zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
2884    nvlist_t *zplprops, boolean_t *is_ci)
2885{
2886	boolean_t fuids_ok, sa_ok;
2887	uint64_t zplver = ZPL_VERSION;
2888	objset_t *os = NULL;
2889	char parentname[MAXNAMELEN];
2890	char *cp;
2891	spa_t *spa;
2892	uint64_t spa_vers;
2893	int error;
2894
2895	(void) strlcpy(parentname, dataset, sizeof (parentname));
2896	cp = strrchr(parentname, '/');
2897	ASSERT(cp != NULL);
2898	cp[0] = '\0';
2899
2900	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
2901		return (error);
2902
2903	spa_vers = spa_version(spa);
2904	spa_close(spa, FTAG);
2905
2906	zplver = zfs_zpl_version_map(spa_vers);
2907	fuids_ok = (zplver >= ZPL_VERSION_FUID);
2908	sa_ok = (zplver >= ZPL_VERSION_SA);
2909
2910	/*
2911	 * Open parent object set so we can inherit zplprop values.
2912	 */
2913	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
2914		return (error);
2915
2916	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
2917	    zplprops, is_ci);
2918	dmu_objset_rele(os, FTAG);
2919	return (error);
2920}
2921
2922static int
2923zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
2924    nvlist_t *zplprops, boolean_t *is_ci)
2925{
2926	boolean_t fuids_ok;
2927	boolean_t sa_ok;
2928	uint64_t zplver = ZPL_VERSION;
2929	int error;
2930
2931	zplver = zfs_zpl_version_map(spa_vers);
2932	fuids_ok = (zplver >= ZPL_VERSION_FUID);
2933	sa_ok = (zplver >= ZPL_VERSION_SA);
2934
2935	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
2936	    createprops, zplprops, is_ci);
2937	return (error);
2938}
2939
2940/*
2941 * inputs:
2942 * zc_objset_type	type of objset to create (fs vs zvol)
2943 * zc_name		name of new objset
2944 * zc_value		name of snapshot to clone from (may be empty)
2945 * zc_nvlist_src{_size}	nvlist of properties to apply
2946 *
2947 * outputs: none
2948 */
2949static int
2950zfs_ioc_create(zfs_cmd_t *zc)
2951{
2952	objset_t *clone;
2953	int error = 0;
2954	zfs_creat_t zct;
2955	nvlist_t *nvprops = NULL;
2956	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
2957	dmu_objset_type_t type = zc->zc_objset_type;
2958
2959	switch (type) {
2960
2961	case DMU_OST_ZFS:
2962		cbfunc = zfs_create_cb;
2963		break;
2964
2965	case DMU_OST_ZVOL:
2966		cbfunc = zvol_create_cb;
2967		break;
2968
2969	default:
2970		cbfunc = NULL;
2971		break;
2972	}
2973	if (strchr(zc->zc_name, '@') ||
2974	    strchr(zc->zc_name, '%'))
2975		return (EINVAL);
2976
2977	if (zc->zc_nvlist_src != 0 &&
2978	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2979	    zc->zc_iflags, &nvprops)) != 0)
2980		return (error);
2981
2982	zct.zct_zplprops = NULL;
2983	zct.zct_props = nvprops;
2984
2985	if (zc->zc_value[0] != '\0') {
2986		/*
2987		 * We're creating a clone of an existing snapshot.
2988		 */
2989		zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2990		if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
2991			nvlist_free(nvprops);
2992			return (EINVAL);
2993		}
2994
2995		error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
2996		if (error) {
2997			nvlist_free(nvprops);
2998			return (error);
2999		}
3000
3001		error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
3002		dmu_objset_rele(clone, FTAG);
3003		if (error) {
3004			nvlist_free(nvprops);
3005			return (error);
3006		}
3007	} else {
3008		boolean_t is_insensitive = B_FALSE;
3009
3010		if (cbfunc == NULL) {
3011			nvlist_free(nvprops);
3012			return (EINVAL);
3013		}
3014
3015		if (type == DMU_OST_ZVOL) {
3016			uint64_t volsize, volblocksize;
3017
3018			if (nvprops == NULL ||
3019			    nvlist_lookup_uint64(nvprops,
3020			    zfs_prop_to_name(ZFS_PROP_VOLSIZE),
3021			    &volsize) != 0) {
3022				nvlist_free(nvprops);
3023				return (EINVAL);
3024			}
3025
3026			if ((error = nvlist_lookup_uint64(nvprops,
3027			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3028			    &volblocksize)) != 0 && error != ENOENT) {
3029				nvlist_free(nvprops);
3030				return (EINVAL);
3031			}
3032
3033			if (error != 0)
3034				volblocksize = zfs_prop_default_numeric(
3035				    ZFS_PROP_VOLBLOCKSIZE);
3036
3037			if ((error = zvol_check_volblocksize(
3038			    volblocksize)) != 0 ||
3039			    (error = zvol_check_volsize(volsize,
3040			    volblocksize)) != 0) {
3041				nvlist_free(nvprops);
3042				return (error);
3043			}
3044		} else if (type == DMU_OST_ZFS) {
3045			int error;
3046
3047			/*
3048			 * We have to have normalization and
3049			 * case-folding flags correct when we do the
3050			 * file system creation, so go figure them out
3051			 * now.
3052			 */
3053			VERIFY(nvlist_alloc(&zct.zct_zplprops,
3054			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3055			error = zfs_fill_zplprops(zc->zc_name, nvprops,
3056			    zct.zct_zplprops, &is_insensitive);
3057			if (error != 0) {
3058				nvlist_free(nvprops);
3059				nvlist_free(zct.zct_zplprops);
3060				return (error);
3061			}
3062		}
3063		error = dmu_objset_create(zc->zc_name, type,
3064		    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3065		nvlist_free(zct.zct_zplprops);
3066	}
3067
3068	/*
3069	 * It would be nice to do this atomically.
3070	 */
3071	if (error == 0) {
3072		error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
3073		    nvprops, NULL);
3074		if (error != 0)
3075			(void) dmu_objset_destroy(zc->zc_name, B_FALSE);
3076	}
3077	nvlist_free(nvprops);
3078#ifdef __FreeBSD__
3079	if (error == 0 && type == DMU_OST_ZVOL)
3080		zvol_create_minors(zc->zc_name);
3081#endif
3082	return (error);
3083}
3084
3085/*
3086 * inputs:
3087 * zc_name	name of filesystem
3088 * zc_value	short name of snapshot
3089 * zc_cookie	recursive flag
3090 * zc_nvlist_src[_size] property list
3091 *
3092 * outputs:
3093 * zc_value	short snapname (i.e. part after the '@')
3094 */
3095static int
3096zfs_ioc_snapshot(zfs_cmd_t *zc)
3097{
3098	nvlist_t *nvprops = NULL;
3099	int error;
3100	boolean_t recursive = zc->zc_cookie;
3101
3102	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
3103		return (EINVAL);
3104
3105	if (zc->zc_nvlist_src != 0 &&
3106	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3107	    zc->zc_iflags, &nvprops)) != 0)
3108		return (error);
3109
3110	error = zfs_check_userprops(zc->zc_name, nvprops);
3111	if (error)
3112		goto out;
3113
3114	if (!nvlist_empty(nvprops) &&
3115	    zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) {
3116		error = ENOTSUP;
3117		goto out;
3118	}
3119
3120	error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL,
3121	    nvprops, recursive, B_FALSE, -1);
3122
3123out:
3124	nvlist_free(nvprops);
3125	return (error);
3126}
3127
3128int
3129zfs_unmount_snap(const char *name, void *arg)
3130{
3131	vfs_t *vfsp = NULL;
3132
3133	if (arg) {
3134		char *snapname = arg;
3135		char *fullname = kmem_asprintf("%s@%s", name, snapname);
3136		vfsp = zfs_get_vfs(fullname);
3137		strfree(fullname);
3138	} else if (strchr(name, '@')) {
3139		vfsp = zfs_get_vfs(name);
3140	}
3141
3142	if (vfsp) {
3143		/*
3144		 * Always force the unmount for snapshots.
3145		 */
3146		int flag = MS_FORCE;
3147		int err;
3148
3149		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
3150			VFS_RELE(vfsp);
3151			return (err);
3152		}
3153		VFS_RELE(vfsp);
3154		mtx_lock(&Giant);	/* dounmount() */
3155		dounmount(vfsp, flag, curthread);
3156		mtx_unlock(&Giant);	/* dounmount() */
3157	}
3158	return (0);
3159}
3160
3161/*
3162 * inputs:
3163 * zc_name		name of filesystem, snaps must be under it
3164 * zc_nvlist_src[_size]	full names of snapshots to destroy
3165 * zc_defer_destroy	mark for deferred destroy
3166 *
3167 * outputs:
3168 * zc_name		on failure, name of failed snapshot
3169 */
3170static int
3171zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc)
3172{
3173	int err, len;
3174	nvlist_t *nvl;
3175	nvpair_t *pair;
3176
3177	if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3178	    zc->zc_iflags, &nvl)) != 0) {
3179#ifndef __FreeBSD__
3180		return (err);
3181#else
3182		/*
3183		 * We are probably called by older binaries,
3184		 * allocate and populate nvlist with recursive snapshots
3185		 */
3186		if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
3187			return (EINVAL);
3188		VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3189		err = dmu_get_recursive_snaps_nvl(zc->zc_name,
3190		    zc->zc_value, nvl);
3191		if (err) {
3192			nvlist_free(nvl);
3193			return (err);
3194		}
3195#endif /* __FreeBSD__ */
3196	}
3197
3198	len = strlen(zc->zc_name);
3199	for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
3200	    pair = nvlist_next_nvpair(nvl, pair)) {
3201		const char *name = nvpair_name(pair);
3202		/*
3203		 * The snap name must be underneath the zc_name.  This ensures
3204		 * that our permission checks were legitimate.
3205		 */
3206		if (strncmp(zc->zc_name, name, len) != 0 ||
3207		    (name[len] != '@' && name[len] != '/')) {
3208			nvlist_free(nvl);
3209			return (EINVAL);
3210		}
3211
3212		(void) zfs_unmount_snap(name, NULL);
3213	}
3214
3215	err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy,
3216	    zc->zc_name);
3217	nvlist_free(nvl);
3218	return (err);
3219}
3220
3221/*
3222 * inputs:
3223 * zc_name		name of dataset to destroy
3224 * zc_objset_type	type of objset
3225 * zc_defer_destroy	mark for deferred destroy
3226 *
3227 * outputs:		none
3228 */
3229static int
3230zfs_ioc_destroy(zfs_cmd_t *zc)
3231{
3232	int err;
3233	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
3234		err = zfs_unmount_snap(zc->zc_name, NULL);
3235		if (err)
3236			return (err);
3237	}
3238
3239	err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy);
3240	if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
3241		(void) zvol_remove_minor(zc->zc_name);
3242	return (err);
3243}
3244
/*
 * Roll a dataset back to its most recent snapshot.
 *
 * The rollback is done by cloning the most recent snapshot into a temporary
 * "<zc_name>/%rollback" dataset, swapping that clone with the dataset via
 * dsl_dataset_clone_swap(), and then destroying the clone.
 *
 * inputs:
 * zc_name	name of dataset to rollback (to most recent snapshot)
 *
 * outputs:	none
 *
 * Returns EINVAL if zc_name is a snapshot or has no previous snapshot,
 * EBUSY if the dataset cannot be owned for the swap, or the first error
 * from the hold/clone/own/suspend/swap/resume steps.
 */
static int
zfs_ioc_rollback(zfs_cmd_t *zc)
{
	dsl_dataset_t *ds, *clone;
	int error;
	zfsvfs_t *zfsvfs;
	char *clone_name;

	error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
	if (error)
		return (error);

	/* must not be a snapshot */
	if (dsl_dataset_is_snapshot(ds)) {
		dsl_dataset_rele(ds, FTAG);
		return (EINVAL);
	}

	/* must have a most recent snapshot */
	if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
		dsl_dataset_rele(ds, FTAG);
		return (EINVAL);
	}

	/*
	 * Create clone of most recent snapshot.
	 */
	clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
	error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
	if (error)
		goto out;

	/*
	 * NOTE(review): if taking ownership fails here, the clone created
	 * just above is not destroyed on the way out -- it looks like it is
	 * left behind.  Confirm whether something else cleans it up.
	 */
	error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
	if (error)
		goto out;

	/*
	 * Do clone swap.
	 */
	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
		/*
		 * getzfsvfs() found a zfsvfs for the dataset: suspend the
		 * file system around the swap and resume it afterwards.
		 */
		error = zfs_suspend_fs(zfsvfs);
		if (error == 0) {
			int resume_err;

			if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
				error = dsl_dataset_clone_swap(clone, ds,
				    B_TRUE);
				dsl_dataset_disown(ds, FTAG);
				/* disowned: skip the release at 'out' */
				ds = NULL;
			} else {
				error = EBUSY;
			}
			resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
			/* a swap error takes precedence over a resume error */
			error = error ? error : resume_err;
		}
		VFS_RELE(zfsvfs->z_vfs);
	} else {
		/* No zfsvfs was found, so there is nothing to suspend. */
		if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
			error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
			dsl_dataset_disown(ds, FTAG);
			/* disowned: skip the release at 'out' */
			ds = NULL;
		} else {
			error = EBUSY;
		}
	}

	/*
	 * Destroy clone (which also closes it).
	 */
	(void) dsl_dataset_destroy(clone, FTAG, B_FALSE);

out:
	strfree(clone_name);
	/* ds is still non-NULL only if it was never owned and disowned. */
	if (ds)
		dsl_dataset_rele(ds, FTAG);
	return (error);
}
3328
3329/*
3330 * inputs:
3331 * zc_name	old name of dataset
3332 * zc_value	new name of dataset
3333 * zc_cookie	recursive flag (only valid for snapshots)
3334 *
3335 * outputs:	none
3336 */
3337static int
3338zfs_ioc_rename(zfs_cmd_t *zc)
3339{
3340	int flags = 0;
3341
3342	if (zc->zc_cookie & 1)
3343		flags |= ZFS_RENAME_RECURSIVE;
3344	if (zc->zc_cookie & 2)
3345		flags |= ZFS_RENAME_ALLOW_MOUNTED;
3346
3347	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3348	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3349	    strchr(zc->zc_value, '%'))
3350		return (EINVAL);
3351
3352	/*
3353	 * Unmount snapshot unless we're doing a recursive rename,
3354	 * in which case the dataset code figures out which snapshots
3355	 * to unmount.
3356	 */
3357	if (!(flags & ZFS_RENAME_RECURSIVE) &&
3358	    strchr(zc->zc_name, '@') != NULL &&
3359	    zc->zc_objset_type == DMU_OST_ZFS) {
3360		int err = zfs_unmount_snap(zc->zc_name, NULL);
3361		if (err)
3362			return (err);
3363	}
3364	return (dmu_objset_rename(zc->zc_name, zc->zc_value, flags));
3365}
3366
3367static int
3368zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3369{
3370	const char *propname = nvpair_name(pair);
3371	boolean_t issnap = (strchr(dsname, '@') != NULL);
3372	zfs_prop_t prop = zfs_name_to_prop(propname);
3373	uint64_t intval;
3374	int err;
3375
3376	if (prop == ZPROP_INVAL) {
3377		if (zfs_prop_user(propname)) {
3378			if (err = zfs_secpolicy_write_perms(dsname,
3379			    ZFS_DELEG_PERM_USERPROP, cr))
3380				return (err);
3381			return (0);
3382		}
3383
3384		if (!issnap && zfs_prop_userquota(propname)) {
3385			const char *perm = NULL;
3386			const char *uq_prefix =
3387			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3388			const char *gq_prefix =
3389			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3390
3391			if (strncmp(propname, uq_prefix,
3392			    strlen(uq_prefix)) == 0) {
3393				perm = ZFS_DELEG_PERM_USERQUOTA;
3394			} else if (strncmp(propname, gq_prefix,
3395			    strlen(gq_prefix)) == 0) {
3396				perm = ZFS_DELEG_PERM_GROUPQUOTA;
3397			} else {
3398				/* USERUSED and GROUPUSED are read-only */
3399				return (EINVAL);
3400			}
3401
3402			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3403				return (err);
3404			return (0);
3405		}
3406
3407		return (EINVAL);
3408	}
3409
3410	if (issnap)
3411		return (EINVAL);
3412
3413	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3414		/*
3415		 * dsl_prop_get_all_impl() returns properties in this
3416		 * format.
3417		 */
3418		nvlist_t *attrs;
3419		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3420		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3421		    &pair) == 0);
3422	}
3423
3424	/*
3425	 * Check that this value is valid for this pool version
3426	 */
3427	switch (prop) {
3428	case ZFS_PROP_COMPRESSION:
3429		/*
3430		 * If the user specified gzip compression, make sure
3431		 * the SPA supports it. We ignore any errors here since
3432		 * we'll catch them later.
3433		 */
3434		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3435		    nvpair_value_uint64(pair, &intval) == 0) {
3436			if (intval >= ZIO_COMPRESS_GZIP_1 &&
3437			    intval <= ZIO_COMPRESS_GZIP_9 &&
3438			    zfs_earlier_version(dsname,
3439			    SPA_VERSION_GZIP_COMPRESSION)) {
3440				return (ENOTSUP);
3441			}
3442
3443			if (intval == ZIO_COMPRESS_ZLE &&
3444			    zfs_earlier_version(dsname,
3445			    SPA_VERSION_ZLE_COMPRESSION))
3446				return (ENOTSUP);
3447
3448			/*
3449			 * If this is a bootable dataset then
3450			 * verify that the compression algorithm
3451			 * is supported for booting. We must return
3452			 * something other than ENOTSUP since it
3453			 * implies a downrev pool version.
3454			 */
3455			if (zfs_is_bootfs(dsname) &&
3456			    !BOOTFS_COMPRESS_VALID(intval)) {
3457				return (ERANGE);
3458			}
3459		}
3460		break;
3461
3462	case ZFS_PROP_COPIES:
3463		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3464			return (ENOTSUP);
3465		break;
3466
3467	case ZFS_PROP_DEDUP:
3468		if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3469			return (ENOTSUP);
3470		break;
3471
3472	case ZFS_PROP_SHARESMB:
3473		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3474			return (ENOTSUP);
3475		break;
3476
3477	case ZFS_PROP_ACLINHERIT:
3478		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3479		    nvpair_value_uint64(pair, &intval) == 0) {
3480			if (intval == ZFS_ACL_PASSTHROUGH_X &&
3481			    zfs_earlier_version(dsname,
3482			    SPA_VERSION_PASSTHROUGH_X))
3483				return (ENOTSUP);
3484		}
3485		break;
3486	}
3487
3488	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3489}
3490
3491/*
3492 * Removes properties from the given props list that fail permission checks
3493 * needed to clear them and to restore them in case of a receive error. For each
3494 * property, make sure we have both set and inherit permissions.
3495 *
3496 * Returns the first error encountered if any permission checks fail. If the
3497 * caller provides a non-NULL errlist, it also gives the complete list of names
3498 * of all the properties that failed a permission check along with the
3499 * corresponding error numbers. The caller is responsible for freeing the
3500 * returned errlist.
3501 *
3502 * If every property checks out successfully, zero is returned and the list
3503 * pointed at by errlist is NULL.
3504 */
3505static int
3506zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3507{
3508	zfs_cmd_t *zc;
3509	nvpair_t *pair, *next_pair;
3510	nvlist_t *errors;
3511	int err, rv = 0;
3512
3513	if (props == NULL)
3514		return (0);
3515
3516	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3517
3518	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
3519	(void) strcpy(zc->zc_name, dataset);
3520	pair = nvlist_next_nvpair(props, NULL);
3521	while (pair != NULL) {
3522		next_pair = nvlist_next_nvpair(props, pair);
3523
3524		(void) strcpy(zc->zc_value, nvpair_name(pair));
3525		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
3526		    (err = zfs_secpolicy_inherit(zc, CRED())) != 0) {
3527			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
3528			VERIFY(nvlist_add_int32(errors,
3529			    zc->zc_value, err) == 0);
3530		}
3531		pair = next_pair;
3532	}
3533	kmem_free(zc, sizeof (zfs_cmd_t));
3534
3535	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
3536		nvlist_free(errors);
3537		errors = NULL;
3538	} else {
3539		VERIFY(nvpair_value_int32(pair, &rv) == 0);
3540	}
3541
3542	if (errlist == NULL)
3543		nvlist_free(errors);
3544	else
3545		*errlist = errors;
3546
3547	return (rv);
3548}
3549
3550static boolean_t
3551propval_equals(nvpair_t *p1, nvpair_t *p2)
3552{
3553	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
3554		/* dsl_prop_get_all_impl() format */
3555		nvlist_t *attrs;
3556		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
3557		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3558		    &p1) == 0);
3559	}
3560
3561	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
3562		nvlist_t *attrs;
3563		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
3564		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3565		    &p2) == 0);
3566	}
3567
3568	if (nvpair_type(p1) != nvpair_type(p2))
3569		return (B_FALSE);
3570
3571	if (nvpair_type(p1) == DATA_TYPE_STRING) {
3572		char *valstr1, *valstr2;
3573
3574		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
3575		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
3576		return (strcmp(valstr1, valstr2) == 0);
3577	} else {
3578		uint64_t intval1, intval2;
3579
3580		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
3581		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
3582		return (intval1 == intval2);
3583	}
3584}
3585
3586/*
3587 * Remove properties from props if they are not going to change (as determined
3588 * by comparison with origprops). Remove them from origprops as well, since we
3589 * do not need to clear or restore properties that won't change.
3590 */
3591static void
3592props_reduce(nvlist_t *props, nvlist_t *origprops)
3593{
3594	nvpair_t *pair, *next_pair;
3595
3596	if (origprops == NULL)
3597		return; /* all props need to be received */
3598
3599	pair = nvlist_next_nvpair(props, NULL);
3600	while (pair != NULL) {
3601		const char *propname = nvpair_name(pair);
3602		nvpair_t *match;
3603
3604		next_pair = nvlist_next_nvpair(props, pair);
3605
3606		if ((nvlist_lookup_nvpair(origprops, propname,
3607		    &match) != 0) || !propval_equals(pair, match))
3608			goto next; /* need to set received value */
3609
3610		/* don't clear the existing received value */
3611		(void) nvlist_remove_nvpair(origprops, match);
3612		/* don't bother receiving the property */
3613		(void) nvlist_remove_nvpair(props, pair);
3614next:
3615		pair = next_pair;
3616	}
3617}
3618
3619#ifdef	DEBUG
3620static boolean_t zfs_ioc_recv_inject_err;
3621#endif
3622
3623/*
3624 * inputs:
3625 * zc_name		name of containing filesystem
3626 * zc_nvlist_src{_size}	nvlist of properties to apply
3627 * zc_value		name of snapshot to create
3628 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
3629 * zc_cookie		file descriptor to recv from
3630 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
3631 * zc_guid		force flag
3632 * zc_cleanup_fd	cleanup-on-exit file descriptor
3633 * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
3634 *
3635 * outputs:
3636 * zc_cookie		number of bytes read
3637 * zc_nvlist_dst{_size} error for each unapplied received property
3638 * zc_obj		zprop_errflags_t
3639 * zc_action_handle	handle for this guid/ds mapping
3640 */
3641static int
3642zfs_ioc_recv(zfs_cmd_t *zc)
3643{
3644	file_t *fp;
3645	objset_t *os;
3646	dmu_recv_cookie_t drc;
3647	boolean_t force = (boolean_t)zc->zc_guid;
3648	int fd;
3649	int error = 0;
3650	int props_error = 0;
3651	nvlist_t *errors;
3652	offset_t off;
3653	nvlist_t *props = NULL; /* sent properties */
3654	nvlist_t *origprops = NULL; /* existing properties */
3655	objset_t *origin = NULL;
3656	char *tosnap;
3657	char tofs[ZFS_MAXNAMELEN];
3658	boolean_t first_recvd_props = B_FALSE;
3659
3660	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3661	    strchr(zc->zc_value, '@') == NULL ||
3662	    strchr(zc->zc_value, '%'))
3663		return (EINVAL);
3664
3665	(void) strcpy(tofs, zc->zc_value);
3666	tosnap = strchr(tofs, '@');
3667	*tosnap++ = '\0';
3668
3669	if (zc->zc_nvlist_src != 0 &&
3670	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3671	    zc->zc_iflags, &props)) != 0)
3672		return (error);
3673
3674	fd = zc->zc_cookie;
3675	fp = getf(fd);
3676	if (fp == NULL) {
3677		nvlist_free(props);
3678		return (EBADF);
3679	}
3680
3681	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3682
3683	if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
3684		if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) &&
3685		    !dsl_prop_get_hasrecvd(os)) {
3686			first_recvd_props = B_TRUE;
3687		}
3688
3689		/*
3690		 * If new received properties are supplied, they are to
3691		 * completely replace the existing received properties, so stash
3692		 * away the existing ones.
3693		 */
3694		if (dsl_prop_get_received(os, &origprops) == 0) {
3695			nvlist_t *errlist = NULL;
3696			/*
3697			 * Don't bother writing a property if its value won't
3698			 * change (and avoid the unnecessary security checks).
3699			 *
3700			 * The first receive after SPA_VERSION_RECVD_PROPS is a
3701			 * special case where we blow away all local properties
3702			 * regardless.
3703			 */
3704			if (!first_recvd_props)
3705				props_reduce(props, origprops);
3706			if (zfs_check_clearable(tofs, origprops,
3707			    &errlist) != 0)
3708				(void) nvlist_merge(errors, errlist, 0);
3709			nvlist_free(errlist);
3710		}
3711
3712		dmu_objset_rele(os, FTAG);
3713	}
3714
3715	if (zc->zc_string[0]) {
3716		error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
3717		if (error)
3718			goto out;
3719	}
3720
3721	error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds,
3722	    &zc->zc_begin_record, force, origin, &drc);
3723	if (origin)
3724		dmu_objset_rele(origin, FTAG);
3725	if (error)
3726		goto out;
3727
3728	/*
3729	 * Set properties before we receive the stream so that they are applied
3730	 * to the new data. Note that we must call dmu_recv_stream() if
3731	 * dmu_recv_begin() succeeds.
3732	 */
3733	if (props) {
3734		nvlist_t *errlist;
3735
3736		if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) {
3737			if (drc.drc_newfs) {
3738				if (spa_version(os->os_spa) >=
3739				    SPA_VERSION_RECVD_PROPS)
3740					first_recvd_props = B_TRUE;
3741			} else if (origprops != NULL) {
3742				if (clear_received_props(os, tofs, origprops,
3743				    first_recvd_props ? NULL : props) != 0)
3744					zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3745			} else {
3746				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3747			}
3748			dsl_prop_set_hasrecvd(os);
3749		} else if (!drc.drc_newfs) {
3750			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3751		}
3752
3753		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
3754		    props, &errlist);
3755		(void) nvlist_merge(errors, errlist, 0);
3756		nvlist_free(errlist);
3757	}
3758
3759	if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) {
3760		/*
3761		 * Caller made zc->zc_nvlist_dst less than the minimum expected
3762		 * size or supplied an invalid address.
3763		 */
3764		props_error = EINVAL;
3765	}
3766
3767	off = fp->f_offset;
3768	error = dmu_recv_stream(&drc, fp, &off, zc->zc_cleanup_fd,
3769	    &zc->zc_action_handle);
3770
3771	if (error == 0) {
3772		zfsvfs_t *zfsvfs = NULL;
3773
3774		if (getzfsvfs(tofs, &zfsvfs) == 0) {
3775			/* online recv */
3776			int end_err;
3777
3778			error = zfs_suspend_fs(zfsvfs);
3779			/*
3780			 * If the suspend fails, then the recv_end will
3781			 * likely also fail, and clean up after itself.
3782			 */
3783			end_err = dmu_recv_end(&drc);
3784			if (error == 0)
3785				error = zfs_resume_fs(zfsvfs, tofs);
3786			error = error ? error : end_err;
3787			VFS_RELE(zfsvfs->z_vfs);
3788		} else {
3789			error = dmu_recv_end(&drc);
3790		}
3791	}
3792
3793	zc->zc_cookie = off - fp->f_offset;
3794	if (off >= 0 && off <= MAXOFFSET_T)
3795		fp->f_offset = off;
3796
3797#ifdef	DEBUG
3798	if (zfs_ioc_recv_inject_err) {
3799		zfs_ioc_recv_inject_err = B_FALSE;
3800		error = 1;
3801	}
3802#endif
3803	/*
3804	 * On error, restore the original props.
3805	 */
3806	if (error && props) {
3807		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
3808			if (clear_received_props(os, tofs, props, NULL) != 0) {
3809				/*
3810				 * We failed to clear the received properties.
3811				 * Since we may have left a $recvd value on the
3812				 * system, we can't clear the $hasrecvd flag.
3813				 */
3814				zc->zc_obj |= ZPROP_ERR_NORESTORE;
3815			} else if (first_recvd_props) {
3816				dsl_prop_unset_hasrecvd(os);
3817			}
3818			dmu_objset_rele(os, FTAG);
3819		} else if (!drc.drc_newfs) {
3820			/* We failed to clear the received properties. */
3821			zc->zc_obj |= ZPROP_ERR_NORESTORE;
3822		}
3823
3824		if (origprops == NULL && !drc.drc_newfs) {
3825			/* We failed to stash the original properties. */
3826			zc->zc_obj |= ZPROP_ERR_NORESTORE;
3827		}
3828
3829		/*
3830		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
3831		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
3832		 * explictly if we're restoring local properties cleared in the
3833		 * first new-style receive.
3834		 */
3835		if (origprops != NULL &&
3836		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
3837		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
3838		    origprops, NULL) != 0) {
3839			/*
3840			 * We stashed the original properties but failed to
3841			 * restore them.
3842			 */
3843			zc->zc_obj |= ZPROP_ERR_NORESTORE;
3844		}
3845	}
3846out:
3847	nvlist_free(props);
3848	nvlist_free(origprops);
3849	nvlist_free(errors);
3850	releasef(fd);
3851
3852	if (error == 0)
3853		error = props_error;
3854
3855	return (error);
3856}
3857
3858/*
3859 * inputs:
3860 * zc_name	name of snapshot to send
3861 * zc_cookie	file descriptor to send stream to
3862 * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
3863 * zc_sendobj	objsetid of snapshot to send
3864 * zc_fromobj	objsetid of incremental fromsnap (may be zero)
3865 * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
3866 *		output size in zc_objset_type.
3867 *
3868 * outputs: none
3869 */
3870static int
3871zfs_ioc_send(zfs_cmd_t *zc)
3872{
3873	objset_t *fromsnap = NULL;
3874	objset_t *tosnap;
3875	int error;
3876	offset_t off;
3877	dsl_dataset_t *ds;
3878	dsl_dataset_t *dsfrom = NULL;
3879	spa_t *spa;
3880	dsl_pool_t *dp;
3881	boolean_t estimate = (zc->zc_guid != 0);
3882
3883	error = spa_open(zc->zc_name, &spa, FTAG);
3884	if (error)
3885		return (error);
3886
3887	dp = spa_get_dsl(spa);
3888	rw_enter(&dp->dp_config_rwlock, RW_READER);
3889	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
3890	rw_exit(&dp->dp_config_rwlock);
3891	if (error) {
3892		spa_close(spa, FTAG);
3893		return (error);
3894	}
3895
3896	error = dmu_objset_from_ds(ds, &tosnap);
3897	if (error) {
3898		dsl_dataset_rele(ds, FTAG);
3899		spa_close(spa, FTAG);
3900		return (error);
3901	}
3902
3903	if (zc->zc_fromobj != 0) {
3904		rw_enter(&dp->dp_config_rwlock, RW_READER);
3905		error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom);
3906		rw_exit(&dp->dp_config_rwlock);
3907		spa_close(spa, FTAG);
3908		if (error) {
3909			dsl_dataset_rele(ds, FTAG);
3910			return (error);
3911		}
3912		error = dmu_objset_from_ds(dsfrom, &fromsnap);
3913		if (error) {
3914			dsl_dataset_rele(dsfrom, FTAG);
3915			dsl_dataset_rele(ds, FTAG);
3916			return (error);
3917		}
3918	} else {
3919		spa_close(spa, FTAG);
3920	}
3921
3922	if (estimate) {
3923		error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj,
3924		    &zc->zc_objset_type);
3925	} else {
3926		file_t *fp = getf(zc->zc_cookie);
3927		if (fp == NULL) {
3928			dsl_dataset_rele(ds, FTAG);
3929			if (dsfrom)
3930				dsl_dataset_rele(dsfrom, FTAG);
3931			return (EBADF);
3932		}
3933
3934		off = fp->f_offset;
3935		error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp, &off);
3936
3937		if (off >= 0 && off <= MAXOFFSET_T)
3938			fp->f_offset = off;
3939		releasef(zc->zc_cookie);
3940	}
3941	if (dsfrom)
3942		dsl_dataset_rele(dsfrom, FTAG);
3943	dsl_dataset_rele(ds, FTAG);
3944	return (error);
3945}
3946
3947static int
3948zfs_ioc_inject_fault(zfs_cmd_t *zc)
3949{
3950	int id, error;
3951
3952	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
3953	    &zc->zc_inject_record);
3954
3955	if (error == 0)
3956		zc->zc_guid = (uint64_t)id;
3957
3958	return (error);
3959}
3960
3961static int
3962zfs_ioc_clear_fault(zfs_cmd_t *zc)
3963{
3964	return (zio_clear_fault((int)zc->zc_guid));
3965}
3966
3967static int
3968zfs_ioc_inject_list_next(zfs_cmd_t *zc)
3969{
3970	int id = (int)zc->zc_guid;
3971	int error;
3972
3973	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
3974	    &zc->zc_inject_record);
3975
3976	zc->zc_guid = id;
3977
3978	return (error);
3979}
3980
3981static int
3982zfs_ioc_error_log(zfs_cmd_t *zc)
3983{
3984	spa_t *spa;
3985	int error;
3986	size_t count = (size_t)zc->zc_nvlist_dst_size;
3987
3988	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
3989		return (error);
3990
3991	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
3992	    &count);
3993	if (error == 0)
3994		zc->zc_nvlist_dst_size = count;
3995	else
3996		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
3997
3998	spa_close(spa, FTAG);
3999
4000	return (error);
4001}
4002
4003static int
4004zfs_ioc_clear(zfs_cmd_t *zc)
4005{
4006	spa_t *spa;
4007	vdev_t *vd;
4008	int error;
4009
4010	/*
4011	 * On zpool clear we also fix up missing slogs
4012	 */
4013	mutex_enter(&spa_namespace_lock);
4014	spa = spa_lookup(zc->zc_name);
4015	if (spa == NULL) {
4016		mutex_exit(&spa_namespace_lock);
4017		return (EIO);
4018	}
4019	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4020		/* we need to let spa_open/spa_load clear the chains */
4021		spa_set_log_state(spa, SPA_LOG_CLEAR);
4022	}
4023	spa->spa_last_open_failed = 0;
4024	mutex_exit(&spa_namespace_lock);
4025
4026	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4027		error = spa_open(zc->zc_name, &spa, FTAG);
4028	} else {
4029		nvlist_t *policy;
4030		nvlist_t *config = NULL;
4031
4032		if (zc->zc_nvlist_src == 0)
4033			return (EINVAL);
4034
4035		if ((error = get_nvlist(zc->zc_nvlist_src,
4036		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4037			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4038			    policy, &config);
4039			if (config != NULL) {
4040				int err;
4041
4042				if ((err = put_nvlist(zc, config)) != 0)
4043					error = err;
4044				nvlist_free(config);
4045			}
4046			nvlist_free(policy);
4047		}
4048	}
4049
4050	if (error)
4051		return (error);
4052
4053	spa_vdev_state_enter(spa, SCL_NONE);
4054
4055	if (zc->zc_guid == 0) {
4056		vd = NULL;
4057	} else {
4058		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4059		if (vd == NULL) {
4060			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
4061			spa_close(spa, FTAG);
4062			return (ENODEV);
4063		}
4064	}
4065
4066	vdev_clear(spa, vd);
4067
4068	(void) spa_vdev_state_exit(spa, NULL, 0);
4069
4070	/*
4071	 * Resume any suspended I/Os.
4072	 */
4073	if (zio_resume(spa) != 0)
4074		error = EIO;
4075
4076	spa_close(spa, FTAG);
4077
4078	return (error);
4079}
4080
4081/*
4082 * inputs:
4083 * zc_name	name of filesystem
4084 * zc_value	name of origin snapshot
4085 *
4086 * outputs:
4087 * zc_string	name of conflicting snapshot, if there is one
4088 */
4089static int
4090zfs_ioc_promote(zfs_cmd_t *zc)
4091{
4092	char *cp;
4093
4094	/*
4095	 * We don't need to unmount *all* the origin fs's snapshots, but
4096	 * it's easier.
4097	 */
4098	cp = strchr(zc->zc_value, '@');
4099	if (cp)
4100		*cp = '\0';
4101	(void) dmu_objset_find(zc->zc_value,
4102	    zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
4103	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4104}
4105
4106/*
4107 * Retrieve a single {user|group}{used|quota}@... property.
4108 *
4109 * inputs:
4110 * zc_name	name of filesystem
4111 * zc_objset_type zfs_userquota_prop_t
4112 * zc_value	domain name (eg. "S-1-234-567-89")
4113 * zc_guid	RID/UID/GID
4114 *
4115 * outputs:
4116 * zc_cookie	property value
4117 */
4118static int
4119zfs_ioc_userspace_one(zfs_cmd_t *zc)
4120{
4121	zfsvfs_t *zfsvfs;
4122	int error;
4123
4124	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
4125		return (EINVAL);
4126
4127	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4128	if (error)
4129		return (error);
4130
4131	error = zfs_userspace_one(zfsvfs,
4132	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
4133	zfsvfs_rele(zfsvfs, FTAG);
4134
4135	return (error);
4136}
4137
4138/*
4139 * inputs:
4140 * zc_name		name of filesystem
4141 * zc_cookie		zap cursor
4142 * zc_objset_type	zfs_userquota_prop_t
4143 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
4144 *
4145 * outputs:
4146 * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
4147 * zc_cookie	zap cursor
4148 */
4149static int
4150zfs_ioc_userspace_many(zfs_cmd_t *zc)
4151{
4152	zfsvfs_t *zfsvfs;
4153	int bufsize = zc->zc_nvlist_dst_size;
4154
4155	if (bufsize <= 0)
4156		return (ENOMEM);
4157
4158	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4159	if (error)
4160		return (error);
4161
4162	void *buf = kmem_alloc(bufsize, KM_SLEEP);
4163
4164	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
4165	    buf, &zc->zc_nvlist_dst_size);
4166
4167	if (error == 0) {
4168		error = ddi_copyout(buf,
4169		    (void *)(uintptr_t)zc->zc_nvlist_dst,
4170		    zc->zc_nvlist_dst_size, zc->zc_iflags);
4171	}
4172	kmem_free(buf, bufsize);
4173	zfsvfs_rele(zfsvfs, FTAG);
4174
4175	return (error);
4176}
4177
4178/*
4179 * inputs:
4180 * zc_name		name of filesystem
4181 *
4182 * outputs:
4183 * none
4184 */
4185static int
4186zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
4187{
4188	objset_t *os;
4189	int error = 0;
4190	zfsvfs_t *zfsvfs;
4191
4192	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
4193		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
4194			/*
4195			 * If userused is not enabled, it may be because the
4196			 * objset needs to be closed & reopened (to grow the
4197			 * objset_phys_t).  Suspend/resume the fs will do that.
4198			 */
4199			error = zfs_suspend_fs(zfsvfs);
4200			if (error == 0)
4201				error = zfs_resume_fs(zfsvfs, zc->zc_name);
4202		}
4203		if (error == 0)
4204			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
4205		VFS_RELE(zfsvfs->z_vfs);
4206	} else {
4207		/* XXX kind of reading contents without owning */
4208		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4209		if (error)
4210			return (error);
4211
4212		error = dmu_objset_userspace_upgrade(os);
4213		dmu_objset_rele(os, FTAG);
4214	}
4215
4216	return (error);
4217}
4218
4219#ifdef sun
4220/*
4221 * We don't want to have a hard dependency
4222 * against some special symbols in sharefs
4223 * nfs, and smbsrv.  Determine them if needed when
4224 * the first file system is shared.
4225 * Neither sharefs, nfs or smbsrv are unloadable modules.
4226 */
4227int (*znfsexport_fs)(void *arg);
4228int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
4229int (*zsmbexport_fs)(void *arg, boolean_t add_share);
4230
4231int zfs_nfsshare_inited;
4232int zfs_smbshare_inited;
4233
4234ddi_modhandle_t nfs_mod;
4235ddi_modhandle_t sharefs_mod;
4236ddi_modhandle_t smbsrv_mod;
4237#endif	/* sun */
4238kmutex_t zfs_share_lock;
4239
#ifdef sun
/*
 * Resolve the sharetab support that both NFS and SMB shares depend on:
 * open the sharefs module and look up "sharefs_impl", each only if not
 * already done.  The caller must hold zfs_share_lock.  Returns 0 on
 * success and ENOSYS if either step fails.
 */
static int
zfs_init_sharefs()
{
	int error;

	ASSERT(MUTEX_HELD(&zfs_share_lock));

	if (sharefs_mod == NULL) {
		sharefs_mod = ddi_modopen("fs/sharefs", KRTLD_MODE_FIRST,
		    &error);
		if (sharefs_mod == NULL)
			return (ENOSYS);
	}

	if (zshare_fs == NULL) {
		zshare_fs = (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
		    ddi_modsym(sharefs_mod, "sharefs_impl", &error);
		if (zshare_fs == NULL)
			return (ENOSYS);
	}

	return (0);
}
#endif	/* sun */
4261
4262static int
4263zfs_ioc_share(zfs_cmd_t *zc)
4264{
4265#ifdef sun
4266	int error;
4267	int opcode;
4268
4269	switch (zc->zc_share.z_sharetype) {
4270	case ZFS_SHARE_NFS:
4271	case ZFS_UNSHARE_NFS:
4272		if (zfs_nfsshare_inited == 0) {
4273			mutex_enter(&zfs_share_lock);
4274			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
4275			    KRTLD_MODE_FIRST, &error)) == NULL)) {
4276				mutex_exit(&zfs_share_lock);
4277				return (ENOSYS);
4278			}
4279			if (znfsexport_fs == NULL &&
4280			    ((znfsexport_fs = (int (*)(void *))
4281			    ddi_modsym(nfs_mod,
4282			    "nfs_export", &error)) == NULL)) {
4283				mutex_exit(&zfs_share_lock);
4284				return (ENOSYS);
4285			}
4286			error = zfs_init_sharefs();
4287			if (error) {
4288				mutex_exit(&zfs_share_lock);
4289				return (ENOSYS);
4290			}
4291			zfs_nfsshare_inited = 1;
4292			mutex_exit(&zfs_share_lock);
4293		}
4294		break;
4295	case ZFS_SHARE_SMB:
4296	case ZFS_UNSHARE_SMB:
4297		if (zfs_smbshare_inited == 0) {
4298			mutex_enter(&zfs_share_lock);
4299			if (smbsrv_mod == NULL && ((smbsrv_mod =
4300			    ddi_modopen("drv/smbsrv",
4301			    KRTLD_MODE_FIRST, &error)) == NULL)) {
4302				mutex_exit(&zfs_share_lock);
4303				return (ENOSYS);
4304			}
4305			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
4306			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
4307			    "smb_server_share", &error)) == NULL)) {
4308				mutex_exit(&zfs_share_lock);
4309				return (ENOSYS);
4310			}
4311			error = zfs_init_sharefs();
4312			if (error) {
4313				mutex_exit(&zfs_share_lock);
4314				return (ENOSYS);
4315			}
4316			zfs_smbshare_inited = 1;
4317			mutex_exit(&zfs_share_lock);
4318		}
4319		break;
4320	default:
4321		return (EINVAL);
4322	}
4323
4324	switch (zc->zc_share.z_sharetype) {
4325	case ZFS_SHARE_NFS:
4326	case ZFS_UNSHARE_NFS:
4327		if (error =
4328		    znfsexport_fs((void *)
4329		    (uintptr_t)zc->zc_share.z_exportdata))
4330			return (error);
4331		break;
4332	case ZFS_SHARE_SMB:
4333	case ZFS_UNSHARE_SMB:
4334		if (error = zsmbexport_fs((void *)
4335		    (uintptr_t)zc->zc_share.z_exportdata,
4336		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
4337		    B_TRUE: B_FALSE)) {
4338			return (error);
4339		}
4340		break;
4341	}
4342
4343	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
4344	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
4345	    SHAREFS_ADD : SHAREFS_REMOVE;
4346
4347	/*
4348	 * Add or remove share from sharetab
4349	 */
4350	error = zshare_fs(opcode,
4351	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
4352	    zc->zc_share.z_sharemax);
4353
4354	return (error);
4355
4356#else	/* !sun */
4357	return (ENOSYS);
4358#endif	/* !sun */
4359}
4360
4361ace_t full_access[] = {
4362	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
4363};
4364
4365/*
4366 * inputs:
4367 * zc_name		name of containing filesystem
4368 * zc_obj		object # beyond which we want next in-use object #
4369 *
4370 * outputs:
4371 * zc_obj		next in-use object #
4372 */
4373static int
4374zfs_ioc_next_obj(zfs_cmd_t *zc)
4375{
4376	objset_t *os = NULL;
4377	int error;
4378
4379	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4380	if (error)
4381		return (error);
4382
4383	error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
4384	    os->os_dsl_dataset->ds_phys->ds_prev_snap_txg);
4385
4386	dmu_objset_rele(os, FTAG);
4387	return (error);
4388}
4389
4390/*
4391 * inputs:
4392 * zc_name		name of filesystem
4393 * zc_value		prefix name for snapshot
4394 * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
4395 *
4396 * outputs:
4397 */
4398static int
4399zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
4400{
4401	char *snap_name;
4402	int error;
4403
4404	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
4405	    (u_longlong_t)ddi_get_lbolt64());
4406
4407	if (strlen(snap_name) >= MAXNAMELEN) {
4408		strfree(snap_name);
4409		return (E2BIG);
4410	}
4411
4412	error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name,
4413	    NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd);
4414	if (error != 0) {
4415		strfree(snap_name);
4416		return (error);
4417	}
4418
4419	(void) strcpy(zc->zc_value, snap_name);
4420	strfree(snap_name);
4421	return (0);
4422}
4423
4424/*
4425 * inputs:
4426 * zc_name		name of "to" snapshot
4427 * zc_value		name of "from" snapshot
4428 * zc_cookie		file descriptor to write diff data on
4429 *
4430 * outputs:
4431 * dmu_diff_record_t's to the file descriptor
4432 */
4433static int
4434zfs_ioc_diff(zfs_cmd_t *zc)
4435{
4436	objset_t *fromsnap;
4437	objset_t *tosnap;
4438	file_t *fp;
4439	offset_t off;
4440	int error;
4441
4442	error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
4443	if (error)
4444		return (error);
4445
4446	error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap);
4447	if (error) {
4448		dmu_objset_rele(tosnap, FTAG);
4449		return (error);
4450	}
4451
4452	fp = getf(zc->zc_cookie);
4453	if (fp == NULL) {
4454		dmu_objset_rele(fromsnap, FTAG);
4455		dmu_objset_rele(tosnap, FTAG);
4456		return (EBADF);
4457	}
4458
4459	off = fp->f_offset;
4460
4461	error = dmu_diff(tosnap, fromsnap, fp, &off);
4462
4463	if (off >= 0 && off <= MAXOFFSET_T)
4464		fp->f_offset = off;
4465	releasef(zc->zc_cookie);
4466
4467	dmu_objset_rele(fromsnap, FTAG);
4468	dmu_objset_rele(tosnap, FTAG);
4469	return (error);
4470}
4471
#ifdef sun
/*
 * Remove every ACL file in the shares directory dzp.
 *
 * The value returned is whatever ended the walk: either the first
 * VOP_REMOVE() failure or the non-zero result of zap_cursor_retrieve().
 * NOTE(review): a walk that simply runs out of entries also ends through
 * a non-zero zap_cursor_retrieve() result, which is returned unchanged --
 * confirm that callers expect this.
 */
static int
zfs_smb_acl_purge(znode_t *dzp)
{
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zap_cursor_t cursor;
	zap_attribute_t entry;
	int error;

	zap_cursor_init(&cursor, zfsvfs->z_os, dzp->z_id);
	while ((error = zap_cursor_retrieve(&cursor, &entry)) == 0) {
		error = VOP_REMOVE(ZTOV(dzp), entry.za_name, kcred, NULL, 0);
		if (error != 0)
			break;
		zap_cursor_advance(&cursor);
	}
	zap_cursor_fini(&cursor);

	return (error);
}
#endif	/* sun */
4495
4496static int
4497zfs_ioc_smb_acl(zfs_cmd_t *zc)
4498{
4499#ifdef sun
4500	vnode_t *vp;
4501	znode_t *dzp;
4502	vnode_t *resourcevp = NULL;
4503	znode_t *sharedir;
4504	zfsvfs_t *zfsvfs;
4505	nvlist_t *nvlist;
4506	char *src, *target;
4507	vattr_t vattr;
4508	vsecattr_t vsec;
4509	int error = 0;
4510
4511	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
4512	    NO_FOLLOW, NULL, &vp)) != 0)
4513		return (error);
4514
4515	/* Now make sure mntpnt and dataset are ZFS */
4516
4517	if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
4518	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
4519	    zc->zc_name) != 0)) {
4520		VN_RELE(vp);
4521		return (EINVAL);
4522	}
4523
4524	dzp = VTOZ(vp);
4525	zfsvfs = dzp->z_zfsvfs;
4526	ZFS_ENTER(zfsvfs);
4527
4528	/*
4529	 * Create share dir if its missing.
4530	 */
4531	mutex_enter(&zfsvfs->z_lock);
4532	if (zfsvfs->z_shares_dir == 0) {
4533		dmu_tx_t *tx;
4534
4535		tx = dmu_tx_create(zfsvfs->z_os);
4536		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
4537		    ZFS_SHARES_DIR);
4538		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
4539		error = dmu_tx_assign(tx, TXG_WAIT);
4540		if (error) {
4541			dmu_tx_abort(tx);
4542		} else {
4543			error = zfs_create_share_dir(zfsvfs, tx);
4544			dmu_tx_commit(tx);
4545		}
4546		if (error) {
4547			mutex_exit(&zfsvfs->z_lock);
4548			VN_RELE(vp);
4549			ZFS_EXIT(zfsvfs);
4550			return (error);
4551		}
4552	}
4553	mutex_exit(&zfsvfs->z_lock);
4554
4555	ASSERT(zfsvfs->z_shares_dir);
4556	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
4557		VN_RELE(vp);
4558		ZFS_EXIT(zfsvfs);
4559		return (error);
4560	}
4561
4562	switch (zc->zc_cookie) {
4563	case ZFS_SMB_ACL_ADD:
4564		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
4565		vattr.va_type = VREG;
4566		vattr.va_mode = S_IFREG|0777;
4567		vattr.va_uid = 0;
4568		vattr.va_gid = 0;
4569
4570		vsec.vsa_mask = VSA_ACE;
4571		vsec.vsa_aclentp = &full_access;
4572		vsec.vsa_aclentsz = sizeof (full_access);
4573		vsec.vsa_aclcnt = 1;
4574
4575		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
4576		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
4577		if (resourcevp)
4578			VN_RELE(resourcevp);
4579		break;
4580
4581	case ZFS_SMB_ACL_REMOVE:
4582		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
4583		    NULL, 0);
4584		break;
4585
4586	case ZFS_SMB_ACL_RENAME:
4587		if ((error = get_nvlist(zc->zc_nvlist_src,
4588		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
4589			VN_RELE(vp);
4590			ZFS_EXIT(zfsvfs);
4591			return (error);
4592		}
4593		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
4594		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
4595		    &target)) {
4596			VN_RELE(vp);
4597			VN_RELE(ZTOV(sharedir));
4598			ZFS_EXIT(zfsvfs);
4599			nvlist_free(nvlist);
4600			return (error);
4601		}
4602		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
4603		    kcred, NULL, 0);
4604		nvlist_free(nvlist);
4605		break;
4606
4607	case ZFS_SMB_ACL_PURGE:
4608		error = zfs_smb_acl_purge(sharedir);
4609		break;
4610
4611	default:
4612		error = EINVAL;
4613		break;
4614	}
4615
4616	VN_RELE(vp);
4617	VN_RELE(ZTOV(sharedir));
4618
4619	ZFS_EXIT(zfsvfs);
4620
4621	return (error);
4622#else	/* !sun */
4623	return (EOPNOTSUPP);
4624#endif	/* !sun */
4625}
4626
4627/*
4628 * inputs:
4629 * zc_name		name of filesystem
4630 * zc_value		short name of snap
4631 * zc_string		user-supplied tag for this hold
4632 * zc_cookie		recursive flag
4633 * zc_temphold		set if hold is temporary
4634 * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
4635 * zc_sendobj		if non-zero, the objid for zc_name@zc_value
4636 * zc_createtxg		if zc_sendobj is non-zero, snap must have zc_createtxg
4637 *
4638 * outputs:		none
4639 */
4640static int
4641zfs_ioc_hold(zfs_cmd_t *zc)
4642{
4643	boolean_t recursive = zc->zc_cookie;
4644	spa_t *spa;
4645	dsl_pool_t *dp;
4646	dsl_dataset_t *ds;
4647	int error;
4648	minor_t minor = 0;
4649
4650	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4651		return (EINVAL);
4652
4653	if (zc->zc_sendobj == 0) {
4654		return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
4655		    zc->zc_string, recursive, zc->zc_temphold,
4656		    zc->zc_cleanup_fd));
4657	}
4658
4659	if (recursive)
4660		return (EINVAL);
4661
4662	error = spa_open(zc->zc_name, &spa, FTAG);
4663	if (error)
4664		return (error);
4665
4666	dp = spa_get_dsl(spa);
4667	rw_enter(&dp->dp_config_rwlock, RW_READER);
4668	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
4669	rw_exit(&dp->dp_config_rwlock);
4670	spa_close(spa, FTAG);
4671	if (error)
4672		return (error);
4673
4674	/*
4675	 * Until we have a hold on this snapshot, it's possible that
4676	 * zc_sendobj could've been destroyed and reused as part
4677	 * of a later txg.  Make sure we're looking at the right object.
4678	 */
4679	if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) {
4680		dsl_dataset_rele(ds, FTAG);
4681		return (ENOENT);
4682	}
4683
4684	if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) {
4685		error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
4686		if (error) {
4687			dsl_dataset_rele(ds, FTAG);
4688			return (error);
4689		}
4690	}
4691
4692	error = dsl_dataset_user_hold_for_send(ds, zc->zc_string,
4693	    zc->zc_temphold);
4694	if (minor != 0) {
4695		if (error == 0) {
4696			dsl_register_onexit_hold_cleanup(ds, zc->zc_string,
4697			    minor);
4698		}
4699		zfs_onexit_fd_rele(zc->zc_cleanup_fd);
4700	}
4701	dsl_dataset_rele(ds, FTAG);
4702
4703	return (error);
4704}
4705
4706/*
4707 * inputs:
4708 * zc_name	name of dataset from which we're releasing a user hold
4709 * zc_value	short name of snap
4710 * zc_string	user-supplied tag for this hold
4711 * zc_cookie	recursive flag
4712 *
4713 * outputs:	none
4714 */
4715static int
4716zfs_ioc_release(zfs_cmd_t *zc)
4717{
4718	boolean_t recursive = zc->zc_cookie;
4719
4720	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4721		return (EINVAL);
4722
4723	return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
4724	    zc->zc_string, recursive));
4725}
4726
4727/*
4728 * inputs:
4729 * zc_name		name of filesystem
4730 *
4731 * outputs:
4732 * zc_nvlist_src{_size}	nvlist of snapshot holds
4733 */
4734static int
4735zfs_ioc_get_holds(zfs_cmd_t *zc)
4736{
4737	nvlist_t *nvp;
4738	int error;
4739
4740	if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
4741		error = put_nvlist(zc, nvp);
4742		nvlist_free(nvp);
4743	}
4744
4745	return (error);
4746}
4747
4748/*
4749 * inputs:
4750 * zc_name		name of new filesystem or snapshot
4751 * zc_value		full name of old snapshot
4752 *
4753 * outputs:
4754 * zc_cookie		space in bytes
4755 * zc_objset_type	compressed space in bytes
4756 * zc_perm_action	uncompressed space in bytes
4757 */
4758static int
4759zfs_ioc_space_written(zfs_cmd_t *zc)
4760{
4761	int error;
4762	dsl_dataset_t *new, *old;
4763
4764	error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
4765	if (error != 0)
4766		return (error);
4767	error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
4768	if (error != 0) {
4769		dsl_dataset_rele(new, FTAG);
4770		return (error);
4771	}
4772
4773	error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
4774	    &zc->zc_objset_type, &zc->zc_perm_action);
4775	dsl_dataset_rele(old, FTAG);
4776	dsl_dataset_rele(new, FTAG);
4777	return (error);
4778}
4779
4780/*
4781 * inputs:
4782 * zc_name		full name of last snapshot
4783 * zc_value		full name of first snapshot
4784 *
4785 * outputs:
4786 * zc_cookie		space in bytes
4787 * zc_objset_type	compressed space in bytes
4788 * zc_perm_action	uncompressed space in bytes
4789 */
4790static int
4791zfs_ioc_space_snaps(zfs_cmd_t *zc)
4792{
4793	int error;
4794	dsl_dataset_t *new, *old;
4795
4796	error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
4797	if (error != 0)
4798		return (error);
4799	error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
4800	if (error != 0) {
4801		dsl_dataset_rele(new, FTAG);
4802		return (error);
4803	}
4804
4805	error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie,
4806	    &zc->zc_objset_type, &zc->zc_perm_action);
4807	dsl_dataset_rele(old, FTAG);
4808	dsl_dataset_rele(new, FTAG);
4809	return (error);
4810}
4811
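/*
 * Note on the zfs_ioc_vec[] table further below (this comment does not
 * describe the jail handlers that immediately follow it): pool create,
 * destroy, and export don't log the history as part of zfsdev_ioctl;
 * instead zfs_ioc_pool_create and zfs_ioc_pool_export do the logging
 * of those commands themselves.
 */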
4817static int
4818zfs_ioc_jail(zfs_cmd_t *zc)
4819{
4820
4821	return (zone_dataset_attach(curthread->td_ucred, zc->zc_name,
4822	    (int)zc->zc_jailid));
4823}
4824
4825static int
4826zfs_ioc_unjail(zfs_cmd_t *zc)
4827{
4828
4829	return (zone_dataset_detach(curthread->td_ucred, zc->zc_name,
4830	    (int)zc->zc_jailid));
4831}
4832
4833static zfs_ioc_vec_t zfs_ioc_vec[] = {
4834	{ zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE,
4835	    B_FALSE },
4836	{ zfs_ioc_pool_destroy,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
4837	    B_FALSE },
4838	{ zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4839	    B_FALSE },
4840	{ zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE,
4841	    B_FALSE },
4842	{ zfs_ioc_pool_configs,	zfs_secpolicy_none, NO_NAME, B_FALSE,
4843	    B_FALSE },
4844	{ zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE,
4845	    B_FALSE },
4846	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE,
4847	    B_FALSE },
4848	{ zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4849	    B_TRUE },
4850	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE,
4851	    B_FALSE },
4852	{ zfs_ioc_pool_upgrade,	zfs_secpolicy_config, POOL_NAME, B_TRUE,
4853	    B_TRUE },
4854	{ zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE,
4855	    B_FALSE },
4856	{ zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4857	    B_TRUE },
4858	{ zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4859	    B_TRUE },
4860	{ zfs_ioc_vdev_set_state, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
4861	    B_FALSE },
4862	{ zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4863	    B_TRUE },
4864	{ zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4865	    B_TRUE },
4866	{ zfs_ioc_vdev_setpath,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
4867	    B_TRUE },
4868	{ zfs_ioc_vdev_setfru,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
4869	    B_TRUE },
4870	{ zfs_ioc_objset_stats,	zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4871	    B_TRUE },
4872	{ zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4873	    B_FALSE },
4874	{ zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4875	    B_TRUE },
4876	{ zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4877	    B_TRUE },
4878	{ zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, B_TRUE },
4879	{ zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, B_TRUE },
4880	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE,
4881	    B_TRUE},
4882	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE,
4883	    B_TRUE },
4884	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE, B_TRUE },
4885	{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, B_TRUE },
4886	{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE, B_FALSE },
4887	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE,
4888	    B_FALSE },
4889	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
4890	    B_FALSE },
4891	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE,
4892	    B_FALSE },
4893	{ zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE,
4894	    B_FALSE },
4895	{ zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, B_FALSE },
4896	{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
4897	    B_TRUE },
4898	{ zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive, DATASET_NAME,
4899	    B_TRUE, B_TRUE },
4900	{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
4901	    B_TRUE },
4902	{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE,
4903	    B_FALSE },
4904	{ zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
4905	    B_TRUE },
4906	{ zfs_ioc_pool_set_props, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
4907	    B_TRUE },
4908	{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE,
4909	    B_FALSE },
4910	{ zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE,
4911	    B_TRUE },
4912	{ zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4913	    B_FALSE },
4914	{ zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, B_FALSE },
4915	{ zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE,
4916	    B_TRUE },
4917	{ zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE,
4918	    B_FALSE },
4919	{ zfs_ioc_userspace_one, zfs_secpolicy_userspace_one,
4920	    DATASET_NAME, B_FALSE, B_FALSE },
4921	{ zfs_ioc_userspace_many, zfs_secpolicy_userspace_many,
4922	    DATASET_NAME, B_FALSE, B_FALSE },
4923	{ zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
4924	    DATASET_NAME, B_FALSE, B_TRUE },
4925	{ zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, B_TRUE },
4926	{ zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
4927	    B_TRUE },
4928	{ zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4929	    B_TRUE },
4930	{ zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4931	    B_FALSE },
4932	{ zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4933	    B_TRUE },
4934	{ zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4935	    B_FALSE },
4936	{ zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, B_FALSE },
4937	{ zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME,
4938	    B_FALSE, B_FALSE },
4939	{ zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
4940	    B_TRUE },
4941	{ zfs_ioc_jail, zfs_secpolicy_config, DATASET_NAME, B_TRUE, B_FALSE },
4942	{ zfs_ioc_unjail, zfs_secpolicy_config, DATASET_NAME, B_TRUE, B_FALSE },
4943	{ zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4944	    B_TRUE },
4945	{ zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4946	    B_TRUE },
4947	{ zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4948	    B_TRUE }
4949};
4950
4951int
4952pool_status_check(const char *name, zfs_ioc_namecheck_t type)
4953{
4954	spa_t *spa;
4955	int error;
4956
4957	ASSERT(type == POOL_NAME || type == DATASET_NAME);
4958
4959	error = spa_open(name, &spa, FTAG);
4960	if (error == 0) {
4961		if (spa_suspended(spa))
4962			error = EAGAIN;
4963		spa_close(spa, FTAG);
4964	}
4965	return (error);
4966}
4967
4968/*
4969 * Find a free minor number.
4970 */
4971minor_t
4972zfsdev_minor_alloc(void)
4973{
4974	static minor_t last_minor;
4975	minor_t m;
4976
4977	ASSERT(MUTEX_HELD(&spa_namespace_lock));
4978
4979	for (m = last_minor + 1; m != last_minor; m++) {
4980		if (m > ZFSDEV_MAX_MINOR)
4981			m = 1;
4982		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
4983			last_minor = m;
4984			return (m);
4985		}
4986	}
4987
4988	return (0);
4989}
4990
4991static int
4992zfs_ctldev_init(struct cdev *devp)
4993{
4994	minor_t minor;
4995	zfs_soft_state_t *zs;
4996
4997	ASSERT(MUTEX_HELD(&spa_namespace_lock));
4998
4999	minor = zfsdev_minor_alloc();
5000	if (minor == 0)
5001		return (ENXIO);
5002
5003	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
5004		return (EAGAIN);
5005
5006	devfs_set_cdevpriv((void *)(uintptr_t)minor, zfsdev_close);
5007
5008	zs = ddi_get_soft_state(zfsdev_state, minor);
5009	zs->zss_type = ZSST_CTLDEV;
5010	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
5011
5012	return (0);
5013}
5014
5015static void
5016zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
5017{
5018	ASSERT(MUTEX_HELD(&spa_namespace_lock));
5019
5020	zfs_onexit_destroy(zo);
5021	ddi_soft_state_free(zfsdev_state, minor);
5022}
5023
5024void *
5025zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
5026{
5027	zfs_soft_state_t *zp;
5028
5029	zp = ddi_get_soft_state(zfsdev_state, minor);
5030	if (zp == NULL || zp->zss_type != which)
5031		return (NULL);
5032
5033	return (zp->zss_data);
5034}
5035
5036static int
5037zfsdev_open(struct cdev *devp, int flag, int mode, struct thread *td)
5038{
5039	int error = 0;
5040
5041#ifdef sun
5042	if (getminor(*devp) != 0)
5043		return (zvol_open(devp, flag, otyp, cr));
5044#endif
5045
5046	/* This is the control device. Allocate a new minor if requested. */
5047	if (flag & FEXCL) {
5048		mutex_enter(&spa_namespace_lock);
5049		error = zfs_ctldev_init(devp);
5050		mutex_exit(&spa_namespace_lock);
5051	}
5052
5053	return (error);
5054}
5055
5056static void
5057zfsdev_close(void *data)
5058{
5059	zfs_onexit_t *zo;
5060	minor_t minor = (minor_t)(uintptr_t)data;
5061
5062	if (minor == 0)
5063		return;
5064
5065	mutex_enter(&spa_namespace_lock);
5066	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
5067	if (zo == NULL) {
5068		mutex_exit(&spa_namespace_lock);
5069		return;
5070	}
5071	zfs_ctldev_destroy(zo, minor);
5072	mutex_exit(&spa_namespace_lock);
5073}
5074
5075static int
5076zfsdev_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
5077    struct thread *td)
5078{
5079	zfs_cmd_t *zc;
5080	uint_t vec;
5081	int cflag, error, len;
5082
5083	cflag = ZFS_CMD_COMPAT_NONE;
5084	len = IOCPARM_LEN(cmd);
5085
5086	/*
5087	 * Check if we have sufficient kernel memory allocated
5088	 * for the zfs_cmd_t request.  Bail out if not so we
5089	 * will not access undefined memory region.
5090	 */
5091	if (len < sizeof(zfs_cmd_t))
5092		if (len == sizeof(zfs_cmd_v15_t)) {
5093			cflag = ZFS_CMD_COMPAT_V15;
5094			vec = zfs_ioctl_v15_to_v28[ZFS_IOC(cmd)];
5095		} else
5096			return (EINVAL);
5097	else
5098		vec = ZFS_IOC(cmd);
5099
5100	if (cflag != ZFS_CMD_COMPAT_NONE) {
5101		if (vec == ZFS_IOC_COMPAT_PASS)
5102			return (0);
5103		else if (vec == ZFS_IOC_COMPAT_FAIL)
5104			return (ENOTSUP);
5105	}
5106
5107	if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
5108		return (EINVAL);
5109
5110	if (cflag != ZFS_CMD_COMPAT_NONE) {
5111		zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);
5112		bzero(zc, sizeof(zfs_cmd_t));
5113		zfs_cmd_compat_get(zc, addr, cflag);
5114		zfs_ioctl_compat_pre(zc, &vec, cflag);
5115	} else {
5116		zc = (void *)addr;
5117	}
5118
5119	error = zfs_ioc_vec[vec].zvec_secpolicy(zc, td->td_ucred);
5120
5121	/*
5122	 * Ensure that all pool/dataset names are valid before we pass down to
5123	 * the lower layers.
5124	 */
5125	if (error == 0) {
5126		zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5127		zc->zc_iflags = flag & FKIOCTL;
5128		switch (zfs_ioc_vec[vec].zvec_namecheck) {
5129		case POOL_NAME:
5130			if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
5131				error = EINVAL;
5132			if (zfs_ioc_vec[vec].zvec_pool_check)
5133				error = pool_status_check(zc->zc_name,
5134				    zfs_ioc_vec[vec].zvec_namecheck);
5135			break;
5136
5137		case DATASET_NAME:
5138			if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
5139				error = EINVAL;
5140			if (zfs_ioc_vec[vec].zvec_pool_check)
5141				error = pool_status_check(zc->zc_name,
5142				    zfs_ioc_vec[vec].zvec_namecheck);
5143			break;
5144
5145		case NO_NAME:
5146			break;
5147		}
5148	}
5149
5150	if (error == 0)
5151		error = zfs_ioc_vec[vec].zvec_func(zc);
5152
5153	if (error == 0) {
5154		if (zfs_ioc_vec[vec].zvec_his_log)
5155			zfs_log_history(zc);
5156	}
5157
5158	if (cflag != ZFS_CMD_COMPAT_NONE) {
5159		zfs_ioctl_compat_post(zc, ZFS_IOC(cmd), cflag);
5160		zfs_cmd_compat_put(zc, addr, cflag);
5161		kmem_free(zc, sizeof(zfs_cmd_t));
5162	}
5163
5164	return (error);
5165}
5166
5167#ifdef sun
5168static int
5169zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5170{
5171	if (cmd != DDI_ATTACH)
5172		return (DDI_FAILURE);
5173
5174	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
5175	    DDI_PSEUDO, 0) == DDI_FAILURE)
5176		return (DDI_FAILURE);
5177
5178	zfs_dip = dip;
5179
5180	ddi_report_dev(dip);
5181
5182	return (DDI_SUCCESS);
5183}
5184
5185static int
5186zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5187{
5188	if (spa_busy() || zfs_busy() || zvol_busy())
5189		return (DDI_FAILURE);
5190
5191	if (cmd != DDI_DETACH)
5192		return (DDI_FAILURE);
5193
5194	zfs_dip = NULL;
5195
5196	ddi_prop_remove_all(dip);
5197	ddi_remove_minor_node(dip, NULL);
5198
5199	return (DDI_SUCCESS);
5200}
5201
5202/*ARGSUSED*/
5203static int
5204zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
5205{
5206	switch (infocmd) {
5207	case DDI_INFO_DEVT2DEVINFO:
5208		*result = zfs_dip;
5209		return (DDI_SUCCESS);
5210
5211	case DDI_INFO_DEVT2INSTANCE:
5212		*result = (void *)0;
5213		return (DDI_SUCCESS);
5214	}
5215
5216	return (DDI_FAILURE);
5217}
5218#endif	/* sun */
5219
5220/*
5221 * OK, so this is a little weird.
5222 *
5223 * /dev/zfs is the control node, i.e. minor 0.
5224 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
5225 *
5226 * /dev/zfs has basically nothing to do except serve up ioctls,
5227 * so most of the standard driver entry points are in zvol.c.
5228 */
5229#ifdef sun
5230static struct cb_ops zfs_cb_ops = {
5231	zfsdev_open,	/* open */
5232	zfsdev_close,	/* close */
5233	zvol_strategy,	/* strategy */
5234	nodev,		/* print */
5235	zvol_dump,	/* dump */
5236	zvol_read,	/* read */
5237	zvol_write,	/* write */
5238	zfsdev_ioctl,	/* ioctl */
5239	nodev,		/* devmap */
5240	nodev,		/* mmap */
5241	nodev,		/* segmap */
5242	nochpoll,	/* poll */
5243	ddi_prop_op,	/* prop_op */
5244	NULL,		/* streamtab */
5245	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
5246	CB_REV,		/* version */
5247	nodev,		/* async read */
5248	nodev,		/* async write */
5249};
5250
5251static struct dev_ops zfs_dev_ops = {
5252	DEVO_REV,	/* version */
5253	0,		/* refcnt */
5254	zfs_info,	/* info */
5255	nulldev,	/* identify */
5256	nulldev,	/* probe */
5257	zfs_attach,	/* attach */
5258	zfs_detach,	/* detach */
5259	nodev,		/* reset */
5260	&zfs_cb_ops,	/* driver operations */
5261	NULL,		/* no bus operations */
5262	NULL,		/* power */
5263	ddi_quiesce_not_needed,	/* quiesce */
5264};
5265
5266static struct modldrv zfs_modldrv = {
5267	&mod_driverops,
5268	"ZFS storage pool",
5269	&zfs_dev_ops
5270};
5271
5272static struct modlinkage modlinkage = {
5273	MODREV_1,
5274	(void *)&zfs_modlfs,
5275	(void *)&zfs_modldrv,
5276	NULL
5277};
5278#endif	/* sun */
5279
5280static struct cdevsw zfs_cdevsw = {
5281	.d_version =	D_VERSION,
5282	.d_open =	zfsdev_open,
5283	.d_ioctl =	zfsdev_ioctl,
5284	.d_name =	ZFS_DEV_NAME
5285};
5286
5287static void
5288zfsdev_init(void)
5289{
5290	zfsdev = make_dev(&zfs_cdevsw, 0x0, UID_ROOT, GID_OPERATOR, 0666,
5291	    ZFS_DEV_NAME);
5292}
5293
5294static void
5295zfsdev_fini(void)
5296{
5297	if (zfsdev != NULL)
5298		destroy_dev(zfsdev);
5299}
5300
5301static struct root_hold_token *zfs_root_token;
5302struct proc *zfsproc;
5303
5304uint_t zfs_fsyncer_key;
5305extern uint_t rrw_tsd_key;
5306
5307#ifdef sun
5308int
5309_init(void)
5310{
5311	int error;
5312
5313	spa_init(FREAD | FWRITE);
5314	zfs_init();
5315	zvol_init();
5316
5317	if ((error = mod_install(&modlinkage)) != 0) {
5318		zvol_fini();
5319		zfs_fini();
5320		spa_fini();
5321		return (error);
5322	}
5323
5324	tsd_create(&zfs_fsyncer_key, NULL);
5325	tsd_create(&rrw_tsd_key, NULL);
5326
5327	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
5328	ASSERT(error == 0);
5329	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
5330
5331	return (0);
5332}
5333
5334int
5335_fini(void)
5336{
5337	int error;
5338
5339	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
5340		return (EBUSY);
5341
5342	if ((error = mod_remove(&modlinkage)) != 0)
5343		return (error);
5344
5345	zvol_fini();
5346	zfs_fini();
5347	spa_fini();
5348	if (zfs_nfsshare_inited)
5349		(void) ddi_modclose(nfs_mod);
5350	if (zfs_smbshare_inited)
5351		(void) ddi_modclose(smbsrv_mod);
5352	if (zfs_nfsshare_inited || zfs_smbshare_inited)
5353		(void) ddi_modclose(sharefs_mod);
5354
5355	tsd_destroy(&zfs_fsyncer_key);
5356	ldi_ident_release(zfs_li);
5357	zfs_li = NULL;
5358	mutex_destroy(&zfs_share_lock);
5359
5360	return (error);
5361}
5362
5363int
5364_info(struct modinfo *modinfop)
5365{
5366	return (mod_info(&modlinkage, modinfop));
5367}
5368#endif	/* sun */
5369
5370static int
5371zfs_modevent(module_t mod, int type, void *unused __unused)
5372{
5373	int error = 0;
5374
5375	switch (type) {
5376	case MOD_LOAD:
5377		zfs_root_token = root_mount_hold("ZFS");
5378
5379		mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
5380
5381		spa_init(FREAD | FWRITE);
5382		zfs_init();
5383		zvol_init();
5384
5385		tsd_create(&zfs_fsyncer_key, NULL);
5386		tsd_create(&rrw_tsd_key, NULL);
5387
5388		printf("ZFS storage pool version " SPA_VERSION_STRING "\n");
5389		root_mount_rel(zfs_root_token);
5390
5391		zfsdev_init();
5392		break;
5393	case MOD_UNLOAD:
5394		if (spa_busy() || zfs_busy() || zvol_busy() ||
5395		    zio_injection_enabled) {
5396			error = EBUSY;
5397			break;
5398		}
5399
5400		zfsdev_fini();
5401		zvol_fini();
5402		zfs_fini();
5403		spa_fini();
5404
5405		tsd_destroy(&zfs_fsyncer_key);
5406		tsd_destroy(&rrw_tsd_key);
5407
5408		mutex_destroy(&zfs_share_lock);
5409		break;
5410	default:
5411		error = EOPNOTSUPP;
5412		break;
5413	}
5414	return (error);
5415}
5416
5417static moduledata_t zfs_mod = {
5418	"zfsctrl",
5419	zfs_modevent,
5420	0
5421};
5422DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_VFS, SI_ORDER_ANY);
5423MODULE_DEPEND(zfsctrl, opensolaris, 1, 1, 1);
5424MODULE_DEPEND(zfsctrl, krpc, 1, 1, 1);
5425