zfs_ioctl.c revision 331612
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
25 * Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
26 * Copyright 2014 Xin Li <delphij@FreeBSD.org>. All rights reserved.
27 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
28 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
29 * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
30 * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
31 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
32 * Copyright (c) 2013 Steven Hartland. All rights reserved.
33 * Copyright (c) 2014 Integros [integros.com]
34 */
35
36/*
37 * ZFS ioctls.
38 *
39 * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
40 * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
41 *
42 * There are two ways that we handle ioctls: the legacy way where almost
43 * all of the logic is in the ioctl callback, and the new way where most
44 * of the marshalling is handled in the common entry point, zfsdev_ioctl().
45 *
46 * Non-legacy ioctls should be registered by calling
47 * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
48 * from userland by lzc_ioctl().
49 *
50 * The registration arguments are as follows:
51 *
52 * const char *name
53 *   The name of the ioctl.  This is used for history logging.  If the
54 *   ioctl returns successfully (the callback returns 0), and allow_log
55 *   is true, then a history log entry will be recorded with the input &
56 *   output nvlists.  The log entry can be printed with "zpool history -i".
57 *
58 * zfs_ioc_t ioc
59 *   The ioctl request number, which userland will pass to ioctl(2).
60 *   The ioctl numbers can change from release to release, because
61 *   the caller (libzfs) must be matched to the kernel.
62 *
63 * zfs_secpolicy_func_t *secpolicy
64 *   This function will be called before the zfs_ioc_func_t, to
65 *   determine if this operation is permitted.  It should return EPERM
66 *   on failure, and 0 on success.  Checks include determining if the
67 *   dataset is visible in this zone, and if the user has either all
68 *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
69 *   to do this operation on this dataset with "zfs allow".
70 *
71 * zfs_ioc_namecheck_t namecheck
72 *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
73 *   name, a dataset name, or nothing.  If the name is not well-formed,
74 *   the ioctl will fail and the callback will not be called.
75 *   Therefore, the callback can assume that the name is well-formed
76 *   (e.g. is null-terminated, doesn't have more than one '@' character,
77 *   doesn't have invalid characters).
78 *
79 * zfs_ioc_poolcheck_t pool_check
80 *   This specifies requirements on the pool state.  If the pool does
81 *   not meet them (is suspended or is readonly), the ioctl will fail
82 *   and the callback will not be called.  If any checks are specified
83 *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
84 *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
85 *   POOL_CHECK_READONLY).
86 *
87 * boolean_t smush_outnvlist
88 *   If smush_outnvlist is true, then the output is presumed to be a
89 *   list of errors, and it will be "smushed" down to fit into the
90 *   caller's buffer, by removing some entries and replacing them with a
91 *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
92 *   nvlist_smush() for details.  If smush_outnvlist is false, and the
93 *   outnvlist does not fit into the userland-provided buffer, then the
94 *   ioctl will fail with ENOMEM.
95 *
96 * zfs_ioc_func_t *func
97 *   The callback function that will perform the operation.
98 *
99 *   The callback should return 0 on success, or an error number on
100 *   failure.  If the function fails, the userland ioctl will return -1,
101 *   and errno will be set to the callback's return value.  The callback
102 *   will be called with the following arguments:
103 *
104 *   const char *name
105 *     The name of the pool or dataset to operate on, from
106 *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
107 *     expected type (pool, dataset, or none).
108 *
109 *   nvlist_t *innvl
110 *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
111 *     NULL if no input nvlist was provided.  Changes to this nvlist are
112 *     ignored.  If the input nvlist could not be deserialized, the
113 *     ioctl will fail and the callback will not be called.
114 *
115 *   nvlist_t *outnvl
116 *     The output nvlist, initially empty.  The callback can fill it in,
117 *     and it will be returned to userland by serializing it into
118 *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
119 *     fails (e.g. because the caller didn't supply a large enough
120 *     buffer), then the overall ioctl will fail.  See the
 *     'smush_outnvlist' argument above for additional behaviors.
122 *
123 *     There are two typical uses of the output nvlist:
124 *       - To return state, e.g. property values.  In this case,
125 *         smush_outnvlist should be false.  If the buffer was not large
126 *         enough, the caller will reallocate a larger buffer and try
127 *         the ioctl again.
128 *
129 *       - To return multiple errors from an ioctl which makes on-disk
130 *         changes.  In this case, smush_outnvlist should be true.
131 *         Ioctls which make on-disk modifications should generally not
132 *         use the outnvl if they succeed, because the caller can not
133 *         distinguish between the operation failing, and
134 *         deserialization failing.
135 */
136#ifdef __FreeBSD__
137#include "opt_kstack_pages.h"
138#endif
139
140#include <sys/types.h>
141#include <sys/param.h>
142#include <sys/systm.h>
143#include <sys/conf.h>
144#include <sys/kernel.h>
145#include <sys/lock.h>
146#include <sys/malloc.h>
147#include <sys/mutex.h>
148#include <sys/proc.h>
149#include <sys/errno.h>
150#include <sys/uio.h>
151#include <sys/buf.h>
152#include <sys/file.h>
153#include <sys/kmem.h>
154#include <sys/conf.h>
155#include <sys/cmn_err.h>
156#include <sys/stat.h>
157#include <sys/zfs_ioctl.h>
158#include <sys/zfs_vfsops.h>
159#include <sys/zfs_znode.h>
160#include <sys/zap.h>
161#include <sys/spa.h>
162#include <sys/spa_impl.h>
163#include <sys/vdev.h>
164#include <sys/dmu.h>
165#include <sys/dsl_dir.h>
166#include <sys/dsl_dataset.h>
167#include <sys/dsl_prop.h>
168#include <sys/dsl_deleg.h>
169#include <sys/dmu_objset.h>
170#include <sys/dmu_impl.h>
171#include <sys/dmu_tx.h>
172#include <sys/sunddi.h>
173#include <sys/policy.h>
174#include <sys/zone.h>
175#include <sys/nvpair.h>
176#include <sys/mount.h>
177#include <sys/taskqueue.h>
178#include <sys/sdt.h>
179#include <sys/varargs.h>
180#include <sys/fs/zfs.h>
181#include <sys/zfs_ctldir.h>
182#include <sys/zfs_dir.h>
183#include <sys/zfs_onexit.h>
184#include <sys/zvol.h>
185#include <sys/dsl_scan.h>
186#include <sys/dmu_objset.h>
187#include <sys/dmu_send.h>
188#include <sys/dsl_destroy.h>
189#include <sys/dsl_bookmark.h>
190#include <sys/dsl_userhold.h>
191#include <sys/zfeature.h>
192#include <sys/zio_checksum.h>
193
194#include "zfs_namecheck.h"
195#include "zfs_prop.h"
196#include "zfs_deleg.h"
197#include "zfs_comutil.h"
198#include "zfs_ioctl_compat.h"
199
200
/*
 * Character device handle for this driver.  It is not assigned anywhere in
 * this part of the file; presumably it backs /dev/zfs -- TODO confirm.
 */
static struct cdev *zfsdev;

/* Initialization/teardown routines implemented outside this file. */
extern void zfs_init(void);
extern void zfs_fini(void);

/*
 * Key variables.  zfs_allow_log_key is read with tsd_get() by
 * zfs_secpolicy_log_history(), so it is a thread-specific-data key; the
 * other keys are presumably TSD keys as well -- verify against their users.
 */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
static uint_t zfs_allow_log_key;
extern uint_t zfs_geom_probe_vdev_key;
210
/*
 * Callback signatures used by the ioctl dispatch table:
 *
 *   zfs_ioc_legacy_func_t  legacy-style handler that receives the raw
 *                          zfs_cmd_t and does its own marshalling.
 *   zfs_ioc_func_t         new-style handler: (name, innvl, outnvl), as
 *                          described in the block comment at the top of
 *                          this file.
 *   zfs_secpolicy_func_t   permission check run before the handler;
 *                          returns 0 to allow the operation.
 */
typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
214
/*
 * What an ioctl expects to find in zfs_cmd_t:zc_name: nothing, a pool name,
 * or a dataset name.
 */
typedef enum {
	NO_NAME,
	POOL_NAME,
	DATASET_NAME
} zfs_ioc_namecheck_t;
220
/*
 * Requirements on pool state that must hold before an ioctl's callback is
 * invoked.  The values are distinct bits so that checks can be or-ed
 * together (e.g. POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY).  Note that
 * POOL_CHECK_NONE is itself a non-zero bit (1 << 0), so "no checks" must be
 * tested by comparing against POOL_CHECK_NONE rather than against zero.
 */
typedef enum {
	POOL_CHECK_NONE		= 1 << 0,
	POOL_CHECK_SUSPENDED	= 1 << 1,
	POOL_CHECK_READONLY	= 1 << 2,
} zfs_ioc_poolcheck_t;
226
/*
 * One registered ioctl.  The fields mirror the registration arguments
 * described in the block comment at the top of this file.
 */
typedef struct zfs_ioc_vec {
	/* legacy-style handler (takes the raw zfs_cmd_t) */
	zfs_ioc_legacy_func_t	*zvec_legacy_func;
	/* new-style handler (name, innvl, outnvl) */
	zfs_ioc_func_t		*zvec_func;
	/* permission check called before the handler */
	zfs_secpolicy_func_t	*zvec_secpolicy;
	/* what zc_name is expected to contain */
	zfs_ioc_namecheck_t	zvec_namecheck;
	/* whether a successful call may be recorded in the pool history */
	boolean_t		zvec_allow_log;
	/* required pool state (POOL_CHECK_* bits) */
	zfs_ioc_poolcheck_t	zvec_pool_check;
	/* whether an oversized output nvlist is "smushed" to fit */
	boolean_t		zvec_smush_outnvlist;
	/* ioctl name, used for history logging */
	const char		*zvec_name;
} zfs_ioc_vec_t;
237
/*
 * Delegated-permission names for the user/group space properties.
 * This array is indexed by zfs_userquota_prop_t, so the order of the
 * entries must match the order of that enum (defined outside this file).
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
};
245
/*
 * Forward declarations.  None of these are defined in the portion of the
 * file above this point; the static ones are presumably defined further
 * down in this file.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
/* Not static: has external linkage. */
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);

static void zfsdev_close(void *data);

static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
259
260/* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
261void
262__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
263{
264	const char *newfile;
265	char buf[512];
266	va_list adx;
267
268	/*
269	 * Get rid of annoying "../common/" prefix to filename.
270	 */
271	newfile = strrchr(file, '/');
272	if (newfile != NULL) {
273		newfile = newfile + 1; /* Get rid of leading / */
274	} else {
275		newfile = file;
276	}
277
278	va_start(adx, fmt);
279	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
280	va_end(adx);
281
282	/*
283	 * To get this data, use the zfs-dprintf probe as so:
284	 * dtrace -q -n 'zfs-dprintf \
285	 *	/stringof(arg0) == "dbuf.c"/ \
286	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
287	 * arg0 = file name
288	 * arg1 = function name
289	 * arg2 = line number
290	 * arg3 = message
291	 */
292	DTRACE_PROBE4(zfs__dprintf,
293	    char *, newfile, char *, func, int, line, char *, buf);
294}
295
296static void
297history_str_free(char *buf)
298{
299	kmem_free(buf, HIS_MAX_RECORD_LEN);
300}
301
302static char *
303history_str_get(zfs_cmd_t *zc)
304{
305	char *buf;
306
307	if (zc->zc_history == 0)
308		return (NULL);
309
310	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
311	if (copyinstr((void *)(uintptr_t)zc->zc_history,
312	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
313		history_str_free(buf);
314		return (NULL);
315	}
316
317	buf[HIS_MAX_RECORD_LEN -1] = '\0';
318
319	return (buf);
320}
321
322/*
323 * Check to see if the named dataset is currently defined as bootable
324 */
325static boolean_t
326zfs_is_bootfs(const char *name)
327{
328	objset_t *os;
329
330	if (dmu_objset_hold(name, FTAG, &os) == 0) {
331		boolean_t ret;
332		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
333		dmu_objset_rele(os, FTAG);
334		return (ret);
335	}
336	return (B_FALSE);
337}
338
339/*
340 * Return non-zero if the spa version is less than requested version.
341 */
342static int
343zfs_earlier_version(const char *name, int version)
344{
345	spa_t *spa;
346
347	if (spa_open(name, &spa, FTAG) == 0) {
348		if (spa_version(spa) < version) {
349			spa_close(spa, FTAG);
350			return (1);
351		}
352		spa_close(spa, FTAG);
353	}
354	return (0);
355}
356
357/*
358 * Return TRUE if the ZPL version is less than requested version.
359 */
360static boolean_t
361zpl_earlier_version(const char *name, int version)
362{
363	objset_t *os;
364	boolean_t rc = B_TRUE;
365
366	if (dmu_objset_hold(name, FTAG, &os) == 0) {
367		uint64_t zplversion;
368
369		if (dmu_objset_type(os) != DMU_OST_ZFS) {
370			dmu_objset_rele(os, FTAG);
371			return (B_TRUE);
372		}
373		/* XXX reading from non-owned objset */
374		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
375			rc = zplversion < version;
376		dmu_objset_rele(os, FTAG);
377	}
378	return (rc);
379}
380
381static void
382zfs_log_history(zfs_cmd_t *zc)
383{
384	spa_t *spa;
385	char *buf;
386
387	if ((buf = history_str_get(zc)) == NULL)
388		return;
389
390	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
391		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
392			(void) spa_history_log(spa, buf);
393		spa_close(spa, FTAG);
394	}
395	history_str_free(buf);
396}
397
398/*
399 * Policy for top-level read operations (list pools).  Requires no privileges,
400 * and can be used in the local zone, as there is no associated dataset.
401 */
402/* ARGSUSED */
403static int
404zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
405{
406	return (0);
407}
408
409/*
410 * Policy for dataset read operations (list children, get statistics).  Requires
411 * no privileges, but must be visible in the local zone.
412 */
413/* ARGSUSED */
414static int
415zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
416{
417	if (INGLOBALZONE(curthread) ||
418	    zone_dataset_visible(zc->zc_name, NULL))
419		return (0);
420
421	return (SET_ERROR(ENOENT));
422}
423
424static int
425zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
426{
427	int writable = 1;
428
429	/*
430	 * The dataset must be visible by this zone -- check this first
431	 * so they don't see EPERM on something they shouldn't know about.
432	 */
433	if (!INGLOBALZONE(curthread) &&
434	    !zone_dataset_visible(dataset, &writable))
435		return (SET_ERROR(ENOENT));
436
437	if (INGLOBALZONE(curthread)) {
438		/*
439		 * If the fs is zoned, only root can access it from the
440		 * global zone.
441		 */
442		if (secpolicy_zfs(cr) && zoned)
443			return (SET_ERROR(EPERM));
444	} else {
445		/*
446		 * If we are in a local zone, the 'zoned' property must be set.
447		 */
448		if (!zoned)
449			return (SET_ERROR(EPERM));
450
451		/* must be writable by this zone */
452		if (!writable)
453			return (SET_ERROR(EPERM));
454	}
455	return (0);
456}
457
458static int
459zfs_dozonecheck(const char *dataset, cred_t *cr)
460{
461	uint64_t zoned;
462
463	if (dsl_prop_get_integer(dataset, "jailed", &zoned, NULL))
464		return (SET_ERROR(ENOENT));
465
466	return (zfs_dozonecheck_impl(dataset, zoned, cr));
467}
468
469static int
470zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
471{
472	uint64_t zoned;
473
474	if (dsl_prop_get_int_ds(ds, "jailed", &zoned))
475		return (SET_ERROR(ENOENT));
476
477	return (zfs_dozonecheck_impl(dataset, zoned, cr));
478}
479
480static int
481zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
482    const char *perm, cred_t *cr)
483{
484	int error;
485
486	error = zfs_dozonecheck_ds(name, ds, cr);
487	if (error == 0) {
488		error = secpolicy_zfs(cr);
489		if (error != 0)
490			error = dsl_deleg_access_impl(ds, perm, cr);
491	}
492	return (error);
493}
494
495static int
496zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
497{
498	int error;
499	dsl_dataset_t *ds;
500	dsl_pool_t *dp;
501
502	/*
503	 * First do a quick check for root in the global zone, which
504	 * is allowed to do all write_perms.  This ensures that zfs_ioc_*
505	 * will get to handle nonexistent datasets.
506	 */
507	if (INGLOBALZONE(curthread) && secpolicy_zfs(cr) == 0)
508		return (0);
509
510	error = dsl_pool_hold(name, FTAG, &dp);
511	if (error != 0)
512		return (error);
513
514	error = dsl_dataset_hold(dp, name, FTAG, &ds);
515	if (error != 0) {
516		dsl_pool_rele(dp, FTAG);
517		return (error);
518	}
519
520	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
521
522	dsl_dataset_rele(ds, FTAG);
523	dsl_pool_rele(dp, FTAG);
524	return (error);
525}
526
527#ifdef SECLABEL
528/*
529 * Policy for setting the security label property.
530 *
531 * Returns 0 for success, non-zero for access and other errors.
532 */
533static int
534zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
535{
536	char		ds_hexsl[MAXNAMELEN];
537	bslabel_t	ds_sl, new_sl;
538	boolean_t	new_default = FALSE;
539	uint64_t	zoned;
540	int		needed_priv = -1;
541	int		error;
542
543	/* First get the existing dataset label. */
544	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
545	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
546	if (error != 0)
547		return (SET_ERROR(EPERM));
548
549	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
550		new_default = TRUE;
551
552	/* The label must be translatable */
553	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
554		return (SET_ERROR(EINVAL));
555
556	/*
557	 * In a non-global zone, disallow attempts to set a label that
558	 * doesn't match that of the zone; otherwise no other checks
559	 * are needed.
560	 */
561	if (!INGLOBALZONE(curproc)) {
562		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
563			return (SET_ERROR(EPERM));
564		return (0);
565	}
566
567	/*
568	 * For global-zone datasets (i.e., those whose zoned property is
569	 * "off", verify that the specified new label is valid for the
570	 * global zone.
571	 */
572	if (dsl_prop_get_integer(name,
573	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
574		return (SET_ERROR(EPERM));
575	if (!zoned) {
576		if (zfs_check_global_label(name, strval) != 0)
577			return (SET_ERROR(EPERM));
578	}
579
580	/*
581	 * If the existing dataset label is nondefault, check if the
582	 * dataset is mounted (label cannot be changed while mounted).
583	 * Get the zfsvfs; if there isn't one, then the dataset isn't
584	 * mounted (or isn't a dataset, doesn't exist, ...).
585	 */
586	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
587		objset_t *os;
588		static char *setsl_tag = "setsl_tag";
589
590		/*
591		 * Try to own the dataset; abort if there is any error,
592		 * (e.g., already mounted, in use, or other error).
593		 */
594		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
595		    setsl_tag, &os);
596		if (error != 0)
597			return (SET_ERROR(EPERM));
598
599		dmu_objset_disown(os, setsl_tag);
600
601		if (new_default) {
602			needed_priv = PRIV_FILE_DOWNGRADE_SL;
603			goto out_check;
604		}
605
606		if (hexstr_to_label(strval, &new_sl) != 0)
607			return (SET_ERROR(EPERM));
608
609		if (blstrictdom(&ds_sl, &new_sl))
610			needed_priv = PRIV_FILE_DOWNGRADE_SL;
611		else if (blstrictdom(&new_sl, &ds_sl))
612			needed_priv = PRIV_FILE_UPGRADE_SL;
613	} else {
614		/* dataset currently has a default label */
615		if (!new_default)
616			needed_priv = PRIV_FILE_UPGRADE_SL;
617	}
618
619out_check:
620	if (needed_priv != -1)
621		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
622	return (0);
623}
624#endif	/* SECLABEL */
625
626static int
627zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
628    cred_t *cr)
629{
630	char *strval;
631
632	/*
633	 * Check permissions for special properties.
634	 */
635	switch (prop) {
636	case ZFS_PROP_ZONED:
637		/*
638		 * Disallow setting of 'zoned' from within a local zone.
639		 */
640		if (!INGLOBALZONE(curthread))
641			return (SET_ERROR(EPERM));
642		break;
643
644	case ZFS_PROP_QUOTA:
645	case ZFS_PROP_FILESYSTEM_LIMIT:
646	case ZFS_PROP_SNAPSHOT_LIMIT:
647		if (!INGLOBALZONE(curthread)) {
648			uint64_t zoned;
649			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
650			/*
651			 * Unprivileged users are allowed to modify the
652			 * limit on things *under* (ie. contained by)
653			 * the thing they own.
654			 */
655			if (dsl_prop_get_integer(dsname, "jailed", &zoned,
656			    setpoint))
657				return (SET_ERROR(EPERM));
658			if (!zoned || strlen(dsname) <= strlen(setpoint))
659				return (SET_ERROR(EPERM));
660		}
661		break;
662
663	case ZFS_PROP_MLSLABEL:
664#ifdef SECLABEL
665		if (!is_system_labeled())
666			return (SET_ERROR(EPERM));
667
668		if (nvpair_value_string(propval, &strval) == 0) {
669			int err;
670
671			err = zfs_set_slabel_policy(dsname, strval, CRED());
672			if (err != 0)
673				return (err);
674		}
675#else
676		return (EOPNOTSUPP);
677#endif
678		break;
679	}
680
681	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
682}
683
684/* ARGSUSED */
685static int
686zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
687{
688	int error;
689
690	error = zfs_dozonecheck(zc->zc_name, cr);
691	if (error != 0)
692		return (error);
693
694	/*
695	 * permission to set permissions will be evaluated later in
696	 * dsl_deleg_can_allow()
697	 */
698	return (0);
699}
700
701/* ARGSUSED */
702static int
703zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
704{
705	return (zfs_secpolicy_write_perms(zc->zc_name,
706	    ZFS_DELEG_PERM_ROLLBACK, cr));
707}
708
709/* ARGSUSED */
710static int
711zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
712{
713	dsl_pool_t *dp;
714	dsl_dataset_t *ds;
715	char *cp;
716	int error;
717
718	/*
719	 * Generate the current snapshot name from the given objsetid, then
720	 * use that name for the secpolicy/zone checks.
721	 */
722	cp = strchr(zc->zc_name, '@');
723	if (cp == NULL)
724		return (SET_ERROR(EINVAL));
725	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
726	if (error != 0)
727		return (error);
728
729	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
730	if (error != 0) {
731		dsl_pool_rele(dp, FTAG);
732		return (error);
733	}
734
735	dsl_dataset_name(ds, zc->zc_name);
736
737	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
738	    ZFS_DELEG_PERM_SEND, cr);
739	dsl_dataset_rele(ds, FTAG);
740	dsl_pool_rele(dp, FTAG);
741
742	return (error);
743}
744
745/* ARGSUSED */
746static int
747zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
748{
749	return (zfs_secpolicy_write_perms(zc->zc_name,
750	    ZFS_DELEG_PERM_SEND, cr));
751}
752
753/* ARGSUSED */
754static int
755zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
756{
757	vnode_t *vp;
758	int error;
759
760	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
761	    NO_FOLLOW, NULL, &vp)) != 0)
762		return (error);
763
764	/* Now make sure mntpnt and dataset are ZFS */
765
766	if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
767	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
768	    zc->zc_name) != 0)) {
769		VN_RELE(vp);
770		return (SET_ERROR(EPERM));
771	}
772
773	VN_RELE(vp);
774	return (dsl_deleg_access(zc->zc_name,
775	    ZFS_DELEG_PERM_SHARE, cr));
776}
777
778int
779zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
780{
781	if (!INGLOBALZONE(curthread))
782		return (SET_ERROR(EPERM));
783
784	if (secpolicy_nfs(cr) == 0) {
785		return (0);
786	} else {
787		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
788	}
789}
790
791int
792zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
793{
794	if (!INGLOBALZONE(curthread))
795		return (SET_ERROR(EPERM));
796
797	if (secpolicy_smb(cr) == 0) {
798		return (0);
799	} else {
800		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
801	}
802}
803
/*
 * Compute the parent of 'datasetname' into 'parent' (a buffer of
 * 'parentsize' bytes) by removing the trailing "@snap" or "/child"
 * component.
 *
 * Returns 0 on success, ENOENT if the name has neither '@' nor '/' (so
 * there is no parent), or ENAMETOOLONG if the name does not fit in the
 * buffer.  On ENOENT 'parent' holds a copy of the full name.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;
	int len;

	if (parentsize <= 0)
		return (SET_ERROR(ENAMETOOLONG));

	/*
	 * Copy with guaranteed NUL termination.  A plain strncpy() leaves
	 * the buffer unterminated when the name is too long, which would
	 * send the strrchr() calls below past the end of 'parent'.
	 */
	len = snprintf(parent, parentsize, "%s", datasetname);
	if (len < 0 || len >= parentsize)
		return (SET_ERROR(ENAMETOOLONG));

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (SET_ERROR(ENOENT));
	cp[0] = '\0';

	return (0);
}
825
826int
827zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
828{
829	int error;
830
831	if ((error = zfs_secpolicy_write_perms(name,
832	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
833		return (error);
834
835	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
836}
837
838/* ARGSUSED */
839static int
840zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
841{
842	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
843}
844
845/*
846 * Destroying snapshots with delegated permissions requires
847 * descendant mount and destroy permissions.
848 */
849/* ARGSUSED */
850static int
851zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
852{
853	nvlist_t *snaps;
854	nvpair_t *pair, *nextpair;
855	int error = 0;
856
857	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
858		return (SET_ERROR(EINVAL));
859	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
860	    pair = nextpair) {
861		nextpair = nvlist_next_nvpair(snaps, pair);
862		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
863		if (error == ENOENT) {
864			/*
865			 * Ignore any snapshots that don't exist (we consider
866			 * them "already destroyed").  Remove the name from the
867			 * nvl here in case the snapshot is created between
868			 * now and when we try to destroy it (in which case
869			 * we don't want to destroy it since we haven't
870			 * checked for permission).
871			 */
872			fnvlist_remove_nvpair(snaps, pair);
873			error = 0;
874		}
875		if (error != 0)
876			break;
877	}
878
879	return (error);
880}
881
882int
883zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
884{
885	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
886	int	error;
887
888	if ((error = zfs_secpolicy_write_perms(from,
889	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
890		return (error);
891
892	if ((error = zfs_secpolicy_write_perms(from,
893	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
894		return (error);
895
896	if ((error = zfs_get_parent(to, parentname,
897	    sizeof (parentname))) != 0)
898		return (error);
899
900	if ((error = zfs_secpolicy_write_perms(parentname,
901	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
902		return (error);
903
904	if ((error = zfs_secpolicy_write_perms(parentname,
905	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
906		return (error);
907
908	return (error);
909}
910
911/* ARGSUSED */
912static int
913zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
914{
915	char *at = NULL;
916	int error;
917
918	if ((zc->zc_cookie & 1) != 0) {
919		/*
920		 * This is recursive rename, so the starting snapshot might
921		 * not exist. Check file system or volume permission instead.
922		 */
923		at = strchr(zc->zc_name, '@');
924		if (at == NULL)
925			return (EINVAL);
926		*at = '\0';
927	}
928
929	error = zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr);
930
931	if (at != NULL)
932		*at = '@';
933
934	return (error);
935}
936
937/* ARGSUSED */
938static int
939zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
940{
941	dsl_pool_t *dp;
942	dsl_dataset_t *clone;
943	int error;
944
945	error = zfs_secpolicy_write_perms(zc->zc_name,
946	    ZFS_DELEG_PERM_PROMOTE, cr);
947	if (error != 0)
948		return (error);
949
950	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
951	if (error != 0)
952		return (error);
953
954	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
955
956	if (error == 0) {
957		char parentname[ZFS_MAX_DATASET_NAME_LEN];
958		dsl_dataset_t *origin = NULL;
959		dsl_dir_t *dd;
960		dd = clone->ds_dir;
961
962		error = dsl_dataset_hold_obj(dd->dd_pool,
963		    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
964		if (error != 0) {
965			dsl_dataset_rele(clone, FTAG);
966			dsl_pool_rele(dp, FTAG);
967			return (error);
968		}
969
970		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
971		    ZFS_DELEG_PERM_MOUNT, cr);
972
973		dsl_dataset_name(origin, parentname);
974		if (error == 0) {
975			error = zfs_secpolicy_write_perms_ds(parentname, origin,
976			    ZFS_DELEG_PERM_PROMOTE, cr);
977		}
978		dsl_dataset_rele(clone, FTAG);
979		dsl_dataset_rele(origin, FTAG);
980	}
981	dsl_pool_rele(dp, FTAG);
982	return (error);
983}
984
985/* ARGSUSED */
986static int
987zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
988{
989	int error;
990
991	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
992	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
993		return (error);
994
995	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
996	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
997		return (error);
998
999	return (zfs_secpolicy_write_perms(zc->zc_name,
1000	    ZFS_DELEG_PERM_CREATE, cr));
1001}
1002
1003int
1004zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1005{
1006	return (zfs_secpolicy_write_perms(name,
1007	    ZFS_DELEG_PERM_SNAPSHOT, cr));
1008}
1009
1010/*
1011 * Check for permission to create each snapshot in the nvlist.
1012 */
1013/* ARGSUSED */
1014static int
1015zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1016{
1017	nvlist_t *snaps;
1018	int error;
1019	nvpair_t *pair;
1020
1021	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
1022		return (SET_ERROR(EINVAL));
1023	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1024	    pair = nvlist_next_nvpair(snaps, pair)) {
1025		char *name = nvpair_name(pair);
1026		char *atp = strchr(name, '@');
1027
1028		if (atp == NULL) {
1029			error = SET_ERROR(EINVAL);
1030			break;
1031		}
1032		*atp = '\0';
1033		error = zfs_secpolicy_snapshot_perms(name, cr);
1034		*atp = '@';
1035		if (error != 0)
1036			break;
1037	}
1038	return (error);
1039}
1040
1041/*
1042 * Check for permission to create each snapshot in the nvlist.
1043 */
1044/* ARGSUSED */
1045static int
1046zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1047{
1048	int error = 0;
1049
1050	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1051	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1052		char *name = nvpair_name(pair);
1053		char *hashp = strchr(name, '#');
1054
1055		if (hashp == NULL) {
1056			error = SET_ERROR(EINVAL);
1057			break;
1058		}
1059		*hashp = '\0';
1060		error = zfs_secpolicy_write_perms(name,
1061		    ZFS_DELEG_PERM_BOOKMARK, cr);
1062		*hashp = '#';
1063		if (error != 0)
1064			break;
1065	}
1066	return (error);
1067}
1068
1069/* ARGSUSED */
1070static int
1071zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1072{
1073	nvpair_t *pair, *nextpair;
1074	int error = 0;
1075
1076	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1077	    pair = nextpair) {
1078		char *name = nvpair_name(pair);
1079		char *hashp = strchr(name, '#');
1080		nextpair = nvlist_next_nvpair(innvl, pair);
1081
1082		if (hashp == NULL) {
1083			error = SET_ERROR(EINVAL);
1084			break;
1085		}
1086
1087		*hashp = '\0';
1088		error = zfs_secpolicy_write_perms(name,
1089		    ZFS_DELEG_PERM_DESTROY, cr);
1090		*hashp = '#';
1091		if (error == ENOENT) {
1092			/*
1093			 * Ignore any filesystems that don't exist (we consider
1094			 * their bookmarks "already destroyed").  Remove
1095			 * the name from the nvl here in case the filesystem
1096			 * is created between now and when we try to destroy
1097			 * the bookmark (in which case we don't want to
1098			 * destroy it since we haven't checked for permission).
1099			 */
1100			fnvlist_remove_nvpair(innvl, pair);
1101			error = 0;
1102		}
1103		if (error != 0)
1104			break;
1105	}
1106
1107	return (error);
1108}
1109
1110/* ARGSUSED */
1111static int
1112zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1113{
1114	/*
1115	 * Even root must have a proper TSD so that we know what pool
1116	 * to log to.
1117	 */
1118	if (tsd_get(zfs_allow_log_key) == NULL)
1119		return (SET_ERROR(EPERM));
1120	return (0);
1121}
1122
1123static int
1124zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1125{
1126	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
1127	int	error;
1128	char	*origin;
1129
1130	if ((error = zfs_get_parent(zc->zc_name, parentname,
1131	    sizeof (parentname))) != 0)
1132		return (error);
1133
1134	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1135	    (error = zfs_secpolicy_write_perms(origin,
1136	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1137		return (error);
1138
1139	if ((error = zfs_secpolicy_write_perms(parentname,
1140	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1141		return (error);
1142
1143	return (zfs_secpolicy_write_perms(parentname,
1144	    ZFS_DELEG_PERM_MOUNT, cr));
1145}
1146
1147/*
1148 * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1149 * SYS_CONFIG privilege, which is not available in a local zone.
1150 */
1151/* ARGSUSED */
1152static int
1153zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1154{
1155	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1156		return (SET_ERROR(EPERM));
1157
1158	return (0);
1159}
1160
1161/*
1162 * Policy for object to name lookups.
1163 */
1164/* ARGSUSED */
1165static int
1166zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1167{
1168	int error;
1169
1170	if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1171		return (0);
1172
1173	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1174	return (error);
1175}
1176
1177/*
1178 * Policy for fault injection.  Requires all privileges.
1179 */
1180/* ARGSUSED */
1181static int
1182zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1183{
1184	return (secpolicy_zinject(cr));
1185}
1186
1187/* ARGSUSED */
1188static int
1189zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1190{
1191	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1192
1193	if (prop == ZPROP_INVAL) {
1194		if (!zfs_prop_user(zc->zc_value))
1195			return (SET_ERROR(EINVAL));
1196		return (zfs_secpolicy_write_perms(zc->zc_name,
1197		    ZFS_DELEG_PERM_USERPROP, cr));
1198	} else {
1199		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1200		    NULL, cr));
1201	}
1202}
1203
1204static int
1205zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1206{
1207	int err = zfs_secpolicy_read(zc, innvl, cr);
1208	if (err)
1209		return (err);
1210
1211	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1212		return (SET_ERROR(EINVAL));
1213
1214	if (zc->zc_value[0] == 0) {
1215		/*
1216		 * They are asking about a posix uid/gid.  If it's
1217		 * themself, allow it.
1218		 */
1219		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1220		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
1221			if (zc->zc_guid == crgetuid(cr))
1222				return (0);
1223		} else {
1224			if (groupmember(zc->zc_guid, cr))
1225				return (0);
1226		}
1227	}
1228
1229	return (zfs_secpolicy_write_perms(zc->zc_name,
1230	    userquota_perms[zc->zc_objset_type], cr));
1231}
1232
1233static int
1234zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1235{
1236	int err = zfs_secpolicy_read(zc, innvl, cr);
1237	if (err)
1238		return (err);
1239
1240	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1241		return (SET_ERROR(EINVAL));
1242
1243	return (zfs_secpolicy_write_perms(zc->zc_name,
1244	    userquota_perms[zc->zc_objset_type], cr));
1245}
1246
1247/* ARGSUSED */
1248static int
1249zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1250{
1251	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1252	    NULL, cr));
1253}
1254
1255/* ARGSUSED */
1256static int
1257zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1258{
1259	nvpair_t *pair;
1260	nvlist_t *holds;
1261	int error;
1262
1263	error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1264	if (error != 0)
1265		return (SET_ERROR(EINVAL));
1266
1267	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1268	    pair = nvlist_next_nvpair(holds, pair)) {
1269		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1270		error = dmu_fsname(nvpair_name(pair), fsname);
1271		if (error != 0)
1272			return (error);
1273		error = zfs_secpolicy_write_perms(fsname,
1274		    ZFS_DELEG_PERM_HOLD, cr);
1275		if (error != 0)
1276			return (error);
1277	}
1278	return (0);
1279}
1280
1281/* ARGSUSED */
1282static int
1283zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1284{
1285	nvpair_t *pair;
1286	int error;
1287
1288	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1289	    pair = nvlist_next_nvpair(innvl, pair)) {
1290		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1291		error = dmu_fsname(nvpair_name(pair), fsname);
1292		if (error != 0)
1293			return (error);
1294		error = zfs_secpolicy_write_perms(fsname,
1295		    ZFS_DELEG_PERM_RELEASE, cr);
1296		if (error != 0)
1297			return (error);
1298	}
1299	return (0);
1300}
1301
1302/*
1303 * Policy for allowing temporary snapshots to be taken or released
1304 */
1305static int
1306zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1307{
1308	/*
1309	 * A temporary snapshot is the same as a snapshot,
1310	 * hold, destroy and release all rolled into one.
1311	 * Delegated diff alone is sufficient that we allow this.
1312	 */
1313	int error;
1314
1315	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1316	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
1317		return (0);
1318
1319	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1320	if (error == 0)
1321		error = zfs_secpolicy_hold(zc, innvl, cr);
1322	if (error == 0)
1323		error = zfs_secpolicy_release(zc, innvl, cr);
1324	if (error == 0)
1325		error = zfs_secpolicy_destroy(zc, innvl, cr);
1326	return (error);
1327}
1328
1329/*
1330 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1331 */
1332static int
1333get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1334{
1335	char *packed;
1336	int error;
1337	nvlist_t *list = NULL;
1338
1339	/*
1340	 * Read in and unpack the user-supplied nvlist.
1341	 */
1342	if (size == 0)
1343		return (SET_ERROR(EINVAL));
1344
1345	packed = kmem_alloc(size, KM_SLEEP);
1346
1347	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1348	    iflag)) != 0) {
1349		kmem_free(packed, size);
1350		return (SET_ERROR(EFAULT));
1351	}
1352
1353	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1354		kmem_free(packed, size);
1355		return (error);
1356	}
1357
1358	kmem_free(packed, size);
1359
1360	*nvp = list;
1361	return (0);
1362}
1363
1364/*
1365 * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1366 * Entries will be removed from the end of the nvlist, and one int32 entry
1367 * named "N_MORE_ERRORS" will be added indicating how many entries were
1368 * removed.
1369 */
1370static int
1371nvlist_smush(nvlist_t *errors, size_t max)
1372{
1373	size_t size;
1374
1375	size = fnvlist_size(errors);
1376
1377	if (size > max) {
1378		nvpair_t *more_errors;
1379		int n = 0;
1380
1381		if (max < 1024)
1382			return (SET_ERROR(ENOMEM));
1383
1384		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1385		more_errors = nvlist_prev_nvpair(errors, NULL);
1386
1387		do {
1388			nvpair_t *pair = nvlist_prev_nvpair(errors,
1389			    more_errors);
1390			fnvlist_remove_nvpair(errors, pair);
1391			n++;
1392			size = fnvlist_size(errors);
1393		} while (size > max);
1394
1395		fnvlist_remove_nvpair(errors, more_errors);
1396		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1397		ASSERT3U(fnvlist_size(errors), <=, max);
1398	}
1399
1400	return (0);
1401}
1402
1403static int
1404put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1405{
1406	char *packed = NULL;
1407	int error = 0;
1408	size_t size;
1409
1410	size = fnvlist_size(nvl);
1411
1412	if (size > zc->zc_nvlist_dst_size) {
1413		/*
1414		 * Solaris returns ENOMEM here, because even if an error is
1415		 * returned from an ioctl(2), new zc_nvlist_dst_size will be
1416		 * passed to the userland. This is not the case for FreeBSD.
1417		 * We need to return 0, so the kernel will copy the
1418		 * zc_nvlist_dst_size back and the userland can discover that a
1419		 * bigger buffer is needed.
1420		 */
1421		error = 0;
1422	} else {
1423		packed = fnvlist_pack(nvl, &size);
1424		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1425		    size, zc->zc_iflags) != 0)
1426			error = SET_ERROR(EFAULT);
1427		fnvlist_pack_free(packed, size);
1428	}
1429
1430	zc->zc_nvlist_dst_size = size;
1431	zc->zc_nvlist_dst_filled = B_TRUE;
1432	return (error);
1433}
1434
1435static int
1436getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1437{
1438	objset_t *os;
1439	vfs_t *vfsp;
1440	int error;
1441
1442	error = dmu_objset_hold(dsname, FTAG, &os);
1443	if (error != 0)
1444		return (error);
1445	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1446		dmu_objset_rele(os, FTAG);
1447		return (SET_ERROR(EINVAL));
1448	}
1449
1450	mutex_enter(&os->os_user_ptr_lock);
1451	*zfvp = dmu_objset_get_user(os);
1452	if (*zfvp) {
1453		vfsp = (*zfvp)->z_vfs;
1454		vfs_ref(vfsp);
1455	} else {
1456		error = SET_ERROR(ESRCH);
1457	}
1458	mutex_exit(&os->os_user_ptr_lock);
1459	dmu_objset_rele(os, FTAG);
1460	if (error == 0) {
1461		error = vfs_busy(vfsp, 0);
1462		vfs_rel(vfsp);
1463		if (error != 0) {
1464			*zfvp = NULL;
1465			error = SET_ERROR(ESRCH);
1466		}
1467	}
1468	return (error);
1469}
1470
1471/*
1472 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1473 * case its z_vfs will be NULL, and it will be opened as the owner.
1474 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1475 * which prevents all vnode ops from running.
1476 */
1477static int
1478zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1479{
1480	int error = 0;
1481
1482	if (getzfsvfs(name, zfvp) != 0)
1483		error = zfsvfs_create(name, zfvp);
1484	if (error == 0) {
1485		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1486		    RW_READER, tag);
1487#ifdef illumos
1488		if ((*zfvp)->z_unmounted) {
1489			/*
1490			 * XXX we could probably try again, since the unmounting
1491			 * thread should be just about to disassociate the
1492			 * objset from the zfsvfs.
1493			 */
1494			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1495			return (SET_ERROR(EBUSY));
1496		}
1497#else
1498		/*
1499		 * vfs_busy() ensures that the filesystem is not and
1500		 * can not be unmounted.
1501		 */
1502		ASSERT(!(*zfvp)->z_unmounted);
1503#endif
1504	}
1505	return (error);
1506}
1507
1508static void
1509zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1510{
1511	rrm_exit(&zfsvfs->z_teardown_lock, tag);
1512
1513	if (zfsvfs->z_vfs) {
1514#ifdef illumos
1515		VFS_RELE(zfsvfs->z_vfs);
1516#else
1517		vfs_unbusy(zfsvfs->z_vfs);
1518#endif
1519	} else {
1520		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1521		zfsvfs_free(zfsvfs);
1522	}
1523}
1524
1525static int
1526zfs_ioc_pool_create(zfs_cmd_t *zc)
1527{
1528	int error;
1529	nvlist_t *config, *props = NULL;
1530	nvlist_t *rootprops = NULL;
1531	nvlist_t *zplprops = NULL;
1532
1533	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1534	    zc->zc_iflags, &config))
1535		return (error);
1536
1537	if (zc->zc_nvlist_src_size != 0 && (error =
1538	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1539	    zc->zc_iflags, &props))) {
1540		nvlist_free(config);
1541		return (error);
1542	}
1543
1544	if (props) {
1545		nvlist_t *nvl = NULL;
1546		uint64_t version = SPA_VERSION;
1547
1548		(void) nvlist_lookup_uint64(props,
1549		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1550		if (!SPA_VERSION_IS_SUPPORTED(version)) {
1551			error = SET_ERROR(EINVAL);
1552			goto pool_props_bad;
1553		}
1554		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1555		if (nvl) {
1556			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1557			if (error != 0) {
1558				nvlist_free(config);
1559				nvlist_free(props);
1560				return (error);
1561			}
1562			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1563		}
1564		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1565		error = zfs_fill_zplprops_root(version, rootprops,
1566		    zplprops, NULL);
1567		if (error != 0)
1568			goto pool_props_bad;
1569	}
1570
1571	error = spa_create(zc->zc_name, config, props, zplprops);
1572
1573	/*
1574	 * Set the remaining root properties
1575	 */
1576	if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1577	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1578		(void) spa_destroy(zc->zc_name);
1579
1580pool_props_bad:
1581	nvlist_free(rootprops);
1582	nvlist_free(zplprops);
1583	nvlist_free(config);
1584	nvlist_free(props);
1585
1586	return (error);
1587}
1588
1589static int
1590zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1591{
1592	int error;
1593	zfs_log_history(zc);
1594	error = spa_destroy(zc->zc_name);
1595	if (error == 0)
1596		zvol_remove_minors(zc->zc_name);
1597	return (error);
1598}
1599
1600static int
1601zfs_ioc_pool_import(zfs_cmd_t *zc)
1602{
1603	nvlist_t *config, *props = NULL;
1604	uint64_t guid;
1605	int error;
1606
1607	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1608	    zc->zc_iflags, &config)) != 0)
1609		return (error);
1610
1611	if (zc->zc_nvlist_src_size != 0 && (error =
1612	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1613	    zc->zc_iflags, &props))) {
1614		nvlist_free(config);
1615		return (error);
1616	}
1617
1618	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1619	    guid != zc->zc_guid)
1620		error = SET_ERROR(EINVAL);
1621	else
1622		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1623
1624	if (zc->zc_nvlist_dst != 0) {
1625		int err;
1626
1627		if ((err = put_nvlist(zc, config)) != 0)
1628			error = err;
1629	}
1630
1631	nvlist_free(config);
1632
1633	nvlist_free(props);
1634
1635	return (error);
1636}
1637
1638static int
1639zfs_ioc_pool_export(zfs_cmd_t *zc)
1640{
1641	int error;
1642	boolean_t force = (boolean_t)zc->zc_cookie;
1643	boolean_t hardforce = (boolean_t)zc->zc_guid;
1644
1645	zfs_log_history(zc);
1646	error = spa_export(zc->zc_name, NULL, force, hardforce);
1647	if (error == 0)
1648		zvol_remove_minors(zc->zc_name);
1649	return (error);
1650}
1651
1652static int
1653zfs_ioc_pool_configs(zfs_cmd_t *zc)
1654{
1655	nvlist_t *configs;
1656	int error;
1657
1658	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1659		return (SET_ERROR(EEXIST));
1660
1661	error = put_nvlist(zc, configs);
1662
1663	nvlist_free(configs);
1664
1665	return (error);
1666}
1667
1668/*
1669 * inputs:
1670 * zc_name		name of the pool
1671 *
1672 * outputs:
1673 * zc_cookie		real errno
1674 * zc_nvlist_dst	config nvlist
1675 * zc_nvlist_dst_size	size of config nvlist
1676 */
1677static int
1678zfs_ioc_pool_stats(zfs_cmd_t *zc)
1679{
1680	nvlist_t *config;
1681	int error;
1682	int ret = 0;
1683
1684	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1685	    sizeof (zc->zc_value));
1686
1687	if (config != NULL) {
1688		ret = put_nvlist(zc, config);
1689		nvlist_free(config);
1690
1691		/*
1692		 * The config may be present even if 'error' is non-zero.
1693		 * In this case we return success, and preserve the real errno
1694		 * in 'zc_cookie'.
1695		 */
1696		zc->zc_cookie = error;
1697	} else {
1698		ret = error;
1699	}
1700
1701	return (ret);
1702}
1703
1704/*
1705 * Try to import the given pool, returning pool stats as appropriate so that
1706 * user land knows which devices are available and overall pool health.
1707 */
1708static int
1709zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1710{
1711	nvlist_t *tryconfig, *config;
1712	int error;
1713
1714	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1715	    zc->zc_iflags, &tryconfig)) != 0)
1716		return (error);
1717
1718	config = spa_tryimport(tryconfig);
1719
1720	nvlist_free(tryconfig);
1721
1722	if (config == NULL)
1723		return (SET_ERROR(EINVAL));
1724
1725	error = put_nvlist(zc, config);
1726	nvlist_free(config);
1727
1728	return (error);
1729}
1730
1731/*
1732 * inputs:
1733 * zc_name              name of the pool
1734 * zc_cookie            scan func (pool_scan_func_t)
1735 */
1736static int
1737zfs_ioc_pool_scan(zfs_cmd_t *zc)
1738{
1739	spa_t *spa;
1740	int error;
1741
1742	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1743		return (error);
1744
1745	if (zc->zc_cookie == POOL_SCAN_NONE)
1746		error = spa_scan_stop(spa);
1747	else
1748		error = spa_scan(spa, zc->zc_cookie);
1749
1750	spa_close(spa, FTAG);
1751
1752	return (error);
1753}
1754
1755static int
1756zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1757{
1758	spa_t *spa;
1759	int error;
1760
1761	error = spa_open(zc->zc_name, &spa, FTAG);
1762	if (error == 0) {
1763		spa_freeze(spa);
1764		spa_close(spa, FTAG);
1765	}
1766	return (error);
1767}
1768
1769static int
1770zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1771{
1772	spa_t *spa;
1773	int error;
1774
1775	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1776		return (error);
1777
1778	if (zc->zc_cookie < spa_version(spa) ||
1779	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1780		spa_close(spa, FTAG);
1781		return (SET_ERROR(EINVAL));
1782	}
1783
1784	spa_upgrade(spa, zc->zc_cookie);
1785	spa_close(spa, FTAG);
1786
1787	return (error);
1788}
1789
1790static int
1791zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1792{
1793	spa_t *spa;
1794	char *hist_buf;
1795	uint64_t size;
1796	int error;
1797
1798	if ((size = zc->zc_history_len) == 0)
1799		return (SET_ERROR(EINVAL));
1800
1801	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1802		return (error);
1803
1804	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1805		spa_close(spa, FTAG);
1806		return (SET_ERROR(ENOTSUP));
1807	}
1808
1809	hist_buf = kmem_alloc(size, KM_SLEEP);
1810	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1811	    &zc->zc_history_len, hist_buf)) == 0) {
1812		error = ddi_copyout(hist_buf,
1813		    (void *)(uintptr_t)zc->zc_history,
1814		    zc->zc_history_len, zc->zc_iflags);
1815	}
1816
1817	spa_close(spa, FTAG);
1818	kmem_free(hist_buf, size);
1819	return (error);
1820}
1821
1822static int
1823zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1824{
1825	spa_t *spa;
1826	int error;
1827
1828	error = spa_open(zc->zc_name, &spa, FTAG);
1829	if (error == 0) {
1830		error = spa_change_guid(spa);
1831		spa_close(spa, FTAG);
1832	}
1833	return (error);
1834}
1835
1836static int
1837zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1838{
1839	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1840}
1841
1842/*
1843 * inputs:
1844 * zc_name		name of filesystem
1845 * zc_obj		object to find
1846 *
1847 * outputs:
1848 * zc_value		name of object
1849 */
1850static int
1851zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1852{
1853	objset_t *os;
1854	int error;
1855
1856	/* XXX reading from objset not owned */
1857	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1858		return (error);
1859	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1860		dmu_objset_rele(os, FTAG);
1861		return (SET_ERROR(EINVAL));
1862	}
1863	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1864	    sizeof (zc->zc_value));
1865	dmu_objset_rele(os, FTAG);
1866
1867	return (error);
1868}
1869
1870/*
1871 * inputs:
1872 * zc_name		name of filesystem
1873 * zc_obj		object to find
1874 *
1875 * outputs:
1876 * zc_stat		stats on object
1877 * zc_value		path to object
1878 */
1879static int
1880zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1881{
1882	objset_t *os;
1883	int error;
1884
1885	/* XXX reading from objset not owned */
1886	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1887		return (error);
1888	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1889		dmu_objset_rele(os, FTAG);
1890		return (SET_ERROR(EINVAL));
1891	}
1892	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1893	    sizeof (zc->zc_value));
1894	dmu_objset_rele(os, FTAG);
1895
1896	return (error);
1897}
1898
1899static int
1900zfs_ioc_vdev_add(zfs_cmd_t *zc)
1901{
1902	spa_t *spa;
1903	int error;
1904	nvlist_t *config, **l2cache, **spares;
1905	uint_t nl2cache = 0, nspares = 0;
1906
1907	error = spa_open(zc->zc_name, &spa, FTAG);
1908	if (error != 0)
1909		return (error);
1910
1911	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1912	    zc->zc_iflags, &config);
1913	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1914	    &l2cache, &nl2cache);
1915
1916	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1917	    &spares, &nspares);
1918
1919#ifdef illumos
1920	/*
1921	 * A root pool with concatenated devices is not supported.
1922	 * Thus, can not add a device to a root pool.
1923	 *
1924	 * Intent log device can not be added to a rootpool because
1925	 * during mountroot, zil is replayed, a seperated log device
1926	 * can not be accessed during the mountroot time.
1927	 *
1928	 * l2cache and spare devices are ok to be added to a rootpool.
1929	 */
1930	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1931		nvlist_free(config);
1932		spa_close(spa, FTAG);
1933		return (SET_ERROR(EDOM));
1934	}
1935#endif /* illumos */
1936
1937	if (error == 0) {
1938		error = spa_vdev_add(spa, config);
1939		nvlist_free(config);
1940	}
1941	spa_close(spa, FTAG);
1942	return (error);
1943}
1944
1945/*
1946 * inputs:
1947 * zc_name		name of the pool
1948 * zc_nvlist_conf	nvlist of devices to remove
1949 * zc_cookie		to stop the remove?
1950 */
1951static int
1952zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1953{
1954	spa_t *spa;
1955	int error;
1956
1957	error = spa_open(zc->zc_name, &spa, FTAG);
1958	if (error != 0)
1959		return (error);
1960	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1961	spa_close(spa, FTAG);
1962	return (error);
1963}
1964
1965static int
1966zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1967{
1968	spa_t *spa;
1969	int error;
1970	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1971
1972	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1973		return (error);
1974	switch (zc->zc_cookie) {
1975	case VDEV_STATE_ONLINE:
1976		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1977		break;
1978
1979	case VDEV_STATE_OFFLINE:
1980		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1981		break;
1982
1983	case VDEV_STATE_FAULTED:
1984		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1985		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1986			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1987
1988		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1989		break;
1990
1991	case VDEV_STATE_DEGRADED:
1992		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1993		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1994			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1995
1996		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1997		break;
1998
1999	default:
2000		error = SET_ERROR(EINVAL);
2001	}
2002	zc->zc_cookie = newstate;
2003	spa_close(spa, FTAG);
2004	return (error);
2005}
2006
2007static int
2008zfs_ioc_vdev_attach(zfs_cmd_t *zc)
2009{
2010	spa_t *spa;
2011	int replacing = zc->zc_cookie;
2012	nvlist_t *config;
2013	int error;
2014
2015	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2016		return (error);
2017
2018	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2019	    zc->zc_iflags, &config)) == 0) {
2020		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
2021		nvlist_free(config);
2022	}
2023
2024	spa_close(spa, FTAG);
2025	return (error);
2026}
2027
2028static int
2029zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2030{
2031	spa_t *spa;
2032	int error;
2033
2034	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2035		return (error);
2036
2037	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2038
2039	spa_close(spa, FTAG);
2040	return (error);
2041}
2042
2043static int
2044zfs_ioc_vdev_split(zfs_cmd_t *zc)
2045{
2046	spa_t *spa;
2047	nvlist_t *config, *props = NULL;
2048	int error;
2049	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2050
2051	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2052		return (error);
2053
2054	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2055	    zc->zc_iflags, &config)) {
2056		spa_close(spa, FTAG);
2057		return (error);
2058	}
2059
2060	if (zc->zc_nvlist_src_size != 0 && (error =
2061	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2062	    zc->zc_iflags, &props))) {
2063		spa_close(spa, FTAG);
2064		nvlist_free(config);
2065		return (error);
2066	}
2067
2068	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2069
2070	spa_close(spa, FTAG);
2071
2072	nvlist_free(config);
2073	nvlist_free(props);
2074
2075	return (error);
2076}
2077
2078static int
2079zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2080{
2081	spa_t *spa;
2082	char *path = zc->zc_value;
2083	uint64_t guid = zc->zc_guid;
2084	int error;
2085
2086	error = spa_open(zc->zc_name, &spa, FTAG);
2087	if (error != 0)
2088		return (error);
2089
2090	error = spa_vdev_setpath(spa, guid, path);
2091	spa_close(spa, FTAG);
2092	return (error);
2093}
2094
2095static int
2096zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2097{
2098	spa_t *spa;
2099	char *fru = zc->zc_value;
2100	uint64_t guid = zc->zc_guid;
2101	int error;
2102
2103	error = spa_open(zc->zc_name, &spa, FTAG);
2104	if (error != 0)
2105		return (error);
2106
2107	error = spa_vdev_setfru(spa, guid, fru);
2108	spa_close(spa, FTAG);
2109	return (error);
2110}
2111
2112static int
2113zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2114{
2115	int error = 0;
2116	nvlist_t *nv;
2117
2118	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2119
2120	if (zc->zc_nvlist_dst != 0 &&
2121	    (error = dsl_prop_get_all(os, &nv)) == 0) {
2122		dmu_objset_stats(os, nv);
2123		/*
2124		 * NB: zvol_get_stats() will read the objset contents,
2125		 * which we aren't supposed to do with a
2126		 * DS_MODE_USER hold, because it could be
2127		 * inconsistent.  So this is a bit of a workaround...
2128		 * XXX reading with out owning
2129		 */
2130		if (!zc->zc_objset_stats.dds_inconsistent &&
2131		    dmu_objset_type(os) == DMU_OST_ZVOL) {
2132			error = zvol_get_stats(os, nv);
2133			if (error == EIO)
2134				return (error);
2135			VERIFY0(error);
2136		}
2137		error = put_nvlist(zc, nv);
2138		nvlist_free(nv);
2139	}
2140
2141	return (error);
2142}
2143
2144/*
2145 * inputs:
2146 * zc_name		name of filesystem
2147 * zc_nvlist_dst_size	size of buffer for property nvlist
2148 *
2149 * outputs:
2150 * zc_objset_stats	stats
2151 * zc_nvlist_dst	property nvlist
2152 * zc_nvlist_dst_size	size of property nvlist
2153 */
2154static int
2155zfs_ioc_objset_stats(zfs_cmd_t *zc)
2156{
2157	objset_t *os;
2158	int error;
2159
2160	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2161	if (error == 0) {
2162		error = zfs_ioc_objset_stats_impl(zc, os);
2163		dmu_objset_rele(os, FTAG);
2164	}
2165
2166	if (error == ENOMEM)
2167		error = 0;
2168	return (error);
2169}
2170
2171/*
2172 * inputs:
2173 * zc_name		name of filesystem
2174 * zc_nvlist_dst_size	size of buffer for property nvlist
2175 *
2176 * outputs:
2177 * zc_nvlist_dst	received property nvlist
2178 * zc_nvlist_dst_size	size of received property nvlist
2179 *
2180 * Gets received properties (distinct from local properties on or after
2181 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2182 * local property values.
2183 */
2184static int
2185zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2186{
2187	int error = 0;
2188	nvlist_t *nv;
2189
2190	/*
2191	 * Without this check, we would return local property values if the
2192	 * caller has not already received properties on or after
2193	 * SPA_VERSION_RECVD_PROPS.
2194	 */
2195	if (!dsl_prop_get_hasrecvd(zc->zc_name))
2196		return (SET_ERROR(ENOTSUP));
2197
2198	if (zc->zc_nvlist_dst != 0 &&
2199	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2200		error = put_nvlist(zc, nv);
2201		nvlist_free(nv);
2202	}
2203
2204	return (error);
2205}
2206
2207static int
2208nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2209{
2210	uint64_t value;
2211	int error;
2212
2213	/*
2214	 * zfs_get_zplprop() will either find a value or give us
2215	 * the default value (if there is one).
2216	 */
2217	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2218		return (error);
2219	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2220	return (0);
2221}
2222
2223/*
2224 * inputs:
2225 * zc_name		name of filesystem
2226 * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2227 *
2228 * outputs:
2229 * zc_nvlist_dst	zpl property nvlist
2230 * zc_nvlist_dst_size	size of zpl property nvlist
2231 */
2232static int
2233zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2234{
2235	objset_t *os;
2236	int err;
2237
2238	/* XXX reading without owning */
2239	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2240		return (err);
2241
2242	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2243
2244	/*
2245	 * NB: nvl_add_zplprop() will read the objset contents,
2246	 * which we aren't supposed to do with a DS_MODE_USER
2247	 * hold, because it could be inconsistent.
2248	 */
2249	if (zc->zc_nvlist_dst != 0 &&
2250	    !zc->zc_objset_stats.dds_inconsistent &&
2251	    dmu_objset_type(os) == DMU_OST_ZFS) {
2252		nvlist_t *nv;
2253
2254		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2255		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2256		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2257		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2258		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2259			err = put_nvlist(zc, nv);
2260		nvlist_free(nv);
2261	} else {
2262		err = SET_ERROR(ENOENT);
2263	}
2264	dmu_objset_rele(os, FTAG);
2265	return (err);
2266}
2267
2268boolean_t
2269dataset_name_hidden(const char *name)
2270{
2271	/*
2272	 * Skip over datasets that are not visible in this zone,
2273	 * internal datasets (which have a $ in their name), and
2274	 * temporary datasets (which have a % in their name).
2275	 */
2276	if (strchr(name, '$') != NULL)
2277		return (B_TRUE);
2278	if (strchr(name, '%') != NULL)
2279		return (B_TRUE);
2280	if (!INGLOBALZONE(curthread) && !zone_dataset_visible(name, NULL))
2281		return (B_TRUE);
2282	return (B_FALSE);
2283}
2284
2285/*
2286 * inputs:
2287 * zc_name		name of filesystem
2288 * zc_cookie		zap cursor
2289 * zc_nvlist_dst_size	size of buffer for property nvlist
2290 *
2291 * outputs:
2292 * zc_name		name of next filesystem
2293 * zc_cookie		zap cursor
2294 * zc_objset_stats	stats
2295 * zc_nvlist_dst	property nvlist
2296 * zc_nvlist_dst_size	size of property nvlist
2297 */
2298static int
2299zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2300{
2301	objset_t *os;
2302	int error;
2303	char *p;
2304	size_t orig_len = strlen(zc->zc_name);
2305
2306top:
2307	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2308		if (error == ENOENT)
2309			error = SET_ERROR(ESRCH);
2310		return (error);
2311	}
2312
2313	p = strrchr(zc->zc_name, '/');
2314	if (p == NULL || p[1] != '\0')
2315		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2316	p = zc->zc_name + strlen(zc->zc_name);
2317
2318	do {
2319		error = dmu_dir_list_next(os,
2320		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2321		    NULL, &zc->zc_cookie);
2322		if (error == ENOENT)
2323			error = SET_ERROR(ESRCH);
2324	} while (error == 0 && dataset_name_hidden(zc->zc_name));
2325	dmu_objset_rele(os, FTAG);
2326
2327	/*
2328	 * If it's an internal dataset (ie. with a '$' in its name),
2329	 * don't try to get stats for it, otherwise we'll return ENOENT.
2330	 */
2331	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2332		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2333		if (error == ENOENT) {
2334			/* We lost a race with destroy, get the next one. */
2335			zc->zc_name[orig_len] = '\0';
2336			goto top;
2337		}
2338	}
2339	return (error);
2340}
2341
2342/*
2343 * inputs:
2344 * zc_name		name of filesystem
2345 * zc_cookie		zap cursor
2346 * zc_nvlist_dst_size	size of buffer for property nvlist
2347 * zc_simple		when set, only name is requested
2348 *
2349 * outputs:
2350 * zc_name		name of next snapshot
2351 * zc_objset_stats	stats
2352 * zc_nvlist_dst	property nvlist
2353 * zc_nvlist_dst_size	size of property nvlist
2354 */
2355static int
2356zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2357{
2358	objset_t *os;
2359	int error;
2360
2361	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2362	if (error != 0) {
2363		return (error == ENOENT ? ESRCH : error);
2364	}
2365
2366	/*
2367	 * A dataset name of maximum length cannot have any snapshots,
2368	 * so exit immediately.
2369	 */
2370	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2371	    ZFS_MAX_DATASET_NAME_LEN) {
2372		dmu_objset_rele(os, FTAG);
2373		return (SET_ERROR(ESRCH));
2374	}
2375
2376	error = dmu_snapshot_list_next(os,
2377	    sizeof (zc->zc_name) - strlen(zc->zc_name),
2378	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2379	    NULL);
2380
2381	if (error == 0 && !zc->zc_simple) {
2382		dsl_dataset_t *ds;
2383		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2384
2385		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2386		if (error == 0) {
2387			objset_t *ossnap;
2388
2389			error = dmu_objset_from_ds(ds, &ossnap);
2390			if (error == 0)
2391				error = zfs_ioc_objset_stats_impl(zc, ossnap);
2392			dsl_dataset_rele(ds, FTAG);
2393		}
2394	} else if (error == ENOENT) {
2395		error = SET_ERROR(ESRCH);
2396	}
2397
2398	dmu_objset_rele(os, FTAG);
2399	/* if we failed, undo the @ that we tacked on to zc_name */
2400	if (error != 0)
2401		*strchr(zc->zc_name, '@') = '\0';
2402	return (error);
2403}
2404
2405static int
2406zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2407{
2408	const char *propname = nvpair_name(pair);
2409	uint64_t *valary;
2410	unsigned int vallen;
2411	const char *domain;
2412	char *dash;
2413	zfs_userquota_prop_t type;
2414	uint64_t rid;
2415	uint64_t quota;
2416	zfsvfs_t *zfsvfs;
2417	int err;
2418
2419	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2420		nvlist_t *attrs;
2421		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2422		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2423		    &pair) != 0)
2424			return (SET_ERROR(EINVAL));
2425	}
2426
2427	/*
2428	 * A correctly constructed propname is encoded as
2429	 * userquota@<rid>-<domain>.
2430	 */
2431	if ((dash = strchr(propname, '-')) == NULL ||
2432	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2433	    vallen != 3)
2434		return (SET_ERROR(EINVAL));
2435
2436	domain = dash + 1;
2437	type = valary[0];
2438	rid = valary[1];
2439	quota = valary[2];
2440
2441	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2442	if (err == 0) {
2443		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2444		zfsvfs_rele(zfsvfs, FTAG);
2445	}
2446
2447	return (err);
2448}
2449
2450/*
2451 * If the named property is one that has a special function to set its value,
2452 * return 0 on success and a positive error code on failure; otherwise if it is
2453 * not one of the special properties handled by this function, return -1.
2454 *
2455 * XXX: It would be better for callers of the property interface if we handled
2456 * these special cases in dsl_prop.c (in the dsl layer).
2457 */
2458static int
2459zfs_prop_set_special(const char *dsname, zprop_source_t source,
2460    nvpair_t *pair)
2461{
2462	const char *propname = nvpair_name(pair);
2463	zfs_prop_t prop = zfs_name_to_prop(propname);
2464	uint64_t intval;
2465	int err = -1;
2466
2467	if (prop == ZPROP_INVAL) {
2468		if (zfs_prop_userquota(propname))
2469			return (zfs_prop_set_userquota(dsname, pair));
2470		return (-1);
2471	}
2472
2473	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2474		nvlist_t *attrs;
2475		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2476		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2477		    &pair) == 0);
2478	}
2479
2480	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2481		return (-1);
2482
2483	VERIFY(0 == nvpair_value_uint64(pair, &intval));
2484
2485	switch (prop) {
2486	case ZFS_PROP_QUOTA:
2487		err = dsl_dir_set_quota(dsname, source, intval);
2488		break;
2489	case ZFS_PROP_REFQUOTA:
2490		err = dsl_dataset_set_refquota(dsname, source, intval);
2491		break;
2492	case ZFS_PROP_FILESYSTEM_LIMIT:
2493	case ZFS_PROP_SNAPSHOT_LIMIT:
2494		if (intval == UINT64_MAX) {
2495			/* clearing the limit, just do it */
2496			err = 0;
2497		} else {
2498			err = dsl_dir_activate_fs_ss_limit(dsname);
2499		}
2500		/*
2501		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2502		 * default path to set the value in the nvlist.
2503		 */
2504		if (err == 0)
2505			err = -1;
2506		break;
2507	case ZFS_PROP_RESERVATION:
2508		err = dsl_dir_set_reservation(dsname, source, intval);
2509		break;
2510	case ZFS_PROP_REFRESERVATION:
2511		err = dsl_dataset_set_refreservation(dsname, source, intval);
2512		break;
2513	case ZFS_PROP_VOLSIZE:
2514		err = zvol_set_volsize(dsname, intval);
2515		break;
2516	case ZFS_PROP_VERSION:
2517	{
2518		zfsvfs_t *zfsvfs;
2519
2520		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2521			break;
2522
2523		err = zfs_set_version(zfsvfs, intval);
2524		zfsvfs_rele(zfsvfs, FTAG);
2525
2526		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2527			zfs_cmd_t *zc;
2528
2529			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2530			(void) strcpy(zc->zc_name, dsname);
2531			(void) zfs_ioc_userspace_upgrade(zc);
2532			kmem_free(zc, sizeof (zfs_cmd_t));
2533		}
2534		break;
2535	}
2536	default:
2537		err = -1;
2538	}
2539
2540	return (err);
2541}
2542
2543/*
2544 * This function is best effort. If it fails to set any of the given properties,
2545 * it continues to set as many as it can and returns the last error
2546 * encountered. If the caller provides a non-NULL errlist, it will be filled in
2547 * with the list of names of all the properties that failed along with the
2548 * corresponding error numbers.
2549 *
2550 * If every property is set successfully, zero is returned and errlist is not
2551 * modified.
2552 */
2553int
2554zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2555    nvlist_t *errlist)
2556{
2557	nvpair_t *pair;
2558	nvpair_t *propval;
2559	int rv = 0;
2560	uint64_t intval;
2561	char *strval;
2562	nvlist_t *genericnvl = fnvlist_alloc();
2563	nvlist_t *retrynvl = fnvlist_alloc();
2564
2565retry:
2566	pair = NULL;
2567	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2568		const char *propname = nvpair_name(pair);
2569		zfs_prop_t prop = zfs_name_to_prop(propname);
2570		int err = 0;
2571
2572		/* decode the property value */
2573		propval = pair;
2574		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2575			nvlist_t *attrs;
2576			attrs = fnvpair_value_nvlist(pair);
2577			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2578			    &propval) != 0)
2579				err = SET_ERROR(EINVAL);
2580		}
2581
2582		/* Validate value type */
2583		if (err == 0 && prop == ZPROP_INVAL) {
2584			if (zfs_prop_user(propname)) {
2585				if (nvpair_type(propval) != DATA_TYPE_STRING)
2586					err = SET_ERROR(EINVAL);
2587			} else if (zfs_prop_userquota(propname)) {
2588				if (nvpair_type(propval) !=
2589				    DATA_TYPE_UINT64_ARRAY)
2590					err = SET_ERROR(EINVAL);
2591			} else {
2592				err = SET_ERROR(EINVAL);
2593			}
2594		} else if (err == 0) {
2595			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2596				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2597					err = SET_ERROR(EINVAL);
2598			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2599				const char *unused;
2600
2601				intval = fnvpair_value_uint64(propval);
2602
2603				switch (zfs_prop_get_type(prop)) {
2604				case PROP_TYPE_NUMBER:
2605					break;
2606				case PROP_TYPE_STRING:
2607					err = SET_ERROR(EINVAL);
2608					break;
2609				case PROP_TYPE_INDEX:
2610					if (zfs_prop_index_to_string(prop,
2611					    intval, &unused) != 0)
2612						err = SET_ERROR(EINVAL);
2613					break;
2614				default:
2615					cmn_err(CE_PANIC,
2616					    "unknown property type");
2617				}
2618			} else {
2619				err = SET_ERROR(EINVAL);
2620			}
2621		}
2622
2623		/* Validate permissions */
2624		if (err == 0)
2625			err = zfs_check_settable(dsname, pair, CRED());
2626
2627		if (err == 0) {
2628			err = zfs_prop_set_special(dsname, source, pair);
2629			if (err == -1) {
2630				/*
2631				 * For better performance we build up a list of
2632				 * properties to set in a single transaction.
2633				 */
2634				err = nvlist_add_nvpair(genericnvl, pair);
2635			} else if (err != 0 && nvl != retrynvl) {
2636				/*
2637				 * This may be a spurious error caused by
2638				 * receiving quota and reservation out of order.
2639				 * Try again in a second pass.
2640				 */
2641				err = nvlist_add_nvpair(retrynvl, pair);
2642			}
2643		}
2644
2645		if (err != 0) {
2646			if (errlist != NULL)
2647				fnvlist_add_int32(errlist, propname, err);
2648			rv = err;
2649		}
2650	}
2651
2652	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2653		nvl = retrynvl;
2654		goto retry;
2655	}
2656
2657	if (!nvlist_empty(genericnvl) &&
2658	    dsl_props_set(dsname, source, genericnvl) != 0) {
2659		/*
2660		 * If this fails, we still want to set as many properties as we
2661		 * can, so try setting them individually.
2662		 */
2663		pair = NULL;
2664		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2665			const char *propname = nvpair_name(pair);
2666			int err = 0;
2667
2668			propval = pair;
2669			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2670				nvlist_t *attrs;
2671				attrs = fnvpair_value_nvlist(pair);
2672				propval = fnvlist_lookup_nvpair(attrs,
2673				    ZPROP_VALUE);
2674			}
2675
2676			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2677				strval = fnvpair_value_string(propval);
2678				err = dsl_prop_set_string(dsname, propname,
2679				    source, strval);
2680			} else {
2681				intval = fnvpair_value_uint64(propval);
2682				err = dsl_prop_set_int(dsname, propname, source,
2683				    intval);
2684			}
2685
2686			if (err != 0) {
2687				if (errlist != NULL) {
2688					fnvlist_add_int32(errlist, propname,
2689					    err);
2690				}
2691				rv = err;
2692			}
2693		}
2694	}
2695	nvlist_free(genericnvl);
2696	nvlist_free(retrynvl);
2697
2698	return (rv);
2699}
2700
2701/*
2702 * Check that all the properties are valid user properties.
2703 */
2704static int
2705zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2706{
2707	nvpair_t *pair = NULL;
2708	int error = 0;
2709
2710	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2711		const char *propname = nvpair_name(pair);
2712
2713		if (!zfs_prop_user(propname) ||
2714		    nvpair_type(pair) != DATA_TYPE_STRING)
2715			return (SET_ERROR(EINVAL));
2716
2717		if (error = zfs_secpolicy_write_perms(fsname,
2718		    ZFS_DELEG_PERM_USERPROP, CRED()))
2719			return (error);
2720
2721		if (strlen(propname) >= ZAP_MAXNAMELEN)
2722			return (SET_ERROR(ENAMETOOLONG));
2723
2724		if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2725			return (E2BIG);
2726	}
2727	return (0);
2728}
2729
2730static void
2731props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2732{
2733	nvpair_t *pair;
2734
2735	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2736
2737	pair = NULL;
2738	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2739		if (nvlist_exists(skipped, nvpair_name(pair)))
2740			continue;
2741
2742		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2743	}
2744}
2745
2746static int
2747clear_received_props(const char *dsname, nvlist_t *props,
2748    nvlist_t *skipped)
2749{
2750	int err = 0;
2751	nvlist_t *cleared_props = NULL;
2752	props_skip(props, skipped, &cleared_props);
2753	if (!nvlist_empty(cleared_props)) {
2754		/*
2755		 * Acts on local properties until the dataset has received
2756		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2757		 */
2758		zprop_source_t flags = (ZPROP_SRC_NONE |
2759		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2760		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2761	}
2762	nvlist_free(cleared_props);
2763	return (err);
2764}
2765
2766/*
2767 * inputs:
2768 * zc_name		name of filesystem
2769 * zc_value		name of property to set
2770 * zc_nvlist_src{_size}	nvlist of properties to apply
2771 * zc_cookie		received properties flag
2772 *
2773 * outputs:
2774 * zc_nvlist_dst{_size} error for each unapplied received property
2775 */
2776static int
2777zfs_ioc_set_prop(zfs_cmd_t *zc)
2778{
2779	nvlist_t *nvl;
2780	boolean_t received = zc->zc_cookie;
2781	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2782	    ZPROP_SRC_LOCAL);
2783	nvlist_t *errors;
2784	int error;
2785
2786	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2787	    zc->zc_iflags, &nvl)) != 0)
2788		return (error);
2789
2790	if (received) {
2791		nvlist_t *origprops;
2792
2793		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2794			(void) clear_received_props(zc->zc_name,
2795			    origprops, nvl);
2796			nvlist_free(origprops);
2797		}
2798
2799		error = dsl_prop_set_hasrecvd(zc->zc_name);
2800	}
2801
2802	errors = fnvlist_alloc();
2803	if (error == 0)
2804		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2805
2806	if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2807		(void) put_nvlist(zc, errors);
2808	}
2809
2810	nvlist_free(errors);
2811	nvlist_free(nvl);
2812	return (error);
2813}
2814
2815/*
2816 * inputs:
2817 * zc_name		name of filesystem
2818 * zc_value		name of property to inherit
2819 * zc_cookie		revert to received value if TRUE
2820 *
2821 * outputs:		none
2822 */
2823static int
2824zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2825{
2826	const char *propname = zc->zc_value;
2827	zfs_prop_t prop = zfs_name_to_prop(propname);
2828	boolean_t received = zc->zc_cookie;
2829	zprop_source_t source = (received
2830	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2831	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2832
2833	if (received) {
2834		nvlist_t *dummy;
2835		nvpair_t *pair;
2836		zprop_type_t type;
2837		int err;
2838
2839		/*
2840		 * zfs_prop_set_special() expects properties in the form of an
2841		 * nvpair with type info.
2842		 */
2843		if (prop == ZPROP_INVAL) {
2844			if (!zfs_prop_user(propname))
2845				return (SET_ERROR(EINVAL));
2846
2847			type = PROP_TYPE_STRING;
2848		} else if (prop == ZFS_PROP_VOLSIZE ||
2849		    prop == ZFS_PROP_VERSION) {
2850			return (SET_ERROR(EINVAL));
2851		} else {
2852			type = zfs_prop_get_type(prop);
2853		}
2854
2855		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2856
2857		switch (type) {
2858		case PROP_TYPE_STRING:
2859			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2860			break;
2861		case PROP_TYPE_NUMBER:
2862		case PROP_TYPE_INDEX:
2863			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2864			break;
2865		default:
2866			nvlist_free(dummy);
2867			return (SET_ERROR(EINVAL));
2868		}
2869
2870		pair = nvlist_next_nvpair(dummy, NULL);
2871		err = zfs_prop_set_special(zc->zc_name, source, pair);
2872		nvlist_free(dummy);
2873		if (err != -1)
2874			return (err); /* special property already handled */
2875	} else {
2876		/*
2877		 * Only check this in the non-received case. We want to allow
2878		 * 'inherit -S' to revert non-inheritable properties like quota
2879		 * and reservation to the received or default values even though
2880		 * they are not considered inheritable.
2881		 */
2882		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2883			return (SET_ERROR(EINVAL));
2884	}
2885
2886	/* property name has been validated by zfs_secpolicy_inherit_prop() */
2887	return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2888}
2889
2890static int
2891zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2892{
2893	nvlist_t *props;
2894	spa_t *spa;
2895	int error;
2896	nvpair_t *pair;
2897
2898	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2899	    zc->zc_iflags, &props))
2900		return (error);
2901
2902	/*
2903	 * If the only property is the configfile, then just do a spa_lookup()
2904	 * to handle the faulted case.
2905	 */
2906	pair = nvlist_next_nvpair(props, NULL);
2907	if (pair != NULL && strcmp(nvpair_name(pair),
2908	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2909	    nvlist_next_nvpair(props, pair) == NULL) {
2910		mutex_enter(&spa_namespace_lock);
2911		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2912			spa_configfile_set(spa, props, B_FALSE);
2913			spa_config_sync(spa, B_FALSE, B_TRUE);
2914		}
2915		mutex_exit(&spa_namespace_lock);
2916		if (spa != NULL) {
2917			nvlist_free(props);
2918			return (0);
2919		}
2920	}
2921
2922	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2923		nvlist_free(props);
2924		return (error);
2925	}
2926
2927	error = spa_prop_set(spa, props);
2928
2929	nvlist_free(props);
2930	spa_close(spa, FTAG);
2931
2932	return (error);
2933}
2934
2935static int
2936zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2937{
2938	spa_t *spa;
2939	int error;
2940	nvlist_t *nvp = NULL;
2941
2942	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2943		/*
2944		 * If the pool is faulted, there may be properties we can still
2945		 * get (such as altroot and cachefile), so attempt to get them
2946		 * anyway.
2947		 */
2948		mutex_enter(&spa_namespace_lock);
2949		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2950			error = spa_prop_get(spa, &nvp);
2951		mutex_exit(&spa_namespace_lock);
2952	} else {
2953		error = spa_prop_get(spa, &nvp);
2954		spa_close(spa, FTAG);
2955	}
2956
2957	if (error == 0 && zc->zc_nvlist_dst != 0)
2958		error = put_nvlist(zc, nvp);
2959	else
2960		error = SET_ERROR(EFAULT);
2961
2962	nvlist_free(nvp);
2963	return (error);
2964}
2965
2966/*
2967 * inputs:
2968 * zc_name		name of filesystem
2969 * zc_nvlist_src{_size}	nvlist of delegated permissions
2970 * zc_perm_action	allow/unallow flag
2971 *
2972 * outputs:		none
2973 */
2974static int
2975zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2976{
2977	int error;
2978	nvlist_t *fsaclnv = NULL;
2979
2980	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2981	    zc->zc_iflags, &fsaclnv)) != 0)
2982		return (error);
2983
2984	/*
2985	 * Verify nvlist is constructed correctly
2986	 */
2987	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2988		nvlist_free(fsaclnv);
2989		return (SET_ERROR(EINVAL));
2990	}
2991
2992	/*
2993	 * If we don't have PRIV_SYS_MOUNT, then validate
2994	 * that user is allowed to hand out each permission in
2995	 * the nvlist(s)
2996	 */
2997
2998	error = secpolicy_zfs(CRED());
2999	if (error != 0) {
3000		if (zc->zc_perm_action == B_FALSE) {
3001			error = dsl_deleg_can_allow(zc->zc_name,
3002			    fsaclnv, CRED());
3003		} else {
3004			error = dsl_deleg_can_unallow(zc->zc_name,
3005			    fsaclnv, CRED());
3006		}
3007	}
3008
3009	if (error == 0)
3010		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3011
3012	nvlist_free(fsaclnv);
3013	return (error);
3014}
3015
3016/*
3017 * inputs:
3018 * zc_name		name of filesystem
3019 *
3020 * outputs:
3021 * zc_nvlist_src{_size}	nvlist of delegated permissions
3022 */
3023static int
3024zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3025{
3026	nvlist_t *nvp;
3027	int error;
3028
3029	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3030		error = put_nvlist(zc, nvp);
3031		nvlist_free(nvp);
3032	}
3033
3034	return (error);
3035}
3036
3037/*
3038 * Search the vfs list for a specified resource.  Returns a pointer to it
3039 * or NULL if no suitable entry is found. The caller of this routine
3040 * is responsible for releasing the returned vfs pointer.
3041 */
3042static vfs_t *
3043zfs_get_vfs(const char *resource)
3044{
3045	vfs_t *vfsp;
3046
3047	mtx_lock(&mountlist_mtx);
3048	TAILQ_FOREACH(vfsp, &mountlist, mnt_list) {
3049		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
3050			vfs_ref(vfsp);
3051			break;
3052		}
3053	}
3054	mtx_unlock(&mountlist_mtx);
3055	return (vfsp);
3056}
3057
3058/* ARGSUSED */
3059static void
3060zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3061{
3062	zfs_creat_t *zct = arg;
3063
3064	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3065}
3066
3067#define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
3068
3069/*
3070 * inputs:
3071 * os			parent objset pointer (NULL if root fs)
3072 * fuids_ok		fuids allowed in this version of the spa?
3073 * sa_ok		SAs allowed in this version of the spa?
3074 * createprops		list of properties requested by creator
3075 *
3076 * outputs:
3077 * zplprops	values for the zplprops we attach to the master node object
3078 * is_ci	true if requested file system will be purely case-insensitive
3079 *
3080 * Determine the settings for utf8only, normalization and
3081 * casesensitivity.  Specific values may have been requested by the
3082 * creator and/or we can inherit values from the parent dataset.  If
3083 * the file system is of too early a vintage, a creator can not
3084 * request settings for these properties, even if the requested
3085 * setting is the default value.  We don't actually want to create dsl
3086 * properties for these, so remove them from the source nvlist after
3087 * processing.
3088 */
3089static int
3090zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3091    boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3092    nvlist_t *zplprops, boolean_t *is_ci)
3093{
3094	uint64_t sense = ZFS_PROP_UNDEFINED;
3095	uint64_t norm = ZFS_PROP_UNDEFINED;
3096	uint64_t u8 = ZFS_PROP_UNDEFINED;
3097
3098	ASSERT(zplprops != NULL);
3099
3100	/*
3101	 * Pull out creator prop choices, if any.
3102	 */
3103	if (createprops) {
3104		(void) nvlist_lookup_uint64(createprops,
3105		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3106		(void) nvlist_lookup_uint64(createprops,
3107		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3108		(void) nvlist_remove_all(createprops,
3109		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3110		(void) nvlist_lookup_uint64(createprops,
3111		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3112		(void) nvlist_remove_all(createprops,
3113		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3114		(void) nvlist_lookup_uint64(createprops,
3115		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3116		(void) nvlist_remove_all(createprops,
3117		    zfs_prop_to_name(ZFS_PROP_CASE));
3118	}
3119
3120	/*
3121	 * If the zpl version requested is whacky or the file system
3122	 * or pool is version is too "young" to support normalization
3123	 * and the creator tried to set a value for one of the props,
3124	 * error out.
3125	 */
3126	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3127	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3128	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3129	    (zplver < ZPL_VERSION_NORMALIZATION &&
3130	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3131	    sense != ZFS_PROP_UNDEFINED)))
3132		return (SET_ERROR(ENOTSUP));
3133
3134	/*
3135	 * Put the version in the zplprops
3136	 */
3137	VERIFY(nvlist_add_uint64(zplprops,
3138	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3139
3140	if (norm == ZFS_PROP_UNDEFINED)
3141		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3142	VERIFY(nvlist_add_uint64(zplprops,
3143	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3144
3145	/*
3146	 * If we're normalizing, names must always be valid UTF-8 strings.
3147	 */
3148	if (norm)
3149		u8 = 1;
3150	if (u8 == ZFS_PROP_UNDEFINED)
3151		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3152	VERIFY(nvlist_add_uint64(zplprops,
3153	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3154
3155	if (sense == ZFS_PROP_UNDEFINED)
3156		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3157	VERIFY(nvlist_add_uint64(zplprops,
3158	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3159
3160	if (is_ci)
3161		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3162
3163	return (0);
3164}
3165
3166static int
3167zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3168    nvlist_t *zplprops, boolean_t *is_ci)
3169{
3170	boolean_t fuids_ok, sa_ok;
3171	uint64_t zplver = ZPL_VERSION;
3172	objset_t *os = NULL;
3173	char parentname[ZFS_MAX_DATASET_NAME_LEN];
3174	char *cp;
3175	spa_t *spa;
3176	uint64_t spa_vers;
3177	int error;
3178
3179	(void) strlcpy(parentname, dataset, sizeof (parentname));
3180	cp = strrchr(parentname, '/');
3181	ASSERT(cp != NULL);
3182	cp[0] = '\0';
3183
3184	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3185		return (error);
3186
3187	spa_vers = spa_version(spa);
3188	spa_close(spa, FTAG);
3189
3190	zplver = zfs_zpl_version_map(spa_vers);
3191	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3192	sa_ok = (zplver >= ZPL_VERSION_SA);
3193
3194	/*
3195	 * Open parent object set so we can inherit zplprop values.
3196	 */
3197	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3198		return (error);
3199
3200	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3201	    zplprops, is_ci);
3202	dmu_objset_rele(os, FTAG);
3203	return (error);
3204}
3205
3206static int
3207zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3208    nvlist_t *zplprops, boolean_t *is_ci)
3209{
3210	boolean_t fuids_ok;
3211	boolean_t sa_ok;
3212	uint64_t zplver = ZPL_VERSION;
3213	int error;
3214
3215	zplver = zfs_zpl_version_map(spa_vers);
3216	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3217	sa_ok = (zplver >= ZPL_VERSION_SA);
3218
3219	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3220	    createprops, zplprops, is_ci);
3221	return (error);
3222}
3223
3224/*
3225 * innvl: {
3226 *     "type" -> dmu_objset_type_t (int32)
3227 *     (optional) "props" -> { prop -> value }
3228 * }
3229 *
3230 * outnvl: propname -> error code (int32)
3231 */
3232static int
3233zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3234{
3235	int error = 0;
3236	zfs_creat_t zct = { 0 };
3237	nvlist_t *nvprops = NULL;
3238	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3239	int32_t type32;
3240	dmu_objset_type_t type;
3241	boolean_t is_insensitive = B_FALSE;
3242
3243	if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3244		return (SET_ERROR(EINVAL));
3245	type = type32;
3246	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3247
3248	switch (type) {
3249	case DMU_OST_ZFS:
3250		cbfunc = zfs_create_cb;
3251		break;
3252
3253	case DMU_OST_ZVOL:
3254		cbfunc = zvol_create_cb;
3255		break;
3256
3257	default:
3258		cbfunc = NULL;
3259		break;
3260	}
3261	if (strchr(fsname, '@') ||
3262	    strchr(fsname, '%'))
3263		return (SET_ERROR(EINVAL));
3264
3265	zct.zct_props = nvprops;
3266
3267	if (cbfunc == NULL)
3268		return (SET_ERROR(EINVAL));
3269
3270	if (type == DMU_OST_ZVOL) {
3271		uint64_t volsize, volblocksize;
3272
3273		if (nvprops == NULL)
3274			return (SET_ERROR(EINVAL));
3275		if (nvlist_lookup_uint64(nvprops,
3276		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3277			return (SET_ERROR(EINVAL));
3278
3279		if ((error = nvlist_lookup_uint64(nvprops,
3280		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3281		    &volblocksize)) != 0 && error != ENOENT)
3282			return (SET_ERROR(EINVAL));
3283
3284		if (error != 0)
3285			volblocksize = zfs_prop_default_numeric(
3286			    ZFS_PROP_VOLBLOCKSIZE);
3287
3288		if ((error = zvol_check_volblocksize(
3289		    volblocksize)) != 0 ||
3290		    (error = zvol_check_volsize(volsize,
3291		    volblocksize)) != 0)
3292			return (error);
3293	} else if (type == DMU_OST_ZFS) {
3294		int error;
3295
3296		/*
3297		 * We have to have normalization and
3298		 * case-folding flags correct when we do the
3299		 * file system creation, so go figure them out
3300		 * now.
3301		 */
3302		VERIFY(nvlist_alloc(&zct.zct_zplprops,
3303		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3304		error = zfs_fill_zplprops(fsname, nvprops,
3305		    zct.zct_zplprops, &is_insensitive);
3306		if (error != 0) {
3307			nvlist_free(zct.zct_zplprops);
3308			return (error);
3309		}
3310	}
3311
3312	error = dmu_objset_create(fsname, type,
3313	    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3314	nvlist_free(zct.zct_zplprops);
3315
3316	/*
3317	 * It would be nice to do this atomically.
3318	 */
3319	if (error == 0) {
3320		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3321		    nvprops, outnvl);
3322		if (error != 0)
3323			(void) dsl_destroy_head(fsname);
3324	}
3325#ifdef __FreeBSD__
3326	if (error == 0 && type == DMU_OST_ZVOL)
3327		zvol_create_minors(fsname);
3328#endif
3329	return (error);
3330}
3331
3332/*
3333 * innvl: {
3334 *     "origin" -> name of origin snapshot
3335 *     (optional) "props" -> { prop -> value }
3336 * }
3337 *
3338 * outnvl: propname -> error code (int32)
3339 */
3340static int
3341zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3342{
3343	int error = 0;
3344	nvlist_t *nvprops = NULL;
3345	char *origin_name;
3346
3347	if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3348		return (SET_ERROR(EINVAL));
3349	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3350
3351	if (strchr(fsname, '@') ||
3352	    strchr(fsname, '%'))
3353		return (SET_ERROR(EINVAL));
3354
3355	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3356		return (SET_ERROR(EINVAL));
3357	error = dmu_objset_clone(fsname, origin_name);
3358	if (error != 0)
3359		return (error);
3360
3361	/*
3362	 * It would be nice to do this atomically.
3363	 */
3364	if (error == 0) {
3365		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3366		    nvprops, outnvl);
3367		if (error != 0)
3368			(void) dsl_destroy_head(fsname);
3369	}
3370#ifdef __FreeBSD__
3371	if (error == 0)
3372		zvol_create_minors(fsname);
3373#endif
3374	return (error);
3375}
3376
3377/*
3378 * innvl: {
3379 *     "snaps" -> { snapshot1, snapshot2 }
3380 *     (optional) "props" -> { prop -> value (string) }
3381 * }
3382 *
3383 * outnvl: snapshot -> error code (int32)
3384 */
3385static int
3386zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3387{
3388	nvlist_t *snaps;
3389	nvlist_t *props = NULL;
3390	int error, poollen;
3391	nvpair_t *pair;
3392
3393	(void) nvlist_lookup_nvlist(innvl, "props", &props);
3394	if ((error = zfs_check_userprops(poolname, props)) != 0)
3395		return (error);
3396
3397	if (!nvlist_empty(props) &&
3398	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3399		return (SET_ERROR(ENOTSUP));
3400
3401	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3402		return (SET_ERROR(EINVAL));
3403	poollen = strlen(poolname);
3404	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3405	    pair = nvlist_next_nvpair(snaps, pair)) {
3406		const char *name = nvpair_name(pair);
3407		const char *cp = strchr(name, '@');
3408
3409		/*
3410		 * The snap name must contain an @, and the part after it must
3411		 * contain only valid characters.
3412		 */
3413		if (cp == NULL ||
3414		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3415			return (SET_ERROR(EINVAL));
3416
3417		/*
3418		 * The snap must be in the specified pool.
3419		 */
3420		if (strncmp(name, poolname, poollen) != 0 ||
3421		    (name[poollen] != '/' && name[poollen] != '@'))
3422			return (SET_ERROR(EXDEV));
3423
3424		/* This must be the only snap of this fs. */
3425		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3426		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3427			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3428			    == 0) {
3429				return (SET_ERROR(EXDEV));
3430			}
3431		}
3432	}
3433
3434	error = dsl_dataset_snapshot(snaps, props, outnvl);
3435	return (error);
3436}
3437
3438/*
3439 * innvl: "message" -> string
3440 */
3441/* ARGSUSED */
3442static int
3443zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3444{
3445	char *message;
3446	spa_t *spa;
3447	int error;
3448	char *poolname;
3449
3450	/*
3451	 * The poolname in the ioctl is not set, we get it from the TSD,
3452	 * which was set at the end of the last successful ioctl that allows
3453	 * logging.  The secpolicy func already checked that it is set.
3454	 * Only one log ioctl is allowed after each successful ioctl, so
3455	 * we clear the TSD here.
3456	 */
3457	poolname = tsd_get(zfs_allow_log_key);
3458	(void) tsd_set(zfs_allow_log_key, NULL);
3459	error = spa_open(poolname, &spa, FTAG);
3460	strfree(poolname);
3461	if (error != 0)
3462		return (error);
3463
3464	if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
3465		spa_close(spa, FTAG);
3466		return (SET_ERROR(EINVAL));
3467	}
3468
3469	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3470		spa_close(spa, FTAG);
3471		return (SET_ERROR(ENOTSUP));
3472	}
3473
3474	error = spa_history_log(spa, message);
3475	spa_close(spa, FTAG);
3476	return (error);
3477}
3478
#ifdef __FreeBSD__
/*
 * Hand a "command" string to vdev_label_write_pad2() for one vdev of a pool,
 * both identified by guid.
 *
 * innvl: {
 *     ZPOOL_CONFIG_POOL_GUID -> pool guid (uint64)
 *     ZPOOL_CONFIG_GUID -> vdev guid (uint64)
 *     "command" -> string
 * }
 *
 * outnvl is not used.
 *
 * Returns EINVAL if any of the three inputs is missing, ENOENT if
 * spa_by_guid() finds no pool, ENODEV if the vdev cannot be found in the
 * opened pool, or the result of spa_open() / vdev_label_write_pad2().
 */
/* ARGSUSED */
static int
zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
{
	char name[MAXNAMELEN];
	spa_t *spa;
	vdev_t *vd;
	char *command;
	uint64_t pool_guid;
	uint64_t vdev_guid;
	int error;

	if (nvlist_lookup_uint64(innvl,
	    ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		return (SET_ERROR(EINVAL));
	if (nvlist_lookup_uint64(innvl,
	    ZPOOL_CONFIG_GUID, &vdev_guid) != 0)
		return (SET_ERROR(EINVAL));
	if (nvlist_lookup_string(innvl,
	    "command", &command) != 0)
		return (SET_ERROR(EINVAL));

	/*
	 * Only the pool name is taken while the namespace lock is held; the
	 * pool is opened by that name afterwards.  The copy is bounded by the
	 * size of the local buffer.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_by_guid(pool_guid, vdev_guid);
	if (spa != NULL)
		(void) snprintf(name, sizeof (name), "%s", spa_name(spa));
	mutex_exit(&spa_namespace_lock);
	if (spa == NULL)
		return (SET_ERROR(ENOENT));

	if ((error = spa_open(name, &spa, FTAG)) != 0)
		return (error);
	spa_vdev_state_enter(spa, SCL_ALL);
	vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE);
	if (vd == NULL) {
		(void) spa_vdev_state_exit(spa, NULL, ENXIO);
		spa_close(spa, FTAG);
		return (SET_ERROR(ENODEV));
	}
	error = vdev_label_write_pad2(vd, command, strlen(command));
	(void) spa_vdev_state_exit(spa, NULL, 0);
	txg_wait_synced(spa->spa_dsl_pool, 0);
	spa_close(spa, FTAG);
	return (error);
}
#endif
3525
3526/*
3527 * The dp_config_rwlock must not be held when calling this, because the
3528 * unmount may need to write out data.
3529 *
3530 * This function is best-effort.  Callers must deal gracefully if it
3531 * remains mounted (or is remounted after this call).
3532 *
3533 * Returns 0 if the argument is not a snapshot, or it is not currently a
3534 * filesystem, or we were able to unmount it.  Returns error code otherwise.
3535 */
3536int
3537zfs_unmount_snap(const char *snapname)
3538{
3539	vfs_t *vfsp;
3540	zfsvfs_t *zfsvfs;
3541#ifdef illumos
3542	int err;
3543#endif
3544
3545	if (strchr(snapname, '@') == NULL)
3546		return (0);
3547
3548	vfsp = zfs_get_vfs(snapname);
3549	if (vfsp == NULL)
3550		return (0);
3551
3552	zfsvfs = vfsp->vfs_data;
3553	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
3554
3555#ifdef illumos
3556	err = vn_vfswlock(vfsp->vfs_vnodecovered);
3557	VFS_RELE(vfsp);
3558	if (err != 0)
3559		return (SET_ERROR(err));
3560#endif
3561
3562	/*
3563	 * Always force the unmount for snapshots.
3564	 */
3565#ifdef illumos
3566	(void) dounmount(vfsp, MS_FORCE, kcred);
3567#else
3568	(void) dounmount(vfsp, MS_FORCE, curthread);
3569#endif
3570	return (0);
3571}
3572
/*
 * Callback-shaped wrapper around zfs_unmount_snap(); arg is ignored.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	int rc = zfs_unmount_snap(snapname);

	return (rc);
}
3579
3580/*
3581 * When a clone is destroyed, its origin may also need to be destroyed,
3582 * in which case it must be unmounted.  This routine will do that unmount
3583 * if necessary.
3584 */
3585void
3586zfs_destroy_unmount_origin(const char *fsname)
3587{
3588	int error;
3589	objset_t *os;
3590	dsl_dataset_t *ds;
3591
3592	error = dmu_objset_hold(fsname, FTAG, &os);
3593	if (error != 0)
3594		return;
3595	ds = dmu_objset_ds(os);
3596	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3597		char originname[ZFS_MAX_DATASET_NAME_LEN];
3598		dsl_dataset_name(ds->ds_prev, originname);
3599		dmu_objset_rele(os, FTAG);
3600		(void) zfs_unmount_snap(originname);
3601	} else {
3602		dmu_objset_rele(os, FTAG);
3603	}
3604}
3605
3606/*
3607 * innvl: {
3608 *     "snaps" -> { snapshot1, snapshot2 }
3609 *     (optional boolean) "defer"
3610 * }
3611 *
3612 * outnvl: snapshot -> error code (int32)
3613 *
3614 */
3615/* ARGSUSED */
3616static int
3617zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3618{
3619	int error, poollen;
3620	nvlist_t *snaps;
3621	nvpair_t *pair;
3622	boolean_t defer;
3623
3624	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3625		return (SET_ERROR(EINVAL));
3626	defer = nvlist_exists(innvl, "defer");
3627
3628	poollen = strlen(poolname);
3629	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3630	    pair = nvlist_next_nvpair(snaps, pair)) {
3631		const char *name = nvpair_name(pair);
3632
3633		/*
3634		 * The snap must be in the specified pool to prevent the
3635		 * invalid removal of zvol minors below.
3636		 */
3637		if (strncmp(name, poolname, poollen) != 0 ||
3638		    (name[poollen] != '/' && name[poollen] != '@'))
3639			return (SET_ERROR(EXDEV));
3640
3641		error = zfs_unmount_snap(name);
3642		if (error != 0)
3643			return (error);
3644#if defined(__FreeBSD__)
3645		zvol_remove_minors(name);
3646#endif
3647	}
3648
3649	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3650}
3651
3652/*
3653 * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
3654 * All bookmarks must be in the same pool.
3655 *
3656 * innvl: {
3657 *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
3658 * }
3659 *
3660 * outnvl: bookmark -> error code (int32)
3661 *
3662 */
3663/* ARGSUSED */
3664static int
3665zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3666{
3667	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3668	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3669		char *snap_name;
3670
3671		/*
3672		 * Verify the snapshot argument.
3673		 */
3674		if (nvpair_value_string(pair, &snap_name) != 0)
3675			return (SET_ERROR(EINVAL));
3676
3677
3678		/* Verify that the keys (bookmarks) are unique */
3679		for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
3680		    pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
3681			if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
3682				return (SET_ERROR(EINVAL));
3683		}
3684	}
3685
3686	return (dsl_bookmark_create(innvl, outnvl));
3687}
3688
3689/*
3690 * innvl: {
3691 *     property 1, property 2, ...
3692 * }
3693 *
3694 * outnvl: {
3695 *     bookmark name 1 -> { property 1, property 2, ... },
3696 *     bookmark name 2 -> { property 1, property 2, ... }
3697 * }
3698 *
3699 */
3700static int
3701zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3702{
3703	return (dsl_get_bookmarks(fsname, innvl, outnvl));
3704}
3705
3706/*
3707 * innvl: {
3708 *     bookmark name 1, bookmark name 2
3709 * }
3710 *
3711 * outnvl: bookmark -> error code (int32)
3712 *
3713 */
3714static int
3715zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3716    nvlist_t *outnvl)
3717{
3718	int error, poollen;
3719
3720	poollen = strlen(poolname);
3721	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3722	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3723		const char *name = nvpair_name(pair);
3724		const char *cp = strchr(name, '#');
3725
3726		/*
3727		 * The bookmark name must contain an #, and the part after it
3728		 * must contain only valid characters.
3729		 */
3730		if (cp == NULL ||
3731		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3732			return (SET_ERROR(EINVAL));
3733
3734		/*
3735		 * The bookmark must be in the specified pool.
3736		 */
3737		if (strncmp(name, poolname, poollen) != 0 ||
3738		    (name[poollen] != '/' && name[poollen] != '#'))
3739			return (SET_ERROR(EXDEV));
3740	}
3741
3742	error = dsl_bookmark_destroy(innvl, outnvl);
3743	return (error);
3744}
3745
3746/*
3747 * inputs:
3748 * zc_name		name of dataset to destroy
3749 * zc_objset_type	type of objset
3750 * zc_defer_destroy	mark for deferred destroy
3751 *
3752 * outputs:		none
3753 */
3754static int
3755zfs_ioc_destroy(zfs_cmd_t *zc)
3756{
3757	int err;
3758
3759	if (zc->zc_objset_type == DMU_OST_ZFS) {
3760		err = zfs_unmount_snap(zc->zc_name);
3761		if (err != 0)
3762			return (err);
3763	}
3764
3765	if (strchr(zc->zc_name, '@'))
3766		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3767	else
3768		err = dsl_destroy_head(zc->zc_name);
3769	if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
3770#ifdef __FreeBSD__
3771		zvol_remove_minors(zc->zc_name);
3772#else
3773		(void) zvol_remove_minor(zc->zc_name);
3774#endif
3775	return (err);
3776}
3777
3778/*
3779 * fsname is name of dataset to rollback (to most recent snapshot)
3780 *
3781 * innvl is not used.
3782 *
3783 * outnvl: "target" -> name of most recent snapshot
3784 * }
3785 */
3786/* ARGSUSED */
3787static int
3788zfs_ioc_rollback(const char *fsname, nvlist_t *args, nvlist_t *outnvl)
3789{
3790	zfsvfs_t *zfsvfs;
3791	int error;
3792
3793	if (getzfsvfs(fsname, &zfsvfs) == 0) {
3794		dsl_dataset_t *ds;
3795
3796		ds = dmu_objset_ds(zfsvfs->z_os);
3797		error = zfs_suspend_fs(zfsvfs);
3798		if (error == 0) {
3799			int resume_err;
3800
3801			error = dsl_dataset_rollback(fsname, zfsvfs, outnvl);
3802			resume_err = zfs_resume_fs(zfsvfs, ds);
3803			error = error ? error : resume_err;
3804		}
3805#ifdef illumos
3806		VFS_RELE(zfsvfs->z_vfs);
3807#else
3808		vfs_unbusy(zfsvfs->z_vfs);
3809#endif
3810	} else {
3811		error = dsl_dataset_rollback(fsname, NULL, outnvl);
3812	}
3813	return (error);
3814}
3815
3816static int
3817recursive_unmount(const char *fsname, void *arg)
3818{
3819	const char *snapname = arg;
3820	char fullname[ZFS_MAX_DATASET_NAME_LEN];
3821
3822	(void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
3823	return (zfs_unmount_snap(fullname));
3824}
3825
3826/*
3827 * inputs:
3828 * zc_name	old name of dataset
3829 * zc_value	new name of dataset
3830 * zc_cookie	recursive flag (only valid for snapshots)
3831 *
3832 * outputs:	none
3833 */
3834static int
3835zfs_ioc_rename(zfs_cmd_t *zc)
3836{
3837	boolean_t recursive = zc->zc_cookie & 1;
3838	char *at;
3839	boolean_t allow_mounted = B_TRUE;
3840
3841#ifdef __FreeBSD__
3842	allow_mounted = (zc->zc_cookie & 2) != 0;
3843#endif
3844
3845	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3846	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3847	    strchr(zc->zc_value, '%'))
3848		return (SET_ERROR(EINVAL));
3849
3850	at = strchr(zc->zc_name, '@');
3851	if (at != NULL) {
3852		/* snaps must be in same fs */
3853		int error;
3854
3855		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
3856			return (SET_ERROR(EXDEV));
3857		*at = '\0';
3858		if (zc->zc_objset_type == DMU_OST_ZFS && !allow_mounted) {
3859			error = dmu_objset_find(zc->zc_name,
3860			    recursive_unmount, at + 1,
3861			    recursive ? DS_FIND_CHILDREN : 0);
3862			if (error != 0) {
3863				*at = '@';
3864				return (error);
3865			}
3866		}
3867		error = dsl_dataset_rename_snapshot(zc->zc_name,
3868		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
3869		*at = '@';
3870
3871		return (error);
3872	} else {
3873#ifdef illumos
3874		if (zc->zc_objset_type == DMU_OST_ZVOL)
3875			(void) zvol_remove_minor(zc->zc_name);
3876#endif
3877		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
3878	}
3879}
3880
3881static int
3882zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3883{
3884	const char *propname = nvpair_name(pair);
3885	boolean_t issnap = (strchr(dsname, '@') != NULL);
3886	zfs_prop_t prop = zfs_name_to_prop(propname);
3887	uint64_t intval;
3888	int err;
3889
3890	if (prop == ZPROP_INVAL) {
3891		if (zfs_prop_user(propname)) {
3892			if (err = zfs_secpolicy_write_perms(dsname,
3893			    ZFS_DELEG_PERM_USERPROP, cr))
3894				return (err);
3895			return (0);
3896		}
3897
3898		if (!issnap && zfs_prop_userquota(propname)) {
3899			const char *perm = NULL;
3900			const char *uq_prefix =
3901			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3902			const char *gq_prefix =
3903			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3904
3905			if (strncmp(propname, uq_prefix,
3906			    strlen(uq_prefix)) == 0) {
3907				perm = ZFS_DELEG_PERM_USERQUOTA;
3908			} else if (strncmp(propname, gq_prefix,
3909			    strlen(gq_prefix)) == 0) {
3910				perm = ZFS_DELEG_PERM_GROUPQUOTA;
3911			} else {
3912				/* USERUSED and GROUPUSED are read-only */
3913				return (SET_ERROR(EINVAL));
3914			}
3915
3916			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3917				return (err);
3918			return (0);
3919		}
3920
3921		return (SET_ERROR(EINVAL));
3922	}
3923
3924	if (issnap)
3925		return (SET_ERROR(EINVAL));
3926
3927	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3928		/*
3929		 * dsl_prop_get_all_impl() returns properties in this
3930		 * format.
3931		 */
3932		nvlist_t *attrs;
3933		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3934		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3935		    &pair) == 0);
3936	}
3937
3938	/*
3939	 * Check that this value is valid for this pool version
3940	 */
3941	switch (prop) {
3942	case ZFS_PROP_COMPRESSION:
3943		/*
3944		 * If the user specified gzip compression, make sure
3945		 * the SPA supports it. We ignore any errors here since
3946		 * we'll catch them later.
3947		 */
3948		if (nvpair_value_uint64(pair, &intval) == 0) {
3949			if (intval >= ZIO_COMPRESS_GZIP_1 &&
3950			    intval <= ZIO_COMPRESS_GZIP_9 &&
3951			    zfs_earlier_version(dsname,
3952			    SPA_VERSION_GZIP_COMPRESSION)) {
3953				return (SET_ERROR(ENOTSUP));
3954			}
3955
3956			if (intval == ZIO_COMPRESS_ZLE &&
3957			    zfs_earlier_version(dsname,
3958			    SPA_VERSION_ZLE_COMPRESSION))
3959				return (SET_ERROR(ENOTSUP));
3960
3961			if (intval == ZIO_COMPRESS_LZ4) {
3962				spa_t *spa;
3963
3964				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3965					return (err);
3966
3967				if (!spa_feature_is_enabled(spa,
3968				    SPA_FEATURE_LZ4_COMPRESS)) {
3969					spa_close(spa, FTAG);
3970					return (SET_ERROR(ENOTSUP));
3971				}
3972				spa_close(spa, FTAG);
3973			}
3974
3975			/*
3976			 * If this is a bootable dataset then
3977			 * verify that the compression algorithm
3978			 * is supported for booting. We must return
3979			 * something other than ENOTSUP since it
3980			 * implies a downrev pool version.
3981			 */
3982			if (zfs_is_bootfs(dsname) &&
3983			    !BOOTFS_COMPRESS_VALID(intval)) {
3984				return (SET_ERROR(ERANGE));
3985			}
3986		}
3987		break;
3988
3989	case ZFS_PROP_COPIES:
3990		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3991			return (SET_ERROR(ENOTSUP));
3992		break;
3993
3994	case ZFS_PROP_RECORDSIZE:
3995		/* Record sizes above 128k need the feature to be enabled */
3996		if (nvpair_value_uint64(pair, &intval) == 0 &&
3997		    intval > SPA_OLD_MAXBLOCKSIZE) {
3998			spa_t *spa;
3999
4000			/*
4001			 * If this is a bootable dataset then
4002			 * the we don't allow large (>128K) blocks,
4003			 * because GRUB doesn't support them.
4004			 */
4005			if (zfs_is_bootfs(dsname) &&
4006			    intval > SPA_OLD_MAXBLOCKSIZE) {
4007				return (SET_ERROR(ERANGE));
4008			}
4009
4010			/*
4011			 * We don't allow setting the property above 1MB,
4012			 * unless the tunable has been changed.
4013			 */
4014			if (intval > zfs_max_recordsize ||
4015			    intval > SPA_MAXBLOCKSIZE)
4016				return (SET_ERROR(ERANGE));
4017
4018			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4019				return (err);
4020
4021			if (!spa_feature_is_enabled(spa,
4022			    SPA_FEATURE_LARGE_BLOCKS)) {
4023				spa_close(spa, FTAG);
4024				return (SET_ERROR(ENOTSUP));
4025			}
4026			spa_close(spa, FTAG);
4027		}
4028		break;
4029
4030	case ZFS_PROP_SHARESMB:
4031		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4032			return (SET_ERROR(ENOTSUP));
4033		break;
4034
4035	case ZFS_PROP_ACLINHERIT:
4036		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4037		    nvpair_value_uint64(pair, &intval) == 0) {
4038			if (intval == ZFS_ACL_PASSTHROUGH_X &&
4039			    zfs_earlier_version(dsname,
4040			    SPA_VERSION_PASSTHROUGH_X))
4041				return (SET_ERROR(ENOTSUP));
4042		}
4043		break;
4044
4045	case ZFS_PROP_CHECKSUM:
4046	case ZFS_PROP_DEDUP:
4047	{
4048		spa_feature_t feature;
4049		spa_t *spa;
4050
4051		/* dedup feature version checks */
4052		if (prop == ZFS_PROP_DEDUP &&
4053		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4054			return (SET_ERROR(ENOTSUP));
4055
4056		if (nvpair_value_uint64(pair, &intval) != 0)
4057			return (SET_ERROR(EINVAL));
4058
4059		/* check prop value is enabled in features */
4060		feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
4061		if (feature == SPA_FEATURE_NONE)
4062			break;
4063
4064		if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4065			return (err);
4066		/*
4067		 * Salted checksums are not supported on root pools.
4068		 */
4069		if (spa_bootfs(spa) != 0 &&
4070		    intval < ZIO_CHECKSUM_FUNCTIONS &&
4071		    (zio_checksum_table[intval].ci_flags &
4072		    ZCHECKSUM_FLAG_SALTED)) {
4073			spa_close(spa, FTAG);
4074			return (SET_ERROR(ERANGE));
4075		}
4076		if (!spa_feature_is_enabled(spa, feature)) {
4077			spa_close(spa, FTAG);
4078			return (SET_ERROR(ENOTSUP));
4079		}
4080		spa_close(spa, FTAG);
4081		break;
4082	}
4083	}
4084
4085	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4086}
4087
4088/*
4089 * Checks for a race condition to make sure we don't increment a feature flag
4090 * multiple times.
4091 */
4092static int
4093zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
4094{
4095	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4096	spa_feature_t *featurep = arg;
4097
4098	if (!spa_feature_is_active(spa, *featurep))
4099		return (0);
4100	else
4101		return (SET_ERROR(EBUSY));
4102}
4103
4104/*
4105 * The callback invoked on feature activation in the sync task caused by
4106 * zfs_prop_activate_feature.
4107 */
4108static void
4109zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
4110{
4111	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4112	spa_feature_t *featurep = arg;
4113
4114	spa_feature_incr(spa, *featurep, tx);
4115}
4116
4117/*
4118 * Activates a feature on a pool in response to a property setting. This
4119 * creates a new sync task which modifies the pool to reflect the feature
4120 * as being active.
4121 */
4122static int
4123zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
4124{
4125	int err;
4126
4127	/* EBUSY here indicates that the feature is already active */
4128	err = dsl_sync_task(spa_name(spa),
4129	    zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
4130	    &feature, 2, ZFS_SPACE_CHECK_RESERVED);
4131
4132	if (err != 0 && err != EBUSY)
4133		return (err);
4134	else
4135		return (0);
4136}
4137
4138/*
4139 * Removes properties from the given props list that fail permission checks
4140 * needed to clear them and to restore them in case of a receive error. For each
4141 * property, make sure we have both set and inherit permissions.
4142 *
4143 * Returns the first error encountered if any permission checks fail. If the
4144 * caller provides a non-NULL errlist, it also gives the complete list of names
4145 * of all the properties that failed a permission check along with the
4146 * corresponding error numbers. The caller is responsible for freeing the
4147 * returned errlist.
4148 *
4149 * If every property checks out successfully, zero is returned and the list
4150 * pointed at by errlist is NULL.
4151 */
4152static int
4153zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
4154{
4155	zfs_cmd_t *zc;
4156	nvpair_t *pair, *next_pair;
4157	nvlist_t *errors;
4158	int err, rv = 0;
4159
4160	if (props == NULL)
4161		return (0);
4162
4163	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4164
4165	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4166	(void) strcpy(zc->zc_name, dataset);
4167	pair = nvlist_next_nvpair(props, NULL);
4168	while (pair != NULL) {
4169		next_pair = nvlist_next_nvpair(props, pair);
4170
4171		(void) strcpy(zc->zc_value, nvpair_name(pair));
4172		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4173		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4174			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4175			VERIFY(nvlist_add_int32(errors,
4176			    zc->zc_value, err) == 0);
4177		}
4178		pair = next_pair;
4179	}
4180	kmem_free(zc, sizeof (zfs_cmd_t));
4181
4182	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4183		nvlist_free(errors);
4184		errors = NULL;
4185	} else {
4186		VERIFY(nvpair_value_int32(pair, &rv) == 0);
4187	}
4188
4189	if (errlist == NULL)
4190		nvlist_free(errors);
4191	else
4192		*errlist = errors;
4193
4194	return (rv);
4195}
4196
4197static boolean_t
4198propval_equals(nvpair_t *p1, nvpair_t *p2)
4199{
4200	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4201		/* dsl_prop_get_all_impl() format */
4202		nvlist_t *attrs;
4203		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4204		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4205		    &p1) == 0);
4206	}
4207
4208	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4209		nvlist_t *attrs;
4210		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4211		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4212		    &p2) == 0);
4213	}
4214
4215	if (nvpair_type(p1) != nvpair_type(p2))
4216		return (B_FALSE);
4217
4218	if (nvpair_type(p1) == DATA_TYPE_STRING) {
4219		char *valstr1, *valstr2;
4220
4221		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4222		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4223		return (strcmp(valstr1, valstr2) == 0);
4224	} else {
4225		uint64_t intval1, intval2;
4226
4227		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4228		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4229		return (intval1 == intval2);
4230	}
4231}
4232
4233/*
4234 * Remove properties from props if they are not going to change (as determined
4235 * by comparison with origprops). Remove them from origprops as well, since we
4236 * do not need to clear or restore properties that won't change.
4237 */
4238static void
4239props_reduce(nvlist_t *props, nvlist_t *origprops)
4240{
4241	nvpair_t *pair, *next_pair;
4242
4243	if (origprops == NULL)
4244		return; /* all props need to be received */
4245
4246	pair = nvlist_next_nvpair(props, NULL);
4247	while (pair != NULL) {
4248		const char *propname = nvpair_name(pair);
4249		nvpair_t *match;
4250
4251		next_pair = nvlist_next_nvpair(props, pair);
4252
4253		if ((nvlist_lookup_nvpair(origprops, propname,
4254		    &match) != 0) || !propval_equals(pair, match))
4255			goto next; /* need to set received value */
4256
4257		/* don't clear the existing received value */
4258		(void) nvlist_remove_nvpair(origprops, match);
4259		/* don't bother receiving the property */
4260		(void) nvlist_remove_nvpair(props, pair);
4261next:
4262		pair = next_pair;
4263	}
4264}
4265
4266/*
4267 * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4268 * For example, refquota cannot be set until after the receipt of a dataset,
4269 * because in replication streams, an older/earlier snapshot may exceed the
4270 * refquota.  We want to receive the older/earlier snapshot, but setting
4271 * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4272 * the older/earlier snapshot from being received (with EDQUOT).
4273 *
4274 * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4275 *
4276 * libzfs will need to be judicious handling errors encountered by props
4277 * extracted by this function.
4278 */
4279static nvlist_t *
4280extract_delay_props(nvlist_t *props)
4281{
4282	nvlist_t *delayprops;
4283	nvpair_t *nvp, *tmp;
4284	static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
4285	int i;
4286
4287	VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4288
4289	for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4290	    nvp = nvlist_next_nvpair(props, nvp)) {
4291		/*
4292		 * strcmp() is safe because zfs_prop_to_name() always returns
4293		 * a bounded string.
4294		 */
4295		for (i = 0; delayable[i] != 0; i++) {
4296			if (strcmp(zfs_prop_to_name(delayable[i]),
4297			    nvpair_name(nvp)) == 0) {
4298				break;
4299			}
4300		}
4301		if (delayable[i] != 0) {
4302			tmp = nvlist_prev_nvpair(props, nvp);
4303			VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4304			VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4305			nvp = tmp;
4306		}
4307	}
4308
4309	if (nvlist_empty(delayprops)) {
4310		nvlist_free(delayprops);
4311		delayprops = NULL;
4312	}
4313	return (delayprops);
4314}
4315
4316#ifdef	DEBUG
4317static boolean_t zfs_ioc_recv_inject_err;
4318#endif
4319
4320/*
4321 * inputs:
4322 * zc_name		name of containing filesystem
4323 * zc_nvlist_src{_size}	nvlist of properties to apply
4324 * zc_value		name of snapshot to create
4325 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
4326 * zc_cookie		file descriptor to recv from
4327 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
4328 * zc_guid		force flag
4329 * zc_cleanup_fd	cleanup-on-exit file descriptor
4330 * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
4331 * zc_resumable		if data is incomplete assume sender will resume
4332 *
4333 * outputs:
4334 * zc_cookie		number of bytes read
4335 * zc_nvlist_dst{_size} error for each unapplied received property
4336 * zc_obj		zprop_errflags_t
4337 * zc_action_handle	handle for this guid/ds mapping
4338 */
4339static int
4340zfs_ioc_recv(zfs_cmd_t *zc)
4341{
4342	file_t *fp;
4343	dmu_recv_cookie_t drc;
4344	boolean_t force = (boolean_t)zc->zc_guid;
4345	int fd;
4346	int error = 0;
4347	int props_error = 0;
4348	nvlist_t *errors;
4349	offset_t off;
4350	nvlist_t *props = NULL; /* sent properties */
4351	nvlist_t *origprops = NULL; /* existing properties */
4352	nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
4353	char *origin = NULL;
4354	char *tosnap;
4355	char tofs[ZFS_MAX_DATASET_NAME_LEN];
4356	cap_rights_t rights;
4357	boolean_t first_recvd_props = B_FALSE;
4358
4359	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4360	    strchr(zc->zc_value, '@') == NULL ||
4361	    strchr(zc->zc_value, '%'))
4362		return (SET_ERROR(EINVAL));
4363
4364	(void) strcpy(tofs, zc->zc_value);
4365	tosnap = strchr(tofs, '@');
4366	*tosnap++ = '\0';
4367
4368	if (zc->zc_nvlist_src != 0 &&
4369	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
4370	    zc->zc_iflags, &props)) != 0)
4371		return (error);
4372
4373	fd = zc->zc_cookie;
4374#ifdef illumos
4375	fp = getf(fd);
4376#else
4377	fget_read(curthread, fd, cap_rights_init(&rights, CAP_PREAD), &fp);
4378#endif
4379	if (fp == NULL) {
4380		nvlist_free(props);
4381		return (SET_ERROR(EBADF));
4382	}
4383
4384	errors = fnvlist_alloc();
4385
4386	if (zc->zc_string[0])
4387		origin = zc->zc_string;
4388
4389	error = dmu_recv_begin(tofs, tosnap,
4390	    &zc->zc_begin_record, force, zc->zc_resumable, origin, &drc);
4391	if (error != 0)
4392		goto out;
4393
4394	/*
4395	 * Set properties before we receive the stream so that they are applied
4396	 * to the new data. Note that we must call dmu_recv_stream() if
4397	 * dmu_recv_begin() succeeds.
4398	 */
4399	if (props != NULL && !drc.drc_newfs) {
4400		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
4401		    SPA_VERSION_RECVD_PROPS &&
4402		    !dsl_prop_get_hasrecvd(tofs))
4403			first_recvd_props = B_TRUE;
4404
4405		/*
4406		 * If new received properties are supplied, they are to
4407		 * completely replace the existing received properties, so stash
4408		 * away the existing ones.
4409		 */
4410		if (dsl_prop_get_received(tofs, &origprops) == 0) {
4411			nvlist_t *errlist = NULL;
4412			/*
4413			 * Don't bother writing a property if its value won't
4414			 * change (and avoid the unnecessary security checks).
4415			 *
4416			 * The first receive after SPA_VERSION_RECVD_PROPS is a
4417			 * special case where we blow away all local properties
4418			 * regardless.
4419			 */
4420			if (!first_recvd_props)
4421				props_reduce(props, origprops);
4422			if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
4423				(void) nvlist_merge(errors, errlist, 0);
4424			nvlist_free(errlist);
4425
4426			if (clear_received_props(tofs, origprops,
4427			    first_recvd_props ? NULL : props) != 0)
4428				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
4429		} else {
4430			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
4431		}
4432	}
4433
4434	if (props != NULL) {
4435		props_error = dsl_prop_set_hasrecvd(tofs);
4436
4437		if (props_error == 0) {
4438			delayprops = extract_delay_props(props);
4439			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4440			    props, errors);
4441		}
4442	}
4443
4444	off = fp->f_offset;
4445	error = dmu_recv_stream(&drc, fp, &off, zc->zc_cleanup_fd,
4446	    &zc->zc_action_handle);
4447
4448	if (error == 0) {
4449		zfsvfs_t *zfsvfs = NULL;
4450
4451		if (getzfsvfs(tofs, &zfsvfs) == 0) {
4452			/* online recv */
4453			dsl_dataset_t *ds;
4454			int end_err;
4455
4456			ds = dmu_objset_ds(zfsvfs->z_os);
4457			error = zfs_suspend_fs(zfsvfs);
4458			/*
4459			 * If the suspend fails, then the recv_end will
4460			 * likely also fail, and clean up after itself.
4461			 */
4462			end_err = dmu_recv_end(&drc, zfsvfs);
4463			if (error == 0)
4464				error = zfs_resume_fs(zfsvfs, ds);
4465			error = error ? error : end_err;
4466#ifdef illumos
4467			VFS_RELE(zfsvfs->z_vfs);
4468#else
4469			vfs_unbusy(zfsvfs->z_vfs);
4470#endif
4471		} else {
4472			error = dmu_recv_end(&drc, NULL);
4473		}
4474
4475		/* Set delayed properties now, after we're done receiving. */
4476		if (delayprops != NULL && error == 0) {
4477			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4478			    delayprops, errors);
4479		}
4480	}
4481
4482	if (delayprops != NULL) {
4483		/*
4484		 * Merge delayed props back in with initial props, in case
4485		 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
4486		 * we have to make sure clear_received_props() includes
4487		 * the delayed properties).
4488		 *
4489		 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
4490		 * using ASSERT() will be just like a VERIFY.
4491		 */
4492		ASSERT(nvlist_merge(props, delayprops, 0) == 0);
4493		nvlist_free(delayprops);
4494	}
4495
4496	/*
4497	 * Now that all props, initial and delayed, are set, report the prop
4498	 * errors to the caller.
4499	 */
4500	if (zc->zc_nvlist_dst_size != 0 &&
4501	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
4502	    put_nvlist(zc, errors) != 0)) {
4503		/*
4504		 * Caller made zc->zc_nvlist_dst less than the minimum expected
4505		 * size or supplied an invalid address.
4506		 */
4507		props_error = SET_ERROR(EINVAL);
4508	}
4509
4510	zc->zc_cookie = off - fp->f_offset;
4511	if (off >= 0 && off <= MAXOFFSET_T)
4512		fp->f_offset = off;
4513
4514#ifdef	DEBUG
4515	if (zfs_ioc_recv_inject_err) {
4516		zfs_ioc_recv_inject_err = B_FALSE;
4517		error = 1;
4518	}
4519#endif
4520
4521#ifdef __FreeBSD__
4522	if (error == 0)
4523		zvol_create_minors(tofs);
4524#endif
4525
4526	/*
4527	 * On error, restore the original props.
4528	 */
4529	if (error != 0 && props != NULL && !drc.drc_newfs) {
4530		if (clear_received_props(tofs, props, NULL) != 0) {
4531			/*
4532			 * We failed to clear the received properties.
4533			 * Since we may have left a $recvd value on the
4534			 * system, we can't clear the $hasrecvd flag.
4535			 */
4536			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4537		} else if (first_recvd_props) {
4538			dsl_prop_unset_hasrecvd(tofs);
4539		}
4540
4541		if (origprops == NULL && !drc.drc_newfs) {
4542			/* We failed to stash the original properties. */
4543			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4544		}
4545
4546		/*
4547		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
4548		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
4549		 * explictly if we're restoring local properties cleared in the
4550		 * first new-style receive.
4551		 */
4552		if (origprops != NULL &&
4553		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
4554		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
4555		    origprops, NULL) != 0) {
4556			/*
4557			 * We stashed the original properties but failed to
4558			 * restore them.
4559			 */
4560			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4561		}
4562	}
4563out:
4564	nvlist_free(props);
4565	nvlist_free(origprops);
4566	nvlist_free(errors);
4567	releasef(fd);
4568
4569	if (error == 0)
4570		error = props_error;
4571
4572	return (error);
4573}
4574
4575/*
4576 * inputs:
4577 * zc_name	name of snapshot to send
4578 * zc_cookie	file descriptor to send stream to
4579 * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
4580 * zc_sendobj	objsetid of snapshot to send
4581 * zc_fromobj	objsetid of incremental fromsnap (may be zero)
4582 * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
4583 *		output size in zc_objset_type.
4584 * zc_flags	lzc_send_flags
4585 *
4586 * outputs:
4587 * zc_objset_type	estimated size, if zc_guid is set
4588 */
4589static int
4590zfs_ioc_send(zfs_cmd_t *zc)
4591{
4592	int error;
4593	offset_t off;
4594	boolean_t estimate = (zc->zc_guid != 0);
4595	boolean_t embedok = (zc->zc_flags & 0x1);
4596	boolean_t large_block_ok = (zc->zc_flags & 0x2);
4597
4598	if (zc->zc_obj != 0) {
4599		dsl_pool_t *dp;
4600		dsl_dataset_t *tosnap;
4601
4602		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4603		if (error != 0)
4604			return (error);
4605
4606		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4607		if (error != 0) {
4608			dsl_pool_rele(dp, FTAG);
4609			return (error);
4610		}
4611
4612		if (dsl_dir_is_clone(tosnap->ds_dir))
4613			zc->zc_fromobj =
4614			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
4615		dsl_dataset_rele(tosnap, FTAG);
4616		dsl_pool_rele(dp, FTAG);
4617	}
4618
4619	if (estimate) {
4620		dsl_pool_t *dp;
4621		dsl_dataset_t *tosnap;
4622		dsl_dataset_t *fromsnap = NULL;
4623
4624		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4625		if (error != 0)
4626			return (error);
4627
4628		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4629		if (error != 0) {
4630			dsl_pool_rele(dp, FTAG);
4631			return (error);
4632		}
4633
4634		if (zc->zc_fromobj != 0) {
4635			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
4636			    FTAG, &fromsnap);
4637			if (error != 0) {
4638				dsl_dataset_rele(tosnap, FTAG);
4639				dsl_pool_rele(dp, FTAG);
4640				return (error);
4641			}
4642		}
4643
4644		error = dmu_send_estimate(tosnap, fromsnap,
4645		    &zc->zc_objset_type);
4646
4647		if (fromsnap != NULL)
4648			dsl_dataset_rele(fromsnap, FTAG);
4649		dsl_dataset_rele(tosnap, FTAG);
4650		dsl_pool_rele(dp, FTAG);
4651	} else {
4652		file_t *fp;
4653		cap_rights_t rights;
4654
4655#ifdef illumos
4656		fp = getf(zc->zc_cookie);
4657#else
4658		fget_write(curthread, zc->zc_cookie,
4659		    cap_rights_init(&rights, CAP_WRITE), &fp);
4660#endif
4661		if (fp == NULL)
4662			return (SET_ERROR(EBADF));
4663
4664		off = fp->f_offset;
4665		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
4666		    zc->zc_fromobj, embedok, large_block_ok,
4667#ifdef illumos
4668		    zc->zc_cookie, fp->f_vnode, &off);
4669#else
4670		    zc->zc_cookie, fp, &off);
4671#endif
4672
4673		if (off >= 0 && off <= MAXOFFSET_T)
4674			fp->f_offset = off;
4675		releasef(zc->zc_cookie);
4676	}
4677	return (error);
4678}
4679
4680/*
4681 * inputs:
4682 * zc_name	name of snapshot on which to report progress
4683 * zc_cookie	file descriptor of send stream
4684 *
4685 * outputs:
4686 * zc_cookie	number of bytes written in send stream thus far
4687 */
4688static int
4689zfs_ioc_send_progress(zfs_cmd_t *zc)
4690{
4691	dsl_pool_t *dp;
4692	dsl_dataset_t *ds;
4693	dmu_sendarg_t *dsp = NULL;
4694	int error;
4695
4696	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4697	if (error != 0)
4698		return (error);
4699
4700	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4701	if (error != 0) {
4702		dsl_pool_rele(dp, FTAG);
4703		return (error);
4704	}
4705
4706	mutex_enter(&ds->ds_sendstream_lock);
4707
4708	/*
4709	 * Iterate over all the send streams currently active on this dataset.
4710	 * If there's one which matches the specified file descriptor _and_ the
4711	 * stream was started by the current process, return the progress of
4712	 * that stream.
4713	 */
4714	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
4715	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
4716		if (dsp->dsa_outfd == zc->zc_cookie &&
4717		    dsp->dsa_proc == curproc)
4718			break;
4719	}
4720
4721	if (dsp != NULL)
4722		zc->zc_cookie = *(dsp->dsa_off);
4723	else
4724		error = SET_ERROR(ENOENT);
4725
4726	mutex_exit(&ds->ds_sendstream_lock);
4727	dsl_dataset_rele(ds, FTAG);
4728	dsl_pool_rele(dp, FTAG);
4729	return (error);
4730}
4731
4732static int
4733zfs_ioc_inject_fault(zfs_cmd_t *zc)
4734{
4735	int id, error;
4736
4737	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
4738	    &zc->zc_inject_record);
4739
4740	if (error == 0)
4741		zc->zc_guid = (uint64_t)id;
4742
4743	return (error);
4744}
4745
4746static int
4747zfs_ioc_clear_fault(zfs_cmd_t *zc)
4748{
4749	return (zio_clear_fault((int)zc->zc_guid));
4750}
4751
4752static int
4753zfs_ioc_inject_list_next(zfs_cmd_t *zc)
4754{
4755	int id = (int)zc->zc_guid;
4756	int error;
4757
4758	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
4759	    &zc->zc_inject_record);
4760
4761	zc->zc_guid = id;
4762
4763	return (error);
4764}
4765
4766static int
4767zfs_ioc_error_log(zfs_cmd_t *zc)
4768{
4769	spa_t *spa;
4770	int error;
4771	size_t count = (size_t)zc->zc_nvlist_dst_size;
4772
4773	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4774		return (error);
4775
4776	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4777	    &count);
4778	if (error == 0)
4779		zc->zc_nvlist_dst_size = count;
4780	else
4781		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4782
4783	spa_close(spa, FTAG);
4784
4785	return (error);
4786}
4787
4788static int
4789zfs_ioc_clear(zfs_cmd_t *zc)
4790{
4791	spa_t *spa;
4792	vdev_t *vd;
4793	int error;
4794
4795	/*
4796	 * On zpool clear we also fix up missing slogs
4797	 */
4798	mutex_enter(&spa_namespace_lock);
4799	spa = spa_lookup(zc->zc_name);
4800	if (spa == NULL) {
4801		mutex_exit(&spa_namespace_lock);
4802		return (SET_ERROR(EIO));
4803	}
4804	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4805		/* we need to let spa_open/spa_load clear the chains */
4806		spa_set_log_state(spa, SPA_LOG_CLEAR);
4807	}
4808	spa->spa_last_open_failed = 0;
4809	mutex_exit(&spa_namespace_lock);
4810
4811	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4812		error = spa_open(zc->zc_name, &spa, FTAG);
4813	} else {
4814		nvlist_t *policy;
4815		nvlist_t *config = NULL;
4816
4817		if (zc->zc_nvlist_src == 0)
4818			return (SET_ERROR(EINVAL));
4819
4820		if ((error = get_nvlist(zc->zc_nvlist_src,
4821		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4822			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4823			    policy, &config);
4824			if (config != NULL) {
4825				int err;
4826
4827				if ((err = put_nvlist(zc, config)) != 0)
4828					error = err;
4829				nvlist_free(config);
4830			}
4831			nvlist_free(policy);
4832		}
4833	}
4834
4835	if (error != 0)
4836		return (error);
4837
4838	spa_vdev_state_enter(spa, SCL_NONE);
4839
4840	if (zc->zc_guid == 0) {
4841		vd = NULL;
4842	} else {
4843		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4844		if (vd == NULL) {
4845			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
4846			spa_close(spa, FTAG);
4847			return (SET_ERROR(ENODEV));
4848		}
4849	}
4850
4851	vdev_clear(spa, vd);
4852
4853	(void) spa_vdev_state_exit(spa, NULL, 0);
4854
4855	/*
4856	 * Resume any suspended I/Os.
4857	 */
4858	if (zio_resume(spa) != 0)
4859		error = SET_ERROR(EIO);
4860
4861	spa_close(spa, FTAG);
4862
4863	return (error);
4864}
4865
4866static int
4867zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4868{
4869	spa_t *spa;
4870	int error;
4871
4872	error = spa_open(zc->zc_name, &spa, FTAG);
4873	if (error != 0)
4874		return (error);
4875
4876	spa_vdev_state_enter(spa, SCL_NONE);
4877
4878	/*
4879	 * If a resilver is already in progress then set the
4880	 * spa_scrub_reopen flag to B_TRUE so that we don't restart
4881	 * the scan as a side effect of the reopen. Otherwise, let
4882	 * vdev_open() decided if a resilver is required.
4883	 */
4884	spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
4885	vdev_reopen(spa->spa_root_vdev);
4886	spa->spa_scrub_reopen = B_FALSE;
4887
4888	(void) spa_vdev_state_exit(spa, NULL, 0);
4889	spa_close(spa, FTAG);
4890	return (0);
4891}
4892/*
4893 * inputs:
4894 * zc_name	name of filesystem
4895 * zc_value	name of origin snapshot
4896 *
4897 * outputs:
4898 * zc_string	name of conflicting snapshot, if there is one
4899 */
4900static int
4901zfs_ioc_promote(zfs_cmd_t *zc)
4902{
4903	char *cp;
4904
4905	/*
4906	 * We don't need to unmount *all* the origin fs's snapshots, but
4907	 * it's easier.
4908	 */
4909	cp = strchr(zc->zc_value, '@');
4910	if (cp)
4911		*cp = '\0';
4912	(void) dmu_objset_find(zc->zc_value,
4913	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
4914	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4915}
4916
4917/*
4918 * Retrieve a single {user|group}{used|quota}@... property.
4919 *
4920 * inputs:
4921 * zc_name	name of filesystem
4922 * zc_objset_type zfs_userquota_prop_t
4923 * zc_value	domain name (eg. "S-1-234-567-89")
4924 * zc_guid	RID/UID/GID
4925 *
4926 * outputs:
4927 * zc_cookie	property value
4928 */
4929static int
4930zfs_ioc_userspace_one(zfs_cmd_t *zc)
4931{
4932	zfsvfs_t *zfsvfs;
4933	int error;
4934
4935	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
4936		return (SET_ERROR(EINVAL));
4937
4938	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4939	if (error != 0)
4940		return (error);
4941
4942	error = zfs_userspace_one(zfsvfs,
4943	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
4944	zfsvfs_rele(zfsvfs, FTAG);
4945
4946	return (error);
4947}
4948
4949/*
4950 * inputs:
4951 * zc_name		name of filesystem
4952 * zc_cookie		zap cursor
4953 * zc_objset_type	zfs_userquota_prop_t
4954 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
4955 *
4956 * outputs:
4957 * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
4958 * zc_cookie	zap cursor
4959 */
4960static int
4961zfs_ioc_userspace_many(zfs_cmd_t *zc)
4962{
4963	zfsvfs_t *zfsvfs;
4964	int bufsize = zc->zc_nvlist_dst_size;
4965
4966	if (bufsize <= 0)
4967		return (SET_ERROR(ENOMEM));
4968
4969	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4970	if (error != 0)
4971		return (error);
4972
4973	void *buf = kmem_alloc(bufsize, KM_SLEEP);
4974
4975	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
4976	    buf, &zc->zc_nvlist_dst_size);
4977
4978	if (error == 0) {
4979		error = ddi_copyout(buf,
4980		    (void *)(uintptr_t)zc->zc_nvlist_dst,
4981		    zc->zc_nvlist_dst_size, zc->zc_iflags);
4982	}
4983	kmem_free(buf, bufsize);
4984	zfsvfs_rele(zfsvfs, FTAG);
4985
4986	return (error);
4987}
4988
4989/*
4990 * inputs:
4991 * zc_name		name of filesystem
4992 *
4993 * outputs:
4994 * none
4995 */
4996static int
4997zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
4998{
4999	objset_t *os;
5000	int error = 0;
5001	zfsvfs_t *zfsvfs;
5002
5003	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
5004		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
5005			/*
5006			 * If userused is not enabled, it may be because the
5007			 * objset needs to be closed & reopened (to grow the
5008			 * objset_phys_t).  Suspend/resume the fs will do that.
5009			 */
5010			dsl_dataset_t *ds, *newds;
5011
5012			ds = dmu_objset_ds(zfsvfs->z_os);
5013			error = zfs_suspend_fs(zfsvfs);
5014			if (error == 0) {
5015				dmu_objset_refresh_ownership(ds, &newds,
5016				    zfsvfs);
5017				error = zfs_resume_fs(zfsvfs, newds);
5018			}
5019		}
5020		if (error == 0)
5021			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
5022#ifdef illumos
5023		VFS_RELE(zfsvfs->z_vfs);
5024#else
5025		vfs_unbusy(zfsvfs->z_vfs);
5026#endif
5027	} else {
5028		/* XXX kind of reading contents without owning */
5029		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5030		if (error != 0)
5031			return (error);
5032
5033		error = dmu_objset_userspace_upgrade(os);
5034		dmu_objset_rele(os, FTAG);
5035	}
5036
5037	return (error);
5038}
5039
#ifdef illumos
/*
 * We don't want to have a hard dependency against some special symbols
 * in sharefs, nfs, and smbsrv.  Look them up, if needed, when the first
 * file system is shared.
 * Neither sharefs, nfs nor smbsrv are unloadable modules.
 */
/* Entry points resolved from the nfs, sharefs and smbsrv modules. */
int (*znfsexport_fs)(void *arg);
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/* Set to 1 once the NFS / SMB share entry points have been resolved. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Module handles returned by ddi_modopen(). */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
#endif	/* illumos */
/*
 * In the illumos share code, held while the module handles and entry
 * points are looked up and the *_inited flags are set.
 */
kmutex_t zfs_share_lock;
5060
#ifdef illumos
/*
 * Resolve the sharefs module handle and its "sharefs_impl" entry point if
 * that has not been done yet.  The caller must hold zfs_share_lock.
 * Returns 0 on success and ENOSYS if either lookup fails.
 */
static int
zfs_init_sharefs()
{
	int error;

	ASSERT(MUTEX_HELD(&zfs_share_lock));

	/* Both NFS and SMB shares also require sharetab support. */
	if (sharefs_mod == NULL) {
		sharefs_mod = ddi_modopen("fs/sharefs",
		    KRTLD_MODE_FIRST, &error);
		if (sharefs_mod == NULL)
			return (SET_ERROR(ENOSYS));
	}

	if (zshare_fs == NULL) {
		zshare_fs = (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
		    ddi_modsym(sharefs_mod, "sharefs_impl", &error);
		if (zshare_fs == NULL)
			return (SET_ERROR(ENOSYS));
	}

	return (0);
}
#endif	/* illumos */
5082
5083static int
5084zfs_ioc_share(zfs_cmd_t *zc)
5085{
5086#ifdef illumos
5087	int error;
5088	int opcode;
5089
5090	switch (zc->zc_share.z_sharetype) {
5091	case ZFS_SHARE_NFS:
5092	case ZFS_UNSHARE_NFS:
5093		if (zfs_nfsshare_inited == 0) {
5094			mutex_enter(&zfs_share_lock);
5095			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
5096			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5097				mutex_exit(&zfs_share_lock);
5098				return (SET_ERROR(ENOSYS));
5099			}
5100			if (znfsexport_fs == NULL &&
5101			    ((znfsexport_fs = (int (*)(void *))
5102			    ddi_modsym(nfs_mod,
5103			    "nfs_export", &error)) == NULL)) {
5104				mutex_exit(&zfs_share_lock);
5105				return (SET_ERROR(ENOSYS));
5106			}
5107			error = zfs_init_sharefs();
5108			if (error != 0) {
5109				mutex_exit(&zfs_share_lock);
5110				return (SET_ERROR(ENOSYS));
5111			}
5112			zfs_nfsshare_inited = 1;
5113			mutex_exit(&zfs_share_lock);
5114		}
5115		break;
5116	case ZFS_SHARE_SMB:
5117	case ZFS_UNSHARE_SMB:
5118		if (zfs_smbshare_inited == 0) {
5119			mutex_enter(&zfs_share_lock);
5120			if (smbsrv_mod == NULL && ((smbsrv_mod =
5121			    ddi_modopen("drv/smbsrv",
5122			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5123				mutex_exit(&zfs_share_lock);
5124				return (SET_ERROR(ENOSYS));
5125			}
5126			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
5127			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
5128			    "smb_server_share", &error)) == NULL)) {
5129				mutex_exit(&zfs_share_lock);
5130				return (SET_ERROR(ENOSYS));
5131			}
5132			error = zfs_init_sharefs();
5133			if (error != 0) {
5134				mutex_exit(&zfs_share_lock);
5135				return (SET_ERROR(ENOSYS));
5136			}
5137			zfs_smbshare_inited = 1;
5138			mutex_exit(&zfs_share_lock);
5139		}
5140		break;
5141	default:
5142		return (SET_ERROR(EINVAL));
5143	}
5144
5145	switch (zc->zc_share.z_sharetype) {
5146	case ZFS_SHARE_NFS:
5147	case ZFS_UNSHARE_NFS:
5148		if (error =
5149		    znfsexport_fs((void *)
5150		    (uintptr_t)zc->zc_share.z_exportdata))
5151			return (error);
5152		break;
5153	case ZFS_SHARE_SMB:
5154	case ZFS_UNSHARE_SMB:
5155		if (error = zsmbexport_fs((void *)
5156		    (uintptr_t)zc->zc_share.z_exportdata,
5157		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
5158		    B_TRUE: B_FALSE)) {
5159			return (error);
5160		}
5161		break;
5162	}
5163
5164	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
5165	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
5166	    SHAREFS_ADD : SHAREFS_REMOVE;
5167
5168	/*
5169	 * Add or remove share from sharetab
5170	 */
5171	error = zshare_fs(opcode,
5172	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
5173	    zc->zc_share.z_sharemax);
5174
5175	return (error);
5176
5177#else	/* !illumos */
5178	return (ENOSYS);
5179#endif	/* illumos */
5180}
5181
/*
 * Single ACE built from ACE_ALL_PERMS and ACE_EVERYONE.  The illumos
 * zfs_ioc_smb_acl() code passes it as the ACL when creating a share
 * resource file.
 */
ace_t full_access[] = {
	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
5185
5186/*
5187 * inputs:
5188 * zc_name		name of containing filesystem
5189 * zc_obj		object # beyond which we want next in-use object #
5190 *
5191 * outputs:
5192 * zc_obj		next in-use object #
5193 */
5194static int
5195zfs_ioc_next_obj(zfs_cmd_t *zc)
5196{
5197	objset_t *os = NULL;
5198	int error;
5199
5200	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5201	if (error != 0)
5202		return (error);
5203
5204	error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
5205	    dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg);
5206
5207	dmu_objset_rele(os, FTAG);
5208	return (error);
5209}
5210
5211/*
5212 * inputs:
5213 * zc_name		name of filesystem
5214 * zc_value		prefix name for snapshot
5215 * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
5216 *
5217 * outputs:
5218 * zc_value		short name of new snapshot
5219 */
5220static int
5221zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5222{
5223	char *snap_name;
5224	char *hold_name;
5225	int error;
5226	minor_t minor;
5227
5228	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5229	if (error != 0)
5230		return (error);
5231
5232	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5233	    (u_longlong_t)ddi_get_lbolt64());
5234	hold_name = kmem_asprintf("%%%s", zc->zc_value);
5235
5236	error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5237	    hold_name);
5238	if (error == 0)
5239		(void) strcpy(zc->zc_value, snap_name);
5240	strfree(snap_name);
5241	strfree(hold_name);
5242	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5243	return (error);
5244}
5245
5246/*
5247 * inputs:
5248 * zc_name		name of "to" snapshot
5249 * zc_value		name of "from" snapshot
5250 * zc_cookie		file descriptor to write diff data on
5251 *
5252 * outputs:
5253 * dmu_diff_record_t's to the file descriptor
5254 */
5255static int
5256zfs_ioc_diff(zfs_cmd_t *zc)
5257{
5258	file_t *fp;
5259	cap_rights_t rights;
5260	offset_t off;
5261	int error;
5262
5263#ifdef illumos
5264	fp = getf(zc->zc_cookie);
5265#else
5266	fget_write(curthread, zc->zc_cookie,
5267		    cap_rights_init(&rights, CAP_WRITE), &fp);
5268#endif
5269	if (fp == NULL)
5270		return (SET_ERROR(EBADF));
5271
5272	off = fp->f_offset;
5273
5274#ifdef illumos
5275	error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5276#else
5277	error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
5278#endif
5279
5280	if (off >= 0 && off <= MAXOFFSET_T)
5281		fp->f_offset = off;
5282	releasef(zc->zc_cookie);
5283
5284	return (error);
5285}
5286
#ifdef illumos
/*
 * Remove all ACL files in shares dir
 *
 * Walks the ZAP entries of the directory znode and removes each name.
 * Stops at the first VOP_REMOVE() failure; otherwise returns whatever
 * non-zero value zap_cursor_retrieve() ended the walk with.
 */
static int
zfs_smb_acl_purge(znode_t *dzp)
{
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zap_attribute_t	entry;
	zap_cursor_t	cursor;
	int error;

	zap_cursor_init(&cursor, zfsvfs->z_os, dzp->z_id);
	while ((error = zap_cursor_retrieve(&cursor, &entry)) == 0) {
		error = VOP_REMOVE(ZTOV(dzp), entry.za_name, kcred,
		    NULL, 0);
		if (error != 0)
			break;
		zap_cursor_advance(&cursor);
	}
	zap_cursor_fini(&cursor);

	return (error);
}
#endif	/* illumos */
5310
5311static int
5312zfs_ioc_smb_acl(zfs_cmd_t *zc)
5313{
5314#ifdef illumos
5315	vnode_t *vp;
5316	znode_t *dzp;
5317	vnode_t *resourcevp = NULL;
5318	znode_t *sharedir;
5319	zfsvfs_t *zfsvfs;
5320	nvlist_t *nvlist;
5321	char *src, *target;
5322	vattr_t vattr;
5323	vsecattr_t vsec;
5324	int error = 0;
5325
5326	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5327	    NO_FOLLOW, NULL, &vp)) != 0)
5328		return (error);
5329
5330	/* Now make sure mntpnt and dataset are ZFS */
5331
5332	if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
5333	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5334	    zc->zc_name) != 0)) {
5335		VN_RELE(vp);
5336		return (SET_ERROR(EINVAL));
5337	}
5338
5339	dzp = VTOZ(vp);
5340	zfsvfs = dzp->z_zfsvfs;
5341	ZFS_ENTER(zfsvfs);
5342
5343	/*
5344	 * Create share dir if its missing.
5345	 */
5346	mutex_enter(&zfsvfs->z_lock);
5347	if (zfsvfs->z_shares_dir == 0) {
5348		dmu_tx_t *tx;
5349
5350		tx = dmu_tx_create(zfsvfs->z_os);
5351		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5352		    ZFS_SHARES_DIR);
5353		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5354		error = dmu_tx_assign(tx, TXG_WAIT);
5355		if (error != 0) {
5356			dmu_tx_abort(tx);
5357		} else {
5358			error = zfs_create_share_dir(zfsvfs, tx);
5359			dmu_tx_commit(tx);
5360		}
5361		if (error != 0) {
5362			mutex_exit(&zfsvfs->z_lock);
5363			VN_RELE(vp);
5364			ZFS_EXIT(zfsvfs);
5365			return (error);
5366		}
5367	}
5368	mutex_exit(&zfsvfs->z_lock);
5369
5370	ASSERT(zfsvfs->z_shares_dir);
5371	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5372		VN_RELE(vp);
5373		ZFS_EXIT(zfsvfs);
5374		return (error);
5375	}
5376
5377	switch (zc->zc_cookie) {
5378	case ZFS_SMB_ACL_ADD:
5379		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5380		vattr.va_type = VREG;
5381		vattr.va_mode = S_IFREG|0777;
5382		vattr.va_uid = 0;
5383		vattr.va_gid = 0;
5384
5385		vsec.vsa_mask = VSA_ACE;
5386		vsec.vsa_aclentp = &full_access;
5387		vsec.vsa_aclentsz = sizeof (full_access);
5388		vsec.vsa_aclcnt = 1;
5389
5390		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5391		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5392		if (resourcevp)
5393			VN_RELE(resourcevp);
5394		break;
5395
5396	case ZFS_SMB_ACL_REMOVE:
5397		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5398		    NULL, 0);
5399		break;
5400
5401	case ZFS_SMB_ACL_RENAME:
5402		if ((error = get_nvlist(zc->zc_nvlist_src,
5403		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5404			VN_RELE(vp);
5405			VN_RELE(ZTOV(sharedir));
5406			ZFS_EXIT(zfsvfs);
5407			return (error);
5408		}
5409		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5410		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5411		    &target)) {
5412			VN_RELE(vp);
5413			VN_RELE(ZTOV(sharedir));
5414			ZFS_EXIT(zfsvfs);
5415			nvlist_free(nvlist);
5416			return (error);
5417		}
5418		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5419		    kcred, NULL, 0);
5420		nvlist_free(nvlist);
5421		break;
5422
5423	case ZFS_SMB_ACL_PURGE:
5424		error = zfs_smb_acl_purge(sharedir);
5425		break;
5426
5427	default:
5428		error = SET_ERROR(EINVAL);
5429		break;
5430	}
5431
5432	VN_RELE(vp);
5433	VN_RELE(ZTOV(sharedir));
5434
5435	ZFS_EXIT(zfsvfs);
5436
5437	return (error);
5438#else	/* !illumos */
5439	return (EOPNOTSUPP);
5440#endif	/* illumos */
5441}
5442
5443/*
5444 * innvl: {
5445 *     "holds" -> { snapname -> holdname (string), ... }
5446 *     (optional) "cleanup_fd" -> fd (int32)
5447 * }
5448 *
5449 * outnvl: {
5450 *     snapname -> error value (int32)
5451 *     ...
5452 * }
5453 */
5454/* ARGSUSED */
5455static int
5456zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5457{
5458	nvpair_t *pair;
5459	nvlist_t *holds;
5460	int cleanup_fd = -1;
5461	int error;
5462	minor_t minor = 0;
5463
5464	error = nvlist_lookup_nvlist(args, "holds", &holds);
5465	if (error != 0)
5466		return (SET_ERROR(EINVAL));
5467
5468	/* make sure the user didn't pass us any invalid (empty) tags */
5469	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
5470	    pair = nvlist_next_nvpair(holds, pair)) {
5471		char *htag;
5472
5473		error = nvpair_value_string(pair, &htag);
5474		if (error != 0)
5475			return (SET_ERROR(error));
5476
5477		if (strlen(htag) == 0)
5478			return (SET_ERROR(EINVAL));
5479	}
5480
5481	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
5482		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
5483		if (error != 0)
5484			return (error);
5485	}
5486
5487	error = dsl_dataset_user_hold(holds, minor, errlist);
5488	if (minor != 0)
5489		zfs_onexit_fd_rele(cleanup_fd);
5490	return (error);
5491}
5492
5493/*
5494 * innvl is not used.
5495 *
5496 * outnvl: {
5497 *    holdname -> time added (uint64 seconds since epoch)
5498 *    ...
5499 * }
5500 */
5501/* ARGSUSED */
5502static int
5503zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
5504{
5505	return (dsl_dataset_get_holds(snapname, outnvl));
5506}
5507
5508/*
5509 * innvl: {
5510 *     snapname -> { holdname, ... }
5511 *     ...
5512 * }
5513 *
5514 * outnvl: {
5515 *     snapname -> error value (int32)
5516 *     ...
5517 * }
5518 */
5519/* ARGSUSED */
5520static int
5521zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
5522{
5523	return (dsl_dataset_user_release(holds, errlist));
5524}
5525
5526/*
5527 * inputs:
5528 * zc_name		name of new filesystem or snapshot
5529 * zc_value		full name of old snapshot
5530 *
5531 * outputs:
5532 * zc_cookie		space in bytes
5533 * zc_objset_type	compressed space in bytes
5534 * zc_perm_action	uncompressed space in bytes
5535 */
5536static int
5537zfs_ioc_space_written(zfs_cmd_t *zc)
5538{
5539	int error;
5540	dsl_pool_t *dp;
5541	dsl_dataset_t *new, *old;
5542
5543	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5544	if (error != 0)
5545		return (error);
5546	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
5547	if (error != 0) {
5548		dsl_pool_rele(dp, FTAG);
5549		return (error);
5550	}
5551	error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
5552	if (error != 0) {
5553		dsl_dataset_rele(new, FTAG);
5554		dsl_pool_rele(dp, FTAG);
5555		return (error);
5556	}
5557
5558	error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
5559	    &zc->zc_objset_type, &zc->zc_perm_action);
5560	dsl_dataset_rele(old, FTAG);
5561	dsl_dataset_rele(new, FTAG);
5562	dsl_pool_rele(dp, FTAG);
5563	return (error);
5564}
5565
5566/*
5567 * innvl: {
5568 *     "firstsnap" -> snapshot name
5569 * }
5570 *
5571 * outnvl: {
5572 *     "used" -> space in bytes
5573 *     "compressed" -> compressed space in bytes
5574 *     "uncompressed" -> uncompressed space in bytes
5575 * }
5576 */
5577static int
5578zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
5579{
5580	int error;
5581	dsl_pool_t *dp;
5582	dsl_dataset_t *new, *old;
5583	char *firstsnap;
5584	uint64_t used, comp, uncomp;
5585
5586	if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
5587		return (SET_ERROR(EINVAL));
5588
5589	error = dsl_pool_hold(lastsnap, FTAG, &dp);
5590	if (error != 0)
5591		return (error);
5592
5593	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
5594	if (error == 0 && !new->ds_is_snapshot) {
5595		dsl_dataset_rele(new, FTAG);
5596		error = SET_ERROR(EINVAL);
5597	}
5598	if (error != 0) {
5599		dsl_pool_rele(dp, FTAG);
5600		return (error);
5601	}
5602	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
5603	if (error == 0 && !old->ds_is_snapshot) {
5604		dsl_dataset_rele(old, FTAG);
5605		error = SET_ERROR(EINVAL);
5606	}
5607	if (error != 0) {
5608		dsl_dataset_rele(new, FTAG);
5609		dsl_pool_rele(dp, FTAG);
5610		return (error);
5611	}
5612
5613	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
5614	dsl_dataset_rele(old, FTAG);
5615	dsl_dataset_rele(new, FTAG);
5616	dsl_pool_rele(dp, FTAG);
5617	fnvlist_add_uint64(outnvl, "used", used);
5618	fnvlist_add_uint64(outnvl, "compressed", comp);
5619	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
5620	return (error);
5621}
5622
5623static int
5624zfs_ioc_jail(zfs_cmd_t *zc)
5625{
5626
5627	return (zone_dataset_attach(curthread->td_ucred, zc->zc_name,
5628	    (int)zc->zc_jailid));
5629}
5630
5631static int
5632zfs_ioc_unjail(zfs_cmd_t *zc)
5633{
5634
5635	return (zone_dataset_detach(curthread->td_ucred, zc->zc_name,
5636	    (int)zc->zc_jailid));
5637}
5638
5639/*
5640 * innvl: {
5641 *     "fd" -> file descriptor to write stream to (int32)
5642 *     (optional) "fromsnap" -> full snap name to send an incremental from
5643 *     (optional) "largeblockok" -> (value ignored)
5644 *         indicates that blocks > 128KB are permitted
5645 *     (optional) "embedok" -> (value ignored)
5646 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
5647 *     (optional) "resume_object" and "resume_offset" -> (uint64)
5648 *         if present, resume send stream from specified object and offset.
5649 * }
5650 *
5651 * outnvl is unused
5652 */
5653/* ARGSUSED */
5654static int
5655zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5656{
5657	cap_rights_t rights;
5658	file_t *fp;
5659	int error;
5660	offset_t off;
5661	char *fromname = NULL;
5662	int fd;
5663	boolean_t largeblockok;
5664	boolean_t embedok;
5665	uint64_t resumeobj = 0;
5666	uint64_t resumeoff = 0;
5667
5668	error = nvlist_lookup_int32(innvl, "fd", &fd);
5669	if (error != 0)
5670		return (SET_ERROR(EINVAL));
5671
5672	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
5673
5674	largeblockok = nvlist_exists(innvl, "largeblockok");
5675	embedok = nvlist_exists(innvl, "embedok");
5676
5677	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
5678	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
5679
5680#ifdef illumos
5681	file_t *fp = getf(fd);
5682#else
5683	fget_write(curthread, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
5684#endif
5685	if (fp == NULL)
5686		return (SET_ERROR(EBADF));
5687
5688	off = fp->f_offset;
5689	error = dmu_send(snapname, fromname, embedok, largeblockok, fd,
5690#ifdef illumos
5691	    resumeobj, resumeoff, fp->f_vnode, &off);
5692#else
5693	    resumeobj, resumeoff, fp, &off);
5694#endif
5695
5696#ifdef illumos
5697	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5698		fp->f_offset = off;
5699#else
5700	fp->f_offset = off;
5701#endif
5702
5703	releasef(fd);
5704	return (error);
5705}
5706
5707/*
5708 * Determine approximately how large a zfs send stream will be -- the number
5709 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
5710 *
5711 * innvl: {
5712 *     (optional) "from" -> full snap or bookmark name to send an incremental
5713 *                          from
5714 * }
5715 *
5716 * outnvl: {
5717 *     "space" -> bytes of space (uint64)
5718 * }
5719 */
5720static int
5721zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5722{
5723	dsl_pool_t *dp;
5724	dsl_dataset_t *tosnap;
5725	int error;
5726	char *fromname;
5727	uint64_t space;
5728
5729	error = dsl_pool_hold(snapname, FTAG, &dp);
5730	if (error != 0)
5731		return (error);
5732
5733	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
5734	if (error != 0) {
5735		dsl_pool_rele(dp, FTAG);
5736		return (error);
5737	}
5738
5739	error = nvlist_lookup_string(innvl, "from", &fromname);
5740	if (error == 0) {
5741		if (strchr(fromname, '@') != NULL) {
5742			/*
5743			 * If from is a snapshot, hold it and use the more
5744			 * efficient dmu_send_estimate to estimate send space
5745			 * size using deadlists.
5746			 */
5747			dsl_dataset_t *fromsnap;
5748			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
5749			if (error != 0)
5750				goto out;
5751			error = dmu_send_estimate(tosnap, fromsnap, &space);
5752			dsl_dataset_rele(fromsnap, FTAG);
5753		} else if (strchr(fromname, '#') != NULL) {
5754			/*
5755			 * If from is a bookmark, fetch the creation TXG of the
5756			 * snapshot it was created from and use that to find
5757			 * blocks that were born after it.
5758			 */
5759			zfs_bookmark_phys_t frombm;
5760
5761			error = dsl_bookmark_lookup(dp, fromname, tosnap,
5762			    &frombm);
5763			if (error != 0)
5764				goto out;
5765			error = dmu_send_estimate_from_txg(tosnap,
5766			    frombm.zbm_creation_txg, &space);
5767		} else {
5768			/*
5769			 * from is not properly formatted as a snapshot or
5770			 * bookmark
5771			 */
5772			error = SET_ERROR(EINVAL);
5773			goto out;
5774		}
5775	} else {
5776		// If estimating the size of a full send, use dmu_send_estimate
5777		error = dmu_send_estimate(tosnap, NULL, &space);
5778	}
5779
5780	fnvlist_add_uint64(outnvl, "space", space);
5781
5782out:
5783	dsl_dataset_rele(tosnap, FTAG);
5784	dsl_pool_rele(dp, FTAG);
5785	return (error);
5786}
5787
/*
 * Table of registered ioctl handlers, one slot per ioctl number.  Slots are
 * indexed by (ioc - ZFS_IOC_FIRST) when they are filled in by
 * zfs_ioctl_register() and zfs_ioctl_register_legacy().
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
5789
5790static void
5791zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5792    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5793    boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
5794{
5795	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5796
5797	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5798	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5799	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5800	ASSERT3P(vec->zvec_func, ==, NULL);
5801
5802	vec->zvec_legacy_func = func;
5803	vec->zvec_secpolicy = secpolicy;
5804	vec->zvec_namecheck = namecheck;
5805	vec->zvec_allow_log = log_history;
5806	vec->zvec_pool_check = pool_check;
5807}
5808
5809/*
5810 * See the block comment at the beginning of this file for details on
5811 * each argument to this function.
5812 */
5813static void
5814zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
5815    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5816    zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
5817    boolean_t allow_log)
5818{
5819	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5820
5821	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5822	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5823	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5824	ASSERT3P(vec->zvec_func, ==, NULL);
5825
5826	/* if we are logging, the name must be valid */
5827	ASSERT(!allow_log || namecheck != NO_NAME);
5828
5829	vec->zvec_name = name;
5830	vec->zvec_func = func;
5831	vec->zvec_secpolicy = secpolicy;
5832	vec->zvec_namecheck = namecheck;
5833	vec->zvec_pool_check = pool_check;
5834	vec->zvec_smush_outnvlist = smush_outnvlist;
5835	vec->zvec_allow_log = allow_log;
5836}
5837
5838static void
5839zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5840    zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
5841    zfs_ioc_poolcheck_t pool_check)
5842{
5843	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5844	    POOL_NAME, log_history, pool_check);
5845}
5846
5847static void
5848zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5849    zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
5850{
5851	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5852	    DATASET_NAME, B_FALSE, pool_check);
5853}
5854
5855static void
5856zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5857{
5858	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
5859	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5860}
5861
5862static void
5863zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5864    zfs_secpolicy_func_t *secpolicy)
5865{
5866	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5867	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
5868}
5869
5870static void
5871zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
5872    zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
5873{
5874	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5875	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
5876}
5877
5878static void
5879zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5880{
5881	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
5882	    zfs_secpolicy_read);
5883}
5884
5885static void
5886zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5887    zfs_secpolicy_func_t *secpolicy)
5888{
5889	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5890	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5891}
5892
5893static void
5894zfs_ioctl_init(void)
5895{
5896	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
5897	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
5898	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5899
5900	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
5901	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
5902	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
5903
5904	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
5905	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
5906	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5907
5908	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
5909	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
5910	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5911
5912	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
5913	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
5914	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5915
5916	zfs_ioctl_register("create", ZFS_IOC_CREATE,
5917	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
5918	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5919
5920	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
5921	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
5922	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5923
5924	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
5925	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
5926	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5927
5928	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
5929	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
5930	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5931	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
5932	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
5933	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5934
5935	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
5936	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
5937	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5938
5939	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
5940	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
5941	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
5942
5943	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
5944	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
5945	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5946
5947	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
5948	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
5949	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5950
5951	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
5952	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
5953	    POOL_NAME,
5954	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5955
5956	/* IOCTLS that use the legacy function signature */
5957
5958	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
5959	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
5960
5961	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
5962	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
5963	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
5964	    zfs_ioc_pool_scan);
5965	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
5966	    zfs_ioc_pool_upgrade);
5967	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
5968	    zfs_ioc_vdev_add);
5969	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
5970	    zfs_ioc_vdev_remove);
5971	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
5972	    zfs_ioc_vdev_set_state);
5973	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
5974	    zfs_ioc_vdev_attach);
5975	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
5976	    zfs_ioc_vdev_detach);
5977	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
5978	    zfs_ioc_vdev_setpath);
5979	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
5980	    zfs_ioc_vdev_setfru);
5981	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
5982	    zfs_ioc_pool_set_props);
5983	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
5984	    zfs_ioc_vdev_split);
5985	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
5986	    zfs_ioc_pool_reguid);
5987
5988	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
5989	    zfs_ioc_pool_configs, zfs_secpolicy_none);
5990	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
5991	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
5992	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
5993	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
5994	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
5995	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
5996	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
5997	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);
5998
5999	/*
6000	 * pool destroy, and export don't log the history as part of
6001	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
6002	 * does the logging of those commands.
6003	 */
6004	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
6005	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
6006	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
6007	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
6008
6009	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
6010	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
6011	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
6012	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
6013
6014	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
6015	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_NONE);
6016	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
6017	    zfs_ioc_dsobj_to_dsname,
6018	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_NONE);
6019	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
6020	    zfs_ioc_pool_get_history,
6021	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
6022
6023	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
6024	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
6025
6026	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
6027	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
6028	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
6029	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
6030
6031	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
6032	    zfs_ioc_space_written);
6033	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
6034	    zfs_ioc_objset_recvd_props);
6035	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
6036	    zfs_ioc_next_obj);
6037	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
6038	    zfs_ioc_get_fsacl);
6039	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
6040	    zfs_ioc_objset_stats);
6041	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
6042	    zfs_ioc_objset_zplprops);
6043	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
6044	    zfs_ioc_dataset_list_next);
6045	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
6046	    zfs_ioc_snapshot_list_next);
6047	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
6048	    zfs_ioc_send_progress);
6049
6050	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
6051	    zfs_ioc_diff, zfs_secpolicy_diff);
6052	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
6053	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
6054	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
6055	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
6056	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
6057	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
6058	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
6059	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
6060	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
6061	    zfs_ioc_send, zfs_secpolicy_send);
6062
6063	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
6064	    zfs_secpolicy_none);
6065	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
6066	    zfs_secpolicy_destroy);
6067	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
6068	    zfs_secpolicy_rename);
6069	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
6070	    zfs_secpolicy_recv);
6071	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
6072	    zfs_secpolicy_promote);
6073	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
6074	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
6075	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
6076	    zfs_secpolicy_set_fsacl);
6077
6078	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
6079	    zfs_secpolicy_share, POOL_CHECK_NONE);
6080	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
6081	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
6082	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
6083	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
6084	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6085	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
6086	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
6087	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6088
6089#ifdef __FreeBSD__
6090	zfs_ioctl_register_dataset_nolog(ZFS_IOC_JAIL, zfs_ioc_jail,
6091	    zfs_secpolicy_config, POOL_CHECK_NONE);
6092	zfs_ioctl_register_dataset_nolog(ZFS_IOC_UNJAIL, zfs_ioc_unjail,
6093	    zfs_secpolicy_config, POOL_CHECK_NONE);
6094	zfs_ioctl_register("fbsd_nextboot", ZFS_IOC_NEXTBOOT,
6095	    zfs_ioc_nextboot, zfs_secpolicy_config, NO_NAME,
6096	    POOL_CHECK_NONE, B_FALSE, B_FALSE);
6097#endif
6098}
6099
6100int
6101pool_status_check(const char *name, zfs_ioc_namecheck_t type,
6102    zfs_ioc_poolcheck_t check)
6103{
6104	spa_t *spa;
6105	int error;
6106
6107	ASSERT(type == POOL_NAME || type == DATASET_NAME);
6108
6109	if (check & POOL_CHECK_NONE)
6110		return (0);
6111
6112	error = spa_open(name, &spa, FTAG);
6113	if (error == 0) {
6114		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
6115			error = SET_ERROR(EAGAIN);
6116		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
6117			error = SET_ERROR(EROFS);
6118		spa_close(spa, FTAG);
6119	}
6120	return (error);
6121}
6122
6123/*
6124 * Find a free minor number.
6125 */
6126minor_t
6127zfsdev_minor_alloc(void)
6128{
6129	static minor_t last_minor;
6130	minor_t m;
6131
6132	ASSERT(MUTEX_HELD(&spa_namespace_lock));
6133
6134	for (m = last_minor + 1; m != last_minor; m++) {
6135		if (m > ZFSDEV_MAX_MINOR)
6136			m = 1;
6137		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
6138			last_minor = m;
6139			return (m);
6140		}
6141	}
6142
6143	return (0);
6144}
6145
6146static int
6147zfs_ctldev_init(struct cdev *devp)
6148{
6149	minor_t minor;
6150	zfs_soft_state_t *zs;
6151
6152	ASSERT(MUTEX_HELD(&spa_namespace_lock));
6153
6154	minor = zfsdev_minor_alloc();
6155	if (minor == 0)
6156		return (SET_ERROR(ENXIO));
6157
6158	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
6159		return (SET_ERROR(EAGAIN));
6160
6161	devfs_set_cdevpriv((void *)(uintptr_t)minor, zfsdev_close);
6162
6163	zs = ddi_get_soft_state(zfsdev_state, minor);
6164	zs->zss_type = ZSST_CTLDEV;
6165	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
6166
6167	return (0);
6168}
6169
6170static void
6171zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
6172{
6173	ASSERT(MUTEX_HELD(&spa_namespace_lock));
6174
6175	zfs_onexit_destroy(zo);
6176	ddi_soft_state_free(zfsdev_state, minor);
6177}
6178
6179void *
6180zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
6181{
6182	zfs_soft_state_t *zp;
6183
6184	zp = ddi_get_soft_state(zfsdev_state, minor);
6185	if (zp == NULL || zp->zss_type != which)
6186		return (NULL);
6187
6188	return (zp->zss_data);
6189}
6190
6191static int
6192zfsdev_open(struct cdev *devp, int flag, int mode, struct thread *td)
6193{
6194	int error = 0;
6195
6196#ifdef illumos
6197	if (getminor(*devp) != 0)
6198		return (zvol_open(devp, flag, otyp, cr));
6199#endif
6200
6201	/* This is the control device. Allocate a new minor if requested. */
6202	if (flag & FEXCL) {
6203		mutex_enter(&spa_namespace_lock);
6204		error = zfs_ctldev_init(devp);
6205		mutex_exit(&spa_namespace_lock);
6206	}
6207
6208	return (error);
6209}
6210
6211static void
6212zfsdev_close(void *data)
6213{
6214	zfs_onexit_t *zo;
6215	minor_t minor = (minor_t)(uintptr_t)data;
6216
6217	if (minor == 0)
6218		return;
6219
6220	mutex_enter(&spa_namespace_lock);
6221	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
6222	if (zo == NULL) {
6223		mutex_exit(&spa_namespace_lock);
6224		return;
6225	}
6226	zfs_ctldev_destroy(zo, minor);
6227	mutex_exit(&spa_namespace_lock);
6228}
6229
/*
 * Common entry point for all ioctls on the ZFS control device.
 *
 * The low 8 bits of zcmd select the ioctl and the parameter length encoded
 * in zcmd identifies the caller's ABI: a zfs_iocparm_t-sized argument is a
 * "new style" request that points at the real zfs_cmd structure in user
 * memory, while any other recognized size is an older zfs_cmd layout passed
 * directly in arg.  Older layouts (and older versions named in the
 * zfs_iocparm_t) are translated to and from the current zfs_cmd_t with the
 * zfs_cmd_compat_*() / zfs_ioctl_compat_*() helpers.
 *
 * After the command has been brought in, the function unpacks the input
 * nvlist (if any), validates zc_name according to the vector's namecheck
 * setting, runs the security policy, invokes either the nvlist-based or the
 * legacy handler, and copies the results back to the caller.  When the ioctl
 * succeeds and is marked as loggable, the pool name is stored in
 * thread-specific data under zfs_allow_log_key.
 *
 * Returns 0 on success or an errno value.
 *
 * NOTE(review): cflag, cmd, compat, newioc and cr are declared only in the
 * !illumos branch below but are used unconditionally, so this function
 * appears to build only for the non-illumos case.
 */
static int
zfsdev_ioctl(struct cdev *dev, u_long zcmd, caddr_t arg, int flag,
    struct thread *td)
{
	zfs_cmd_t *zc;
	uint_t vecnum;
	int error, rc, len;
#ifdef illumos
	minor_t minor = getminor(dev);
#else
	zfs_iocparm_t *zc_iocparm;
	int cflag, cmd, oldvecnum;
	boolean_t newioc, compat;
	void *compat_zc = NULL;
	cred_t *cr = td->td_ucred;
#endif
	const zfs_ioc_vec_t *vec;
	char *saved_poolname = NULL;
	nvlist_t *innvl = NULL;

	cflag = ZFS_CMD_COMPAT_NONE;
	compat = B_FALSE;
	newioc = B_TRUE;	/* "new" style (zfs_iocparm_t) ioctl */

	len = IOCPARM_LEN(zcmd);
	vecnum = cmd = zcmd & 0xff;

	/*
	 * Check if we are talking to supported older binaries
	 * and translate zfs_cmd if necessary
	 */
	if (len != sizeof(zfs_iocparm_t)) {
		newioc = B_FALSE;
		compat = B_TRUE;

		vecnum = cmd;

		/* The argument size identifies which old layout is in use. */
		switch (len) {
		case sizeof(zfs_cmd_zcmd_t):
			cflag = ZFS_CMD_COMPAT_LZC;
			break;
		case sizeof(zfs_cmd_deadman_t):
			cflag = ZFS_CMD_COMPAT_DEADMAN;
			break;
		case sizeof(zfs_cmd_v28_t):
			cflag = ZFS_CMD_COMPAT_V28;
			break;
		case sizeof(zfs_cmd_v15_t):
			cflag = ZFS_CMD_COMPAT_V15;
			/*
			 * NOTE(review): cmd can be any value in 0..255 here;
			 * confirm the translation table covers that range.
			 */
			vecnum = zfs_ioctl_v15_to_v28[cmd];

			/*
			 * Return without further handling
			 * if the command is blacklisted.
			 */
			if (vecnum == ZFS_IOC_COMPAT_PASS)
				return (0);
			else if (vecnum == ZFS_IOC_COMPAT_FAIL)
				return (ENOTSUP);
			break;
		default:
			return (EINVAL);
		}
	}

#ifdef illumos
	vecnum = cmd - ZFS_IOC_FIRST;
	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
#endif

	/* Reject ioctl numbers that fall outside the dispatch table. */
	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
		return (SET_ERROR(EINVAL));
	vec = &zfs_ioc_vec[vecnum];

	zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);

#ifdef illumos
	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
	if (error != 0) {
		error = SET_ERROR(EFAULT);
		goto out;
	}
#else	/* !illumos */
	/*
	 * NOTE(review): zc was just obtained from kmem_zalloc(); this
	 * bzero() looks redundant.
	 */
	bzero(zc, sizeof(zfs_cmd_t));

	if (newioc) {
		zc_iocparm = (void *)arg;

		/*
		 * The declared version selects the expected zfs_cmd size and,
		 * for anything but the current version, the compat flavor.
		 */
		switch (zc_iocparm->zfs_ioctl_version) {
		case ZFS_IOCVER_CURRENT:
			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_t)) {
				error = SET_ERROR(EINVAL);
				goto out;
			}
			break;
		case ZFS_IOCVER_INLANES:
			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_inlanes_t)) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
			compat = B_TRUE;
			cflag = ZFS_CMD_COMPAT_INLANES;
			break;
		case ZFS_IOCVER_RESUME:
			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_resume_t)) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
			compat = B_TRUE;
			cflag = ZFS_CMD_COMPAT_RESUME;
			break;
		case ZFS_IOCVER_EDBP:
			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_edbp_t)) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
			compat = B_TRUE;
			cflag = ZFS_CMD_COMPAT_EDBP;
			break;
		case ZFS_IOCVER_ZCMD:
			if (zc_iocparm->zfs_cmd_size > sizeof(zfs_cmd_t) ||
			    zc_iocparm->zfs_cmd_size < sizeof(zfs_cmd_zcmd_t)) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
			compat = B_TRUE;
			cflag = ZFS_CMD_COMPAT_ZCMD;
			break;
		default:
			error = SET_ERROR(EINVAL);
			goto out;
			/* NOTREACHED */
		}

		if (compat) {
			/*
			 * Older layouts are copied into a scratch buffer and
			 * converted into zc further below.
			 */
			ASSERT(sizeof(zfs_cmd_t) >= zc_iocparm->zfs_cmd_size);
			compat_zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);
			bzero(compat_zc, sizeof(zfs_cmd_t));

			error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd,
			    compat_zc, zc_iocparm->zfs_cmd_size, flag);
			if (error != 0) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
		} else {
			error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd,
			    zc, zc_iocparm->zfs_cmd_size, flag);
			if (error != 0) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
		}
	}

	if (compat) {
		/*
		 * Convert the old layout into the current zfs_cmd_t; for old
		 * style ioctls the source is the ioctl argument itself.
		 */
		if (newioc) {
			ASSERT(compat_zc != NULL);
			zfs_cmd_compat_get(zc, compat_zc, cflag);
		} else {
			ASSERT(compat_zc == NULL);
			zfs_cmd_compat_get(zc, arg, cflag);
		}
		/* The compat pre-hook may redirect to a different ioctl. */
		oldvecnum = vecnum;
		error = zfs_ioctl_compat_pre(zc, &vecnum, cflag);
		if (error != 0)
			goto out;
		if (oldvecnum != vecnum)
			vec = &zfs_ioc_vec[vecnum];
	}
#endif	/* !illumos */

	zc->zc_iflags = flag & FKIOCTL;
	if (zc->zc_nvlist_src_size != 0) {
		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
		    zc->zc_iflags, &innvl);
		if (error != 0)
			goto out;
	}

	/* rewrite innvl for backwards compatibility */
	if (compat)
		innvl = zfs_ioctl_compat_innvl(zc, innvl, vecnum, cflag);

	/*
	 * Ensure that all pool/dataset names are valid before we pass down to
	 * the lower layers.
	 */
	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
	switch (vec->zvec_namecheck) {
	case POOL_NAME:
		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
			error = SET_ERROR(EINVAL);
		else
			error = pool_status_check(zc->zc_name,
			    vec->zvec_namecheck, vec->zvec_pool_check);
		break;

	case DATASET_NAME:
		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
			error = SET_ERROR(EINVAL);
		else
			error = pool_status_check(zc->zc_name,
			    vec->zvec_namecheck, vec->zvec_pool_check);
		break;

	case NO_NAME:
		break;
	}

	/* The security policy runs only if the name checks passed. */
	if (error == 0)
		error = vec->zvec_secpolicy(zc, innvl, cr);

	if (error != 0)
		goto out;

	/* legacy ioctls can modify zc_name */
	/* Keep a copy of the pool component (text before '/', '@' or '#'). */
	len = strcspn(zc->zc_name, "/@#") + 1;
	saved_poolname = kmem_alloc(len, KM_SLEEP);
	(void) strlcpy(saved_poolname, zc->zc_name, len);

	if (vec->zvec_func != NULL) {
		nvlist_t *outnvl;
		int puterror = 0;
		spa_t *spa;
		nvlist_t *lognv = NULL;

		ASSERT(vec->zvec_legacy_func == NULL);

		/*
		 * Add the innvl to the lognv before calling the func,
		 * in case the func changes the innvl.
		 */
		if (vec->zvec_allow_log) {
			lognv = fnvlist_alloc();
			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
			    vec->zvec_name);
			if (!nvlist_empty(innvl)) {
				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
				    innvl);
			}
		}

		outnvl = fnvlist_alloc();
		error = vec->zvec_func(zc->zc_name, innvl, outnvl);

		/* Record successful, loggable ioctls in the pool history. */
		if (error == 0 && vec->zvec_allow_log &&
		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
			if (!nvlist_empty(outnvl)) {
				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
				    outnvl);
			}
			(void) spa_history_log_nvl(spa, lognv);
			spa_close(spa, FTAG);
		}
		fnvlist_free(lognv);

		/* rewrite outnvl for backwards compatibility */
		if (compat)
			outnvl = zfs_ioctl_compat_outnvl(zc, outnvl, vecnum,
			    cflag);

		/*
		 * Hand the output nvlist back if there is anything to return
		 * or the caller supplied a destination buffer; a failed
		 * nvlist_smush() suppresses the copy.
		 */
		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
			int smusherror = 0;
			if (vec->zvec_smush_outnvlist) {
				smusherror = nvlist_smush(outnvl,
				    zc->zc_nvlist_dst_size);
			}
			if (smusherror == 0)
				puterror = put_nvlist(zc, outnvl);
		}

		/* A failure to return the output overrides the handler result. */
		if (puterror != 0)
			error = puterror;

		nvlist_free(outnvl);
	} else {
		error = vec->zvec_legacy_func(zc);
	}

out:
	nvlist_free(innvl);

#ifdef illumos
	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
	if (error == 0 && rc != 0)
		error = SET_ERROR(EFAULT);
#else
	/*
	 * Return the (possibly updated) command to the caller, converting it
	 * back to the caller's layout when a compat translation is active.
	 */
	if (compat) {
		zfs_ioctl_compat_post(zc, cmd, cflag);
		if (newioc) {
			ASSERT(compat_zc != NULL);
			ASSERT(sizeof(zfs_cmd_t) >= zc_iocparm->zfs_cmd_size);

			zfs_cmd_compat_put(zc, compat_zc, vecnum, cflag);
			rc = ddi_copyout(compat_zc,
			    (void *)(uintptr_t)zc_iocparm->zfs_cmd,
			    zc_iocparm->zfs_cmd_size, flag);
			if (error == 0 && rc != 0)
				error = SET_ERROR(EFAULT);
			kmem_free(compat_zc, sizeof (zfs_cmd_t));
		} else {
			zfs_cmd_compat_put(zc, arg, vecnum, cflag);
		}
	} else {
		ASSERT(newioc);

		rc = ddi_copyout(zc, (void *)(uintptr_t)zc_iocparm->zfs_cmd,
		    sizeof (zfs_cmd_t), flag);
		if (error == 0 && rc != 0)
			error = SET_ERROR(EFAULT);
	}
#endif
	/*
	 * On success of a loggable ioctl the saved pool name replaces any
	 * previous value stored under zfs_allow_log_key; otherwise the copy
	 * is released here.
	 */
	if (error == 0 && vec->zvec_allow_log) {
		char *s = tsd_get(zfs_allow_log_key);
		if (s != NULL)
			strfree(s);
		(void) tsd_set(zfs_allow_log_key, saved_poolname);
	} else {
		if (saved_poolname != NULL)
			strfree(saved_poolname);
	}

	kmem_free(zc, sizeof (zfs_cmd_t));
	return (error);
}
6556
6557#ifdef illumos
6558static int
6559zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
6560{
6561	if (cmd != DDI_ATTACH)
6562		return (DDI_FAILURE);
6563
6564	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
6565	    DDI_PSEUDO, 0) == DDI_FAILURE)
6566		return (DDI_FAILURE);
6567
6568	zfs_dip = dip;
6569
6570	ddi_report_dev(dip);
6571
6572	return (DDI_SUCCESS);
6573}
6574
6575static int
6576zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6577{
6578	if (spa_busy() || zfs_busy() || zvol_busy())
6579		return (DDI_FAILURE);
6580
6581	if (cmd != DDI_DETACH)
6582		return (DDI_FAILURE);
6583
6584	zfs_dip = NULL;
6585
6586	ddi_prop_remove_all(dip);
6587	ddi_remove_minor_node(dip, NULL);
6588
6589	return (DDI_SUCCESS);
6590}
6591
6592/*ARGSUSED*/
6593static int
6594zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
6595{
6596	switch (infocmd) {
6597	case DDI_INFO_DEVT2DEVINFO:
6598		*result = zfs_dip;
6599		return (DDI_SUCCESS);
6600
6601	case DDI_INFO_DEVT2INSTANCE:
6602		*result = (void *)0;
6603		return (DDI_SUCCESS);
6604	}
6605
6606	return (DDI_FAILURE);
6607}
6608#endif	/* illumos */
6609
6610/*
6611 * OK, so this is a little weird.
6612 *
6613 * /dev/zfs is the control node, i.e. minor 0.
6614 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
6615 *
6616 * /dev/zfs has basically nothing to do except serve up ioctls,
6617 * so most of the standard driver entry points are in zvol.c.
6618 */
6619#ifdef illumos
/*
 * Character/block entry points for the illumos build.  open, close and ioctl
 * are the zfsdev_* routines from this file; strategy, dump, read and write
 * are the zvol_* routines, and the remaining slots are stubs.
 */
static struct cb_ops zfs_cb_ops = {
	zfsdev_open,	/* open */
	zfsdev_close,	/* close */
	zvol_strategy,	/* strategy */
	nodev,		/* print */
	zvol_dump,	/* dump */
	zvol_read,	/* read */
	zvol_write,	/* write */
	zfsdev_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
	CB_REV,		/* version */
	nodev,		/* async read */
	nodev,		/* async write */
};
6640
/*
 * Device operations for the illumos build: wires up zfs_info, zfs_attach and
 * zfs_detach from this file and points at zfs_cb_ops for the driver entry
 * points.
 */
static struct dev_ops zfs_dev_ops = {
	DEVO_REV,	/* version */
	0,		/* refcnt */
	zfs_info,	/* info */
	nulldev,	/* identify */
	nulldev,	/* probe */
	zfs_attach,	/* attach */
	zfs_detach,	/* detach */
	nodev,		/* reset */
	&zfs_cb_ops,	/* driver operations */
	NULL,		/* no bus operations */
	NULL,		/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
6655
/*
 * Driver linkage for the illumos build: ties the "ZFS storage pool"
 * description to zfs_dev_ops.
 */
static struct modldrv zfs_modldrv = {
	&mod_driverops,
	"ZFS storage pool",
	&zfs_dev_ops
};
6661
/*
 * Module linkage for the illumos build, listing both the filesystem linkage
 * (zfs_modlfs) and the driver linkage (zfs_modldrv); the list ends with NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&zfs_modlfs,
	(void *)&zfs_modldrv,
	NULL
};
6668#endif	/* illumos */
6669
6670static struct cdevsw zfs_cdevsw = {
6671	.d_version =	D_VERSION,
6672	.d_open =	zfsdev_open,
6673	.d_ioctl =	zfsdev_ioctl,
6674	.d_name =	ZFS_DEV_NAME
6675};
6676
/*
 * Destructor registered for zfs_allow_log_key: frees the pool name string
 * stored under that key.
 */
static void
zfs_allow_log_destroy(void *arg)
{
	strfree((char *)arg);
}
6683
6684static void
6685zfsdev_init(void)
6686{
6687	zfsdev = make_dev(&zfs_cdevsw, 0x0, UID_ROOT, GID_OPERATOR, 0666,
6688	    ZFS_DEV_NAME);
6689}
6690
6691static void
6692zfsdev_fini(void)
6693{
6694	if (zfsdev != NULL)
6695		destroy_dev(zfsdev);
6696}
6697
/*
 * Token returned by root_mount_hold("ZFS") in zfs__init(); it is released
 * again with root_mount_rel() before zfs__init() returns.
 */
static struct root_hold_token *zfs_root_token;
/* NOTE(review): not referenced in this part of the file; confirm its users. */
struct proc *zfsproc;
6700
6701#ifdef illumos
6702int
6703_init(void)
6704{
6705	int error;
6706
6707	spa_init(FREAD | FWRITE);
6708	zfs_init();
6709	zvol_init();
6710	zfs_ioctl_init();
6711
6712	if ((error = mod_install(&modlinkage)) != 0) {
6713		zvol_fini();
6714		zfs_fini();
6715		spa_fini();
6716		return (error);
6717	}
6718
6719	tsd_create(&zfs_fsyncer_key, NULL);
6720	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
6721	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
6722
6723	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
6724	ASSERT(error == 0);
6725	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
6726
6727	return (0);
6728}
6729
6730int
6731_fini(void)
6732{
6733	int error;
6734
6735	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
6736		return (SET_ERROR(EBUSY));
6737
6738	if ((error = mod_remove(&modlinkage)) != 0)
6739		return (error);
6740
6741	zvol_fini();
6742	zfs_fini();
6743	spa_fini();
6744	if (zfs_nfsshare_inited)
6745		(void) ddi_modclose(nfs_mod);
6746	if (zfs_smbshare_inited)
6747		(void) ddi_modclose(smbsrv_mod);
6748	if (zfs_nfsshare_inited || zfs_smbshare_inited)
6749		(void) ddi_modclose(sharefs_mod);
6750
6751	tsd_destroy(&zfs_fsyncer_key);
6752	ldi_ident_release(zfs_li);
6753	zfs_li = NULL;
6754	mutex_destroy(&zfs_share_lock);
6755
6756	return (error);
6757}
6758
6759int
6760_info(struct modinfo *modinfop)
6761{
6762	return (mod_info(&modlinkage, modinfop));
6763}
6764#endif	/* illumos */
6765
/* Forward declarations for the module load/unload and shutdown handlers. */
static int zfs__init(void);
static int zfs__fini(void);
static void zfs_shutdown(void *, int);

/*
 * Tag for the shutdown_post_sync handler registered in zfs_modevent() on
 * MOD_LOAD and deregistered on a successful MOD_UNLOAD.
 */
static eventhandler_tag zfs_shutdown_event_tag;
6771
#ifdef __FreeBSD__
/*
 * zfs__init() prints a notice at load time when the kernel was built with
 * fewer KSTACK_PAGES than this.
 */
#define ZFS_MIN_KSTACK_PAGES 4
#endif
6775
6776int
6777zfs__init(void)
6778{
6779
6780#ifdef __FreeBSD__
6781#if KSTACK_PAGES < ZFS_MIN_KSTACK_PAGES
6782	printf("ZFS NOTICE: KSTACK_PAGES is %d which could result in stack "
6783	    "overflow panic!\nPlease consider adding "
6784	    "'options KSTACK_PAGES=%d' to your kernel config\n", KSTACK_PAGES,
6785	    ZFS_MIN_KSTACK_PAGES);
6786#endif
6787#endif
6788	zfs_root_token = root_mount_hold("ZFS");
6789
6790	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
6791
6792	spa_init(FREAD | FWRITE);
6793	zfs_init();
6794	zvol_init();
6795	zfs_ioctl_init();
6796
6797	tsd_create(&zfs_fsyncer_key, NULL);
6798	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
6799	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
6800	tsd_create(&zfs_geom_probe_vdev_key, NULL);
6801
6802	printf("ZFS storage pool version: features support (" SPA_VERSION_STRING ")\n");
6803	root_mount_rel(zfs_root_token);
6804
6805	zfsdev_init();
6806
6807	return (0);
6808}
6809
6810int
6811zfs__fini(void)
6812{
6813	if (spa_busy() || zfs_busy() || zvol_busy() ||
6814	    zio_injection_enabled) {
6815		return (EBUSY);
6816	}
6817
6818	zfsdev_fini();
6819	zvol_fini();
6820	zfs_fini();
6821	spa_fini();
6822
6823	tsd_destroy(&zfs_fsyncer_key);
6824	tsd_destroy(&rrw_tsd_key);
6825	tsd_destroy(&zfs_allow_log_key);
6826
6827	mutex_destroy(&zfs_share_lock);
6828
6829	return (0);
6830}
6831
6832static void
6833zfs_shutdown(void *arg __unused, int howto __unused)
6834{
6835
6836	/*
6837	 * ZFS fini routines can not properly work in a panic-ed system.
6838	 */
6839	if (panicstr == NULL)
6840		(void)zfs__fini();
6841}
6842
6843
6844static int
6845zfs_modevent(module_t mod, int type, void *unused __unused)
6846{
6847	int err;
6848
6849	switch (type) {
6850	case MOD_LOAD:
6851		err = zfs__init();
6852		if (err == 0)
6853			zfs_shutdown_event_tag = EVENTHANDLER_REGISTER(
6854			    shutdown_post_sync, zfs_shutdown, NULL,
6855			    SHUTDOWN_PRI_FIRST);
6856		return (err);
6857	case MOD_UNLOAD:
6858		err = zfs__fini();
6859		if (err == 0 && zfs_shutdown_event_tag != NULL)
6860			EVENTHANDLER_DEREGISTER(shutdown_post_sync,
6861			    zfs_shutdown_event_tag);
6862		return (err);
6863	case MOD_SHUTDOWN:
6864		return (0);
6865	default:
6866		break;
6867	}
6868	return (EOPNOTSUPP);
6869}
6870
6871static moduledata_t zfs_mod = {
6872	"zfsctrl",
6873	zfs_modevent,
6874	0
6875};
6876DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_VFS, SI_ORDER_ANY);
6877MODULE_VERSION(zfsctrl, 1);
6878MODULE_DEPEND(zfsctrl, opensolaris, 1, 1, 1);
6879MODULE_DEPEND(zfsctrl, krpc, 1, 1, 1);
6880MODULE_DEPEND(zfsctrl, acl_nfs4, 1, 1, 1);
6881