zfs_ioctl.c revision 332547
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
25 * Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
26 * Copyright 2014 Xin Li <delphij@FreeBSD.org>. All rights reserved.
27 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
28 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
29 * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
30 * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
31 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
32 * Copyright (c) 2013 Steven Hartland. All rights reserved.
33 * Copyright (c) 2014 Integros [integros.com]
34 * Copyright 2016 Toomas Soome <tsoome@me.com>
35 * Copyright 2017 RackTop Systems.
36 * Copyright (c) 2017 Datto Inc.
37 * Copyright 2016 Toomas Soome <tsoome@me.com>
38 */
39
40/*
41 * ZFS ioctls.
42 *
43 * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
44 * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
45 *
46 * There are two ways that we handle ioctls: the legacy way where almost
47 * all of the logic is in the ioctl callback, and the new way where most
48 * of the marshalling is handled in the common entry point, zfsdev_ioctl().
49 *
50 * Non-legacy ioctls should be registered by calling
51 * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
52 * from userland by lzc_ioctl().
53 *
54 * The registration arguments are as follows:
55 *
56 * const char *name
57 *   The name of the ioctl.  This is used for history logging.  If the
58 *   ioctl returns successfully (the callback returns 0), and allow_log
59 *   is true, then a history log entry will be recorded with the input &
60 *   output nvlists.  The log entry can be printed with "zpool history -i".
61 *
62 * zfs_ioc_t ioc
63 *   The ioctl request number, which userland will pass to ioctl(2).
64 *   The ioctl numbers can change from release to release, because
65 *   the caller (libzfs) must be matched to the kernel.
66 *
67 * zfs_secpolicy_func_t *secpolicy
68 *   This function will be called before the zfs_ioc_func_t, to
69 *   determine if this operation is permitted.  It should return EPERM
70 *   on failure, and 0 on success.  Checks include determining if the
71 *   dataset is visible in this zone, and if the user has either all
72 *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
73 *   to do this operation on this dataset with "zfs allow".
74 *
75 * zfs_ioc_namecheck_t namecheck
76 *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
77 *   name, a dataset name, or nothing.  If the name is not well-formed,
78 *   the ioctl will fail and the callback will not be called.
79 *   Therefore, the callback can assume that the name is well-formed
80 *   (e.g. is null-terminated, doesn't have more than one '@' character,
81 *   doesn't have invalid characters).
82 *
83 * zfs_ioc_poolcheck_t pool_check
84 *   This specifies requirements on the pool state.  If the pool does
85 *   not meet them (is suspended or is readonly), the ioctl will fail
86 *   and the callback will not be called.  If any checks are specified
87 *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
88 *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
89 *   POOL_CHECK_READONLY).
90 *
91 * boolean_t smush_outnvlist
92 *   If smush_outnvlist is true, then the output is presumed to be a
93 *   list of errors, and it will be "smushed" down to fit into the
94 *   caller's buffer, by removing some entries and replacing them with a
95 *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
96 *   nvlist_smush() for details.  If smush_outnvlist is false, and the
97 *   outnvlist does not fit into the userland-provided buffer, then the
98 *   ioctl will fail with ENOMEM.
99 *
100 * zfs_ioc_func_t *func
101 *   The callback function that will perform the operation.
102 *
103 *   The callback should return 0 on success, or an error number on
104 *   failure.  If the function fails, the userland ioctl will return -1,
105 *   and errno will be set to the callback's return value.  The callback
106 *   will be called with the following arguments:
107 *
108 *   const char *name
109 *     The name of the pool or dataset to operate on, from
110 *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
111 *     expected type (pool, dataset, or none).
112 *
113 *   nvlist_t *innvl
114 *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
115 *     NULL if no input nvlist was provided.  Changes to this nvlist are
116 *     ignored.  If the input nvlist could not be deserialized, the
117 *     ioctl will fail and the callback will not be called.
118 *
119 *   nvlist_t *outnvl
120 *     The output nvlist, initially empty.  The callback can fill it in,
121 *     and it will be returned to userland by serializing it into
122 *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
123 *     fails (e.g. because the caller didn't supply a large enough
124 *     buffer), then the overall ioctl will fail.  See the
 *     'smush_outnvlist' argument above for additional behaviors.
126 *
127 *     There are two typical uses of the output nvlist:
128 *       - To return state, e.g. property values.  In this case,
129 *         smush_outnvlist should be false.  If the buffer was not large
130 *         enough, the caller will reallocate a larger buffer and try
131 *         the ioctl again.
132 *
133 *       - To return multiple errors from an ioctl which makes on-disk
134 *         changes.  In this case, smush_outnvlist should be true.
135 *         Ioctls which make on-disk modifications should generally not
136 *         use the outnvl if they succeed, because the caller can not
137 *         distinguish between the operation failing, and
138 *         deserialization failing.
139 */
140#ifdef __FreeBSD__
141#include "opt_kstack_pages.h"
142#endif
143
144#include <sys/types.h>
145#include <sys/param.h>
146#include <sys/systm.h>
147#include <sys/conf.h>
148#include <sys/kernel.h>
149#include <sys/lock.h>
150#include <sys/malloc.h>
151#include <sys/mutex.h>
152#include <sys/proc.h>
153#include <sys/errno.h>
154#include <sys/uio.h>
155#include <sys/buf.h>
156#include <sys/file.h>
157#include <sys/kmem.h>
158#include <sys/conf.h>
159#include <sys/cmn_err.h>
160#include <sys/stat.h>
161#include <sys/zfs_ioctl.h>
162#include <sys/zfs_vfsops.h>
163#include <sys/zfs_znode.h>
164#include <sys/zap.h>
165#include <sys/spa.h>
166#include <sys/spa_impl.h>
167#include <sys/vdev.h>
168#include <sys/dmu.h>
169#include <sys/dsl_dir.h>
170#include <sys/dsl_dataset.h>
171#include <sys/dsl_prop.h>
172#include <sys/dsl_deleg.h>
173#include <sys/dmu_objset.h>
174#include <sys/dmu_impl.h>
175#include <sys/dmu_tx.h>
176#include <sys/sunddi.h>
177#include <sys/policy.h>
178#include <sys/zone.h>
179#include <sys/nvpair.h>
180#include <sys/mount.h>
181#include <sys/taskqueue.h>
182#include <sys/sdt.h>
183#include <sys/varargs.h>
184#include <sys/fs/zfs.h>
185#include <sys/zfs_ctldir.h>
186#include <sys/zfs_dir.h>
187#include <sys/zfs_onexit.h>
188#include <sys/zvol.h>
189#include <sys/dsl_scan.h>
190#include <sys/dmu_objset.h>
191#include <sys/dmu_send.h>
192#include <sys/dsl_destroy.h>
193#include <sys/dsl_bookmark.h>
194#include <sys/dsl_userhold.h>
195#include <sys/zfeature.h>
196#include <sys/zcp.h>
197#include <sys/zio_checksum.h>
198#include <sys/vdev_removal.h>
199
200#include "zfs_namecheck.h"
201#include "zfs_prop.h"
202#include "zfs_deleg.h"
203#include "zfs_comutil.h"
204#include "zfs_ioctl_compat.h"
205
206#include "lua.h"
207#include "lauxlib.h"
208
/* Character-device handle for the ZFS control node (presumably /dev/zfs). */
static struct cdev *zfsdev;

/* Entry points defined outside this file. */
extern void zfs_init(void);
extern void zfs_fini(void);

/*
 * Key variables.  zfs_allow_log_key is private to this file and is read with
 * tsd_get() by zfs_secpolicy_log_history(); the remaining keys are shared
 * with other files (presumably thread-specific-data keys as well -- confirm
 * at their definitions).
 */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
static uint_t zfs_allow_log_key;
extern uint_t zfs_geom_probe_vdev_key;

/*
 * Callback signatures used by the ioctl table (see the comment at the top of
 * this file):
 *   zfs_ioc_legacy_func_t - legacy ioctl handler, given only the zfs_cmd_t.
 *   zfs_ioc_func_t        - new-style handler: (name, innvl, outnvl).
 *   zfs_secpolicy_func_t  - permission check run before the handler.
 */
typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
222
/*
 * What an ioctl expects to find in zfs_cmd_t:zc_name (see the 'namecheck'
 * registration argument described at the top of this file).
 */
typedef enum {
	NO_NAME,	/* zc_name carries nothing */
	POOL_NAME,	/* zc_name is a pool name */
	DATASET_NAME	/* zc_name is a dataset name */
} zfs_ioc_namecheck_t;
228
/*
 * Pool-state requirements for an ioctl (see the 'pool_check' registration
 * argument described at the top of this file).  Each value is a distinct
 * bit, so the checks can be or-ed together.  Note that POOL_CHECK_NONE is
 * itself a non-zero bit (1 << 0), not 0.
 */
typedef enum {
	POOL_CHECK_NONE		= 1 << 0,
	POOL_CHECK_SUSPENDED	= 1 << 1,
	POOL_CHECK_READONLY	= 1 << 2,
} zfs_ioc_poolcheck_t;
234
/*
 * One registered ioctl.  The fields mirror the registration arguments
 * described in the comment at the top of this file.
 */
typedef struct zfs_ioc_vec {
	/* handler for a legacy ioctl (takes only the zfs_cmd_t) */
	zfs_ioc_legacy_func_t	*zvec_legacy_func;
	/* handler for a non-legacy ioctl (name, innvl, outnvl) */
	zfs_ioc_func_t		*zvec_func;
	/* permission check called before the handler */
	zfs_secpolicy_func_t	*zvec_secpolicy;
	/* what zc_name is expected to contain */
	zfs_ioc_namecheck_t	zvec_namecheck;
	/* whether a successful call may be recorded in the pool history */
	boolean_t		zvec_allow_log;
	/* required pool state */
	zfs_ioc_poolcheck_t	zvec_pool_check;
	/* whether an oversized output nvlist is "smushed" to fit */
	boolean_t		zvec_smush_outnvlist;
	/* ioctl name, used for history logging */
	const char		*zvec_name;
} zfs_ioc_vec_t;
245
/*
 * Delegated-permission names for the user/group space properties.
 * This array is indexed by zfs_userquota_prop_t, so the entry order has to
 * match the order of that enumeration.
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
};
253
/*
 * Forward declarations for functions that are referenced before their
 * definitions.  All are file-local except zfs_set_prop_nvlist(), which has
 * external linkage.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);

static void zfsdev_close(void *data);

static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
267
268/* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
269void
270__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
271{
272	const char *newfile;
273	char buf[512];
274	va_list adx;
275
276	/*
277	 * Get rid of annoying "../common/" prefix to filename.
278	 */
279	newfile = strrchr(file, '/');
280	if (newfile != NULL) {
281		newfile = newfile + 1; /* Get rid of leading / */
282	} else {
283		newfile = file;
284	}
285
286	va_start(adx, fmt);
287	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
288	va_end(adx);
289
290	/*
291	 * To get this data, use the zfs-dprintf probe as so:
292	 * dtrace -q -n 'zfs-dprintf \
293	 *	/stringof(arg0) == "dbuf.c"/ \
294	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
295	 * arg0 = file name
296	 * arg1 = function name
297	 * arg2 = line number
298	 * arg3 = message
299	 */
300	DTRACE_PROBE4(zfs__dprintf,
301	    char *, newfile, char *, func, int, line, char *, buf);
302}
303
/*
 * Release a history record buffer.  The buffer must have been allocated
 * with a size of HIS_MAX_RECORD_LEN, as history_str_get() does.
 */
static void
history_str_free(char *buf)
{
	kmem_free(buf, HIS_MAX_RECORD_LEN);
}
309
310static char *
311history_str_get(zfs_cmd_t *zc)
312{
313	char *buf;
314
315	if (zc->zc_history == 0)
316		return (NULL);
317
318	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
319	if (copyinstr((void *)(uintptr_t)zc->zc_history,
320	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
321		history_str_free(buf);
322		return (NULL);
323	}
324
325	buf[HIS_MAX_RECORD_LEN -1] = '\0';
326
327	return (buf);
328}
329
330/*
331 * Check to see if the named dataset is currently defined as bootable
332 */
333static boolean_t
334zfs_is_bootfs(const char *name)
335{
336	objset_t *os;
337
338	if (dmu_objset_hold(name, FTAG, &os) == 0) {
339		boolean_t ret;
340		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
341		dmu_objset_rele(os, FTAG);
342		return (ret);
343	}
344	return (B_FALSE);
345}
346
347/*
348 * Return non-zero if the spa version is less than requested version.
349 */
350static int
351zfs_earlier_version(const char *name, int version)
352{
353	spa_t *spa;
354
355	if (spa_open(name, &spa, FTAG) == 0) {
356		if (spa_version(spa) < version) {
357			spa_close(spa, FTAG);
358			return (1);
359		}
360		spa_close(spa, FTAG);
361	}
362	return (0);
363}
364
365/*
366 * Return TRUE if the ZPL version is less than requested version.
367 */
368static boolean_t
369zpl_earlier_version(const char *name, int version)
370{
371	objset_t *os;
372	boolean_t rc = B_TRUE;
373
374	if (dmu_objset_hold(name, FTAG, &os) == 0) {
375		uint64_t zplversion;
376
377		if (dmu_objset_type(os) != DMU_OST_ZFS) {
378			dmu_objset_rele(os, FTAG);
379			return (B_TRUE);
380		}
381		/* XXX reading from non-owned objset */
382		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
383			rc = zplversion < version;
384		dmu_objset_rele(os, FTAG);
385	}
386	return (rc);
387}
388
389static void
390zfs_log_history(zfs_cmd_t *zc)
391{
392	spa_t *spa;
393	char *buf;
394
395	if ((buf = history_str_get(zc)) == NULL)
396		return;
397
398	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
399		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
400			(void) spa_history_log(spa, buf);
401		spa_close(spa, FTAG);
402	}
403	history_str_free(buf);
404}
405
/*
 * Policy for top-level read operations (list pools).  Requires no privileges,
 * and can be used in the local zone, as there is no associated dataset.
 *
 * All three arguments are ignored and the result is always 0 (permitted).
 */
/* ARGSUSED */
static int
zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
	return (0);
}
416
417/*
418 * Policy for dataset read operations (list children, get statistics).  Requires
419 * no privileges, but must be visible in the local zone.
420 */
421/* ARGSUSED */
422static int
423zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
424{
425	if (INGLOBALZONE(curthread) ||
426	    zone_dataset_visible(zc->zc_name, NULL))
427		return (0);
428
429	return (SET_ERROR(ENOENT));
430}
431
432static int
433zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
434{
435	int writable = 1;
436
437	/*
438	 * The dataset must be visible by this zone -- check this first
439	 * so they don't see EPERM on something they shouldn't know about.
440	 */
441	if (!INGLOBALZONE(curthread) &&
442	    !zone_dataset_visible(dataset, &writable))
443		return (SET_ERROR(ENOENT));
444
445	if (INGLOBALZONE(curthread)) {
446		/*
447		 * If the fs is zoned, only root can access it from the
448		 * global zone.
449		 */
450		if (secpolicy_zfs(cr) && zoned)
451			return (SET_ERROR(EPERM));
452	} else {
453		/*
454		 * If we are in a local zone, the 'zoned' property must be set.
455		 */
456		if (!zoned)
457			return (SET_ERROR(EPERM));
458
459		/* must be writable by this zone */
460		if (!writable)
461			return (SET_ERROR(EPERM));
462	}
463	return (0);
464}
465
466static int
467zfs_dozonecheck(const char *dataset, cred_t *cr)
468{
469	uint64_t zoned;
470
471	if (dsl_prop_get_integer(dataset, "jailed", &zoned, NULL))
472		return (SET_ERROR(ENOENT));
473
474	return (zfs_dozonecheck_impl(dataset, zoned, cr));
475}
476
477static int
478zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
479{
480	uint64_t zoned;
481
482	if (dsl_prop_get_int_ds(ds, "jailed", &zoned))
483		return (SET_ERROR(ENOENT));
484
485	return (zfs_dozonecheck_impl(dataset, zoned, cr));
486}
487
488static int
489zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
490    const char *perm, cred_t *cr)
491{
492	int error;
493
494	error = zfs_dozonecheck_ds(name, ds, cr);
495	if (error == 0) {
496		error = secpolicy_zfs(cr);
497		if (error != 0)
498			error = dsl_deleg_access_impl(ds, perm, cr);
499	}
500	return (error);
501}
502
503static int
504zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
505{
506	int error;
507	dsl_dataset_t *ds;
508	dsl_pool_t *dp;
509
510	/*
511	 * First do a quick check for root in the global zone, which
512	 * is allowed to do all write_perms.  This ensures that zfs_ioc_*
513	 * will get to handle nonexistent datasets.
514	 */
515	if (INGLOBALZONE(curthread) && secpolicy_zfs(cr) == 0)
516		return (0);
517
518	error = dsl_pool_hold(name, FTAG, &dp);
519	if (error != 0)
520		return (error);
521
522	error = dsl_dataset_hold(dp, name, FTAG, &ds);
523	if (error != 0) {
524		dsl_pool_rele(dp, FTAG);
525		return (error);
526	}
527
528	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
529
530	dsl_dataset_rele(ds, FTAG);
531	dsl_pool_rele(dp, FTAG);
532	return (error);
533}
534
#ifdef SECLABEL
/*
 * Policy for setting the security label property.
 *
 * name		dataset whose label is being changed
 * strval	requested label: a hex label string or ZFS_MLSLABEL_DEFAULT
 * cr		credentials used for the final privilege check
 *
 * Returns 0 for success, non-zero for access and other errors.
 */
static int
zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
{
	char		ds_hexsl[MAXNAMELEN];
	bslabel_t	ds_sl, new_sl;
	boolean_t	new_default = FALSE;
	uint64_t	zoned;
	int		needed_priv = -1;
	int		error;

	/* First get the existing dataset label. */
	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
	if (error != 0)
		return (SET_ERROR(EPERM));

	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
		new_default = TRUE;

	/* The label must be translatable */
	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
		return (SET_ERROR(EINVAL));

	/*
	 * In a non-global zone, disallow attempts to set a label that
	 * doesn't match that of the zone; otherwise no other checks
	 * are needed.
	 */
	if (!INGLOBALZONE(curproc)) {
		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
			return (SET_ERROR(EPERM));
		return (0);
	}

	/*
	 * For global-zone datasets (i.e., those whose zoned property is
	 * "off"), verify that the specified new label is valid for the
	 * global zone.
	 */
	if (dsl_prop_get_integer(name,
	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
		return (SET_ERROR(EPERM));
	if (!zoned) {
		if (zfs_check_global_label(name, strval) != 0)
			return (SET_ERROR(EPERM));
	}

	/*
	 * If the existing dataset label is nondefault, check if the
	 * dataset is mounted (label cannot be changed while mounted).
	 * Get the zfsvfs; if there isn't one, then the dataset isn't
	 * mounted (or isn't a dataset, doesn't exist, ...).
	 */
	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
		objset_t *os;
		static char *setsl_tag = "setsl_tag";

		/*
		 * Try to own the dataset; abort if there is any error,
		 * (e.g., already mounted, in use, or other error).
		 */
		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
		    setsl_tag, &os);
		if (error != 0)
			return (SET_ERROR(EPERM));

		dmu_objset_disown(os, setsl_tag);

		if (new_default) {
			needed_priv = PRIV_FILE_DOWNGRADE_SL;
			goto out_check;
		}

		if (hexstr_to_label(strval, &new_sl) != 0)
			return (SET_ERROR(EPERM));

		/*
		 * Decode the dataset's current label before comparing.
		 * Without this, ds_sl would be read uninitialized by the
		 * dominance checks below.
		 */
		if (hexstr_to_label(ds_hexsl, &ds_sl) != 0)
			return (SET_ERROR(EPERM));

		if (blstrictdom(&ds_sl, &new_sl))
			needed_priv = PRIV_FILE_DOWNGRADE_SL;
		else if (blstrictdom(&new_sl, &ds_sl))
			needed_priv = PRIV_FILE_UPGRADE_SL;
	} else {
		/* dataset currently has a default label */
		if (!new_default)
			needed_priv = PRIV_FILE_UPGRADE_SL;
	}

out_check:
	if (needed_priv != -1)
		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
	return (0);
}
#endif	/* SECLABEL */
633
634static int
635zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
636    cred_t *cr)
637{
638	char *strval;
639
640	/*
641	 * Check permissions for special properties.
642	 */
643	switch (prop) {
644	case ZFS_PROP_ZONED:
645		/*
646		 * Disallow setting of 'zoned' from within a local zone.
647		 */
648		if (!INGLOBALZONE(curthread))
649			return (SET_ERROR(EPERM));
650		break;
651
652	case ZFS_PROP_QUOTA:
653	case ZFS_PROP_FILESYSTEM_LIMIT:
654	case ZFS_PROP_SNAPSHOT_LIMIT:
655		if (!INGLOBALZONE(curthread)) {
656			uint64_t zoned;
657			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
658			/*
659			 * Unprivileged users are allowed to modify the
660			 * limit on things *under* (ie. contained by)
661			 * the thing they own.
662			 */
663			if (dsl_prop_get_integer(dsname, "jailed", &zoned,
664			    setpoint))
665				return (SET_ERROR(EPERM));
666			if (!zoned || strlen(dsname) <= strlen(setpoint))
667				return (SET_ERROR(EPERM));
668		}
669		break;
670
671	case ZFS_PROP_MLSLABEL:
672#ifdef SECLABEL
673		if (!is_system_labeled())
674			return (SET_ERROR(EPERM));
675
676		if (nvpair_value_string(propval, &strval) == 0) {
677			int err;
678
679			err = zfs_set_slabel_policy(dsname, strval, CRED());
680			if (err != 0)
681				return (err);
682		}
683#else
684		return (EOPNOTSUPP);
685#endif
686		break;
687	}
688
689	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
690}
691
692/* ARGSUSED */
693static int
694zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
695{
696	int error;
697
698	error = zfs_dozonecheck(zc->zc_name, cr);
699	if (error != 0)
700		return (error);
701
702	/*
703	 * permission to set permissions will be evaluated later in
704	 * dsl_deleg_can_allow()
705	 */
706	return (0);
707}
708
709/* ARGSUSED */
710static int
711zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
712{
713	return (zfs_secpolicy_write_perms(zc->zc_name,
714	    ZFS_DELEG_PERM_ROLLBACK, cr));
715}
716
717/* ARGSUSED */
718static int
719zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
720{
721	dsl_pool_t *dp;
722	dsl_dataset_t *ds;
723	char *cp;
724	int error;
725
726	/*
727	 * Generate the current snapshot name from the given objsetid, then
728	 * use that name for the secpolicy/zone checks.
729	 */
730	cp = strchr(zc->zc_name, '@');
731	if (cp == NULL)
732		return (SET_ERROR(EINVAL));
733	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
734	if (error != 0)
735		return (error);
736
737	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
738	if (error != 0) {
739		dsl_pool_rele(dp, FTAG);
740		return (error);
741	}
742
743	dsl_dataset_name(ds, zc->zc_name);
744
745	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
746	    ZFS_DELEG_PERM_SEND, cr);
747	dsl_dataset_rele(ds, FTAG);
748	dsl_pool_rele(dp, FTAG);
749
750	return (error);
751}
752
753/* ARGSUSED */
754static int
755zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
756{
757	return (zfs_secpolicy_write_perms(zc->zc_name,
758	    ZFS_DELEG_PERM_SEND, cr));
759}
760
761/* ARGSUSED */
762static int
763zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
764{
765	vnode_t *vp;
766	int error;
767
768	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
769	    NO_FOLLOW, NULL, &vp)) != 0)
770		return (error);
771
772	/* Now make sure mntpnt and dataset are ZFS */
773
774	if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
775	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
776	    zc->zc_name) != 0)) {
777		VN_RELE(vp);
778		return (SET_ERROR(EPERM));
779	}
780
781	VN_RELE(vp);
782	return (dsl_deleg_access(zc->zc_name,
783	    ZFS_DELEG_PERM_SHARE, cr));
784}
785
786int
787zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
788{
789	if (!INGLOBALZONE(curthread))
790		return (SET_ERROR(EPERM));
791
792	if (secpolicy_nfs(cr) == 0) {
793		return (0);
794	} else {
795		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
796	}
797}
798
799int
800zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
801{
802	if (!INGLOBALZONE(curthread))
803		return (SET_ERROR(EPERM));
804
805	if (secpolicy_smb(cr) == 0) {
806		return (0);
807	} else {
808		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
809	}
810}
811
/*
 * Compute the parent of a dataset or snapshot name.
 *
 * If the name contains an '@', everything from the last '@' onward is
 * dropped; otherwise everything from the last '/' onward is dropped.
 *
 * datasetname	NUL-terminated name to examine
 * parent	buffer that receives the parent name
 * parentsize	size of 'parent' in bytes
 *
 * Returns 0 on success, EINVAL if parentsize is not positive, ENAMETOOLONG
 * if datasetname and its terminator do not fit in 'parent', and ENOENT if
 * the name contains neither '@' nor '/'.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0)
		return (SET_ERROR(EINVAL));

	/*
	 * strncpy() does not terminate the destination when the source
	 * fills it, and the strrchr() calls below would then run off the
	 * end of the buffer.  Reject names that do not fit rather than
	 * deriving a parent from a truncated name.
	 */
	if (strlen(datasetname) >= (size_t)parentsize)
		return (SET_ERROR(ENAMETOOLONG));

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	cp = strrchr(parent, '@');
	if (cp != NULL) {
		cp[0] = '\0';
	} else {
		cp = strrchr(parent, '/');
		if (cp == NULL)
			return (SET_ERROR(ENOENT));
		cp[0] = '\0';
	}

	return (0);
}
833
834int
835zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
836{
837	int error;
838
839	if ((error = zfs_secpolicy_write_perms(name,
840	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
841		return (error);
842
843	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
844}
845
846/* ARGSUSED */
847static int
848zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
849{
850	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
851}
852
853/*
854 * Destroying snapshots with delegated permissions requires
855 * descendant mount and destroy permissions.
856 */
857/* ARGSUSED */
858static int
859zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
860{
861	nvlist_t *snaps;
862	nvpair_t *pair, *nextpair;
863	int error = 0;
864
865	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
866		return (SET_ERROR(EINVAL));
867	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
868	    pair = nextpair) {
869		nextpair = nvlist_next_nvpair(snaps, pair);
870		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
871		if (error == ENOENT) {
872			/*
873			 * Ignore any snapshots that don't exist (we consider
874			 * them "already destroyed").  Remove the name from the
875			 * nvl here in case the snapshot is created between
876			 * now and when we try to destroy it (in which case
877			 * we don't want to destroy it since we haven't
878			 * checked for permission).
879			 */
880			fnvlist_remove_nvpair(snaps, pair);
881			error = 0;
882		}
883		if (error != 0)
884			break;
885	}
886
887	return (error);
888}
889
890int
891zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
892{
893	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
894	int	error;
895
896	if ((error = zfs_secpolicy_write_perms(from,
897	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
898		return (error);
899
900	if ((error = zfs_secpolicy_write_perms(from,
901	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
902		return (error);
903
904	if ((error = zfs_get_parent(to, parentname,
905	    sizeof (parentname))) != 0)
906		return (error);
907
908	if ((error = zfs_secpolicy_write_perms(parentname,
909	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
910		return (error);
911
912	if ((error = zfs_secpolicy_write_perms(parentname,
913	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
914		return (error);
915
916	return (error);
917}
918
919/* ARGSUSED */
920static int
921zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
922{
923	char *at = NULL;
924	int error;
925
926	if ((zc->zc_cookie & 1) != 0) {
927		/*
928		 * This is recursive rename, so the starting snapshot might
929		 * not exist. Check file system or volume permission instead.
930		 */
931		at = strchr(zc->zc_name, '@');
932		if (at == NULL)
933			return (EINVAL);
934		*at = '\0';
935	}
936
937	error = zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr);
938
939	if (at != NULL)
940		*at = '@';
941
942	return (error);
943}
944
945/* ARGSUSED */
946static int
947zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
948{
949	dsl_pool_t *dp;
950	dsl_dataset_t *clone;
951	int error;
952
953	error = zfs_secpolicy_write_perms(zc->zc_name,
954	    ZFS_DELEG_PERM_PROMOTE, cr);
955	if (error != 0)
956		return (error);
957
958	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
959	if (error != 0)
960		return (error);
961
962	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
963
964	if (error == 0) {
965		char parentname[ZFS_MAX_DATASET_NAME_LEN];
966		dsl_dataset_t *origin = NULL;
967		dsl_dir_t *dd;
968		dd = clone->ds_dir;
969
970		error = dsl_dataset_hold_obj(dd->dd_pool,
971		    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
972		if (error != 0) {
973			dsl_dataset_rele(clone, FTAG);
974			dsl_pool_rele(dp, FTAG);
975			return (error);
976		}
977
978		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
979		    ZFS_DELEG_PERM_MOUNT, cr);
980
981		dsl_dataset_name(origin, parentname);
982		if (error == 0) {
983			error = zfs_secpolicy_write_perms_ds(parentname, origin,
984			    ZFS_DELEG_PERM_PROMOTE, cr);
985		}
986		dsl_dataset_rele(clone, FTAG);
987		dsl_dataset_rele(origin, FTAG);
988	}
989	dsl_pool_rele(dp, FTAG);
990	return (error);
991}
992
993/* ARGSUSED */
994static int
995zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
996{
997	int error;
998
999	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1000	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
1001		return (error);
1002
1003	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1004	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
1005		return (error);
1006
1007	return (zfs_secpolicy_write_perms(zc->zc_name,
1008	    ZFS_DELEG_PERM_CREATE, cr));
1009}
1010
1011int
1012zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1013{
1014	return (zfs_secpolicy_write_perms(name,
1015	    ZFS_DELEG_PERM_SNAPSHOT, cr));
1016}
1017
1018/*
1019 * Check for permission to create each snapshot in the nvlist.
1020 */
1021/* ARGSUSED */
1022static int
1023zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1024{
1025	nvlist_t *snaps;
1026	int error;
1027	nvpair_t *pair;
1028
1029	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
1030		return (SET_ERROR(EINVAL));
1031	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1032	    pair = nvlist_next_nvpair(snaps, pair)) {
1033		char *name = nvpair_name(pair);
1034		char *atp = strchr(name, '@');
1035
1036		if (atp == NULL) {
1037			error = SET_ERROR(EINVAL);
1038			break;
1039		}
1040		*atp = '\0';
1041		error = zfs_secpolicy_snapshot_perms(name, cr);
1042		*atp = '@';
1043		if (error != 0)
1044			break;
1045	}
1046	return (error);
1047}
1048
1049/*
1050 * Check for permission to create each snapshot in the nvlist.
1051 */
1052/* ARGSUSED */
1053static int
1054zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1055{
1056	int error = 0;
1057
1058	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1059	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1060		char *name = nvpair_name(pair);
1061		char *hashp = strchr(name, '#');
1062
1063		if (hashp == NULL) {
1064			error = SET_ERROR(EINVAL);
1065			break;
1066		}
1067		*hashp = '\0';
1068		error = zfs_secpolicy_write_perms(name,
1069		    ZFS_DELEG_PERM_BOOKMARK, cr);
1070		*hashp = '#';
1071		if (error != 0)
1072			break;
1073	}
1074	return (error);
1075}
1076
1077/* ARGSUSED */
1078static int
1079zfs_secpolicy_remap(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1080{
1081	return (zfs_secpolicy_write_perms(zc->zc_name,
1082	    ZFS_DELEG_PERM_REMAP, cr));
1083}
1084
1085/* ARGSUSED */
1086static int
1087zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1088{
1089	nvpair_t *pair, *nextpair;
1090	int error = 0;
1091
1092	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1093	    pair = nextpair) {
1094		char *name = nvpair_name(pair);
1095		char *hashp = strchr(name, '#');
1096		nextpair = nvlist_next_nvpair(innvl, pair);
1097
1098		if (hashp == NULL) {
1099			error = SET_ERROR(EINVAL);
1100			break;
1101		}
1102
1103		*hashp = '\0';
1104		error = zfs_secpolicy_write_perms(name,
1105		    ZFS_DELEG_PERM_DESTROY, cr);
1106		*hashp = '#';
1107		if (error == ENOENT) {
1108			/*
1109			 * Ignore any filesystems that don't exist (we consider
1110			 * their bookmarks "already destroyed").  Remove
1111			 * the name from the nvl here in case the filesystem
1112			 * is created between now and when we try to destroy
1113			 * the bookmark (in which case we don't want to
1114			 * destroy it since we haven't checked for permission).
1115			 */
1116			fnvlist_remove_nvpair(innvl, pair);
1117			error = 0;
1118		}
1119		if (error != 0)
1120			break;
1121	}
1122
1123	return (error);
1124}
1125
1126/* ARGSUSED */
1127static int
1128zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1129{
1130	/*
1131	 * Even root must have a proper TSD so that we know what pool
1132	 * to log to.
1133	 */
1134	if (tsd_get(zfs_allow_log_key) == NULL)
1135		return (SET_ERROR(EPERM));
1136	return (0);
1137}
1138
1139static int
1140zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1141{
1142	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
1143	int	error;
1144	char	*origin;
1145
1146	if ((error = zfs_get_parent(zc->zc_name, parentname,
1147	    sizeof (parentname))) != 0)
1148		return (error);
1149
1150	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1151	    (error = zfs_secpolicy_write_perms(origin,
1152	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1153		return (error);
1154
1155	if ((error = zfs_secpolicy_write_perms(parentname,
1156	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1157		return (error);
1158
1159	return (zfs_secpolicy_write_perms(parentname,
1160	    ZFS_DELEG_PERM_MOUNT, cr));
1161}
1162
1163/*
1164 * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1165 * SYS_CONFIG privilege, which is not available in a local zone.
1166 */
1167/* ARGSUSED */
1168static int
1169zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1170{
1171	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1172		return (SET_ERROR(EPERM));
1173
1174	return (0);
1175}
1176
1177/*
1178 * Policy for object to name lookups.
1179 */
1180/* ARGSUSED */
1181static int
1182zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1183{
1184	int error;
1185
1186	if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1187		return (0);
1188
1189	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1190	return (error);
1191}
1192
1193/*
1194 * Policy for fault injection.  Requires all privileges.
1195 */
1196/* ARGSUSED */
1197static int
1198zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1199{
1200	return (secpolicy_zinject(cr));
1201}
1202
1203/* ARGSUSED */
1204static int
1205zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1206{
1207	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1208
1209	if (prop == ZPROP_INVAL) {
1210		if (!zfs_prop_user(zc->zc_value))
1211			return (SET_ERROR(EINVAL));
1212		return (zfs_secpolicy_write_perms(zc->zc_name,
1213		    ZFS_DELEG_PERM_USERPROP, cr));
1214	} else {
1215		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1216		    NULL, cr));
1217	}
1218}
1219
1220static int
1221zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1222{
1223	int err = zfs_secpolicy_read(zc, innvl, cr);
1224	if (err)
1225		return (err);
1226
1227	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1228		return (SET_ERROR(EINVAL));
1229
1230	if (zc->zc_value[0] == 0) {
1231		/*
1232		 * They are asking about a posix uid/gid.  If it's
1233		 * themself, allow it.
1234		 */
1235		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1236		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
1237			if (zc->zc_guid == crgetuid(cr))
1238				return (0);
1239		} else {
1240			if (groupmember(zc->zc_guid, cr))
1241				return (0);
1242		}
1243	}
1244
1245	return (zfs_secpolicy_write_perms(zc->zc_name,
1246	    userquota_perms[zc->zc_objset_type], cr));
1247}
1248
1249static int
1250zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1251{
1252	int err = zfs_secpolicy_read(zc, innvl, cr);
1253	if (err)
1254		return (err);
1255
1256	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1257		return (SET_ERROR(EINVAL));
1258
1259	return (zfs_secpolicy_write_perms(zc->zc_name,
1260	    userquota_perms[zc->zc_objset_type], cr));
1261}
1262
1263/* ARGSUSED */
1264static int
1265zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1266{
1267	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1268	    NULL, cr));
1269}
1270
1271/* ARGSUSED */
1272static int
1273zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1274{
1275	nvpair_t *pair;
1276	nvlist_t *holds;
1277	int error;
1278
1279	error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1280	if (error != 0)
1281		return (SET_ERROR(EINVAL));
1282
1283	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1284	    pair = nvlist_next_nvpair(holds, pair)) {
1285		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1286		error = dmu_fsname(nvpair_name(pair), fsname);
1287		if (error != 0)
1288			return (error);
1289		error = zfs_secpolicy_write_perms(fsname,
1290		    ZFS_DELEG_PERM_HOLD, cr);
1291		if (error != 0)
1292			return (error);
1293	}
1294	return (0);
1295}
1296
1297/* ARGSUSED */
1298static int
1299zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1300{
1301	nvpair_t *pair;
1302	int error;
1303
1304	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1305	    pair = nvlist_next_nvpair(innvl, pair)) {
1306		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1307		error = dmu_fsname(nvpair_name(pair), fsname);
1308		if (error != 0)
1309			return (error);
1310		error = zfs_secpolicy_write_perms(fsname,
1311		    ZFS_DELEG_PERM_RELEASE, cr);
1312		if (error != 0)
1313			return (error);
1314	}
1315	return (0);
1316}
1317
1318/*
1319 * Policy for allowing temporary snapshots to be taken or released
1320 */
1321static int
1322zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1323{
1324	/*
1325	 * A temporary snapshot is the same as a snapshot,
1326	 * hold, destroy and release all rolled into one.
1327	 * Delegated diff alone is sufficient that we allow this.
1328	 */
1329	int error;
1330
1331	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1332	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
1333		return (0);
1334
1335	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1336	if (error == 0)
1337		error = zfs_secpolicy_hold(zc, innvl, cr);
1338	if (error == 0)
1339		error = zfs_secpolicy_release(zc, innvl, cr);
1340	if (error == 0)
1341		error = zfs_secpolicy_destroy(zc, innvl, cr);
1342	return (error);
1343}
1344
1345/*
1346 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1347 */
1348static int
1349get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1350{
1351	char *packed;
1352	int error;
1353	nvlist_t *list = NULL;
1354
1355	/*
1356	 * Read in and unpack the user-supplied nvlist.
1357	 */
1358	if (size == 0)
1359		return (SET_ERROR(EINVAL));
1360
1361	packed = kmem_alloc(size, KM_SLEEP);
1362
1363	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1364	    iflag)) != 0) {
1365		kmem_free(packed, size);
1366		return (SET_ERROR(EFAULT));
1367	}
1368
1369	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1370		kmem_free(packed, size);
1371		return (error);
1372	}
1373
1374	kmem_free(packed, size);
1375
1376	*nvp = list;
1377	return (0);
1378}
1379
1380/*
1381 * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1382 * Entries will be removed from the end of the nvlist, and one int32 entry
1383 * named "N_MORE_ERRORS" will be added indicating how many entries were
1384 * removed.
1385 */
1386static int
1387nvlist_smush(nvlist_t *errors, size_t max)
1388{
1389	size_t size;
1390
1391	size = fnvlist_size(errors);
1392
1393	if (size > max) {
1394		nvpair_t *more_errors;
1395		int n = 0;
1396
1397		if (max < 1024)
1398			return (SET_ERROR(ENOMEM));
1399
1400		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1401		more_errors = nvlist_prev_nvpair(errors, NULL);
1402
1403		do {
1404			nvpair_t *pair = nvlist_prev_nvpair(errors,
1405			    more_errors);
1406			fnvlist_remove_nvpair(errors, pair);
1407			n++;
1408			size = fnvlist_size(errors);
1409		} while (size > max);
1410
1411		fnvlist_remove_nvpair(errors, more_errors);
1412		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1413		ASSERT3U(fnvlist_size(errors), <=, max);
1414	}
1415
1416	return (0);
1417}
1418
1419static int
1420put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1421{
1422	char *packed = NULL;
1423	int error = 0;
1424	size_t size;
1425
1426	size = fnvlist_size(nvl);
1427
1428	if (size > zc->zc_nvlist_dst_size) {
1429		/*
1430		 * Solaris returns ENOMEM here, because even if an error is
1431		 * returned from an ioctl(2), new zc_nvlist_dst_size will be
1432		 * passed to the userland. This is not the case for FreeBSD.
1433		 * We need to return 0, so the kernel will copy the
1434		 * zc_nvlist_dst_size back and the userland can discover that a
1435		 * bigger buffer is needed.
1436		 */
1437		error = 0;
1438	} else {
1439		packed = fnvlist_pack(nvl, &size);
1440		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1441		    size, zc->zc_iflags) != 0)
1442			error = SET_ERROR(EFAULT);
1443		fnvlist_pack_free(packed, size);
1444	}
1445
1446	zc->zc_nvlist_dst_size = size;
1447	zc->zc_nvlist_dst_filled = B_TRUE;
1448	return (error);
1449}
1450
1451int
1452getzfsvfs_impl(objset_t *os, vfs_t **vfsp)
1453{
1454	zfsvfs_t *zfvp;
1455	int error = 0;
1456
1457	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1458		return (SET_ERROR(EINVAL));
1459	}
1460
1461	mutex_enter(&os->os_user_ptr_lock);
1462	zfvp = dmu_objset_get_user(os);
1463	if (zfvp) {
1464		*vfsp = zfvp->z_vfs;
1465		vfs_ref(zfvp->z_vfs);
1466	} else {
1467		error = SET_ERROR(ESRCH);
1468	}
1469	mutex_exit(&os->os_user_ptr_lock);
1470	return (error);
1471}
1472
1473int
1474getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1475{
1476	objset_t *os;
1477	vfs_t *vfsp;
1478	int error;
1479
1480	error = dmu_objset_hold(dsname, FTAG, &os);
1481	if (error != 0)
1482		return (error);
1483	error = getzfsvfs_impl(os, &vfsp);
1484	dmu_objset_rele(os, FTAG);
1485	if (error != 0)
1486		return (error);
1487
1488	error = vfs_busy(vfsp, 0);
1489	vfs_rel(vfsp);
1490	if (error != 0) {
1491		*zfvp = NULL;
1492		error = SET_ERROR(ESRCH);
1493	} else {
1494		*zfvp = vfsp->vfs_data;
1495	}
1496	return (error);
1497}
1498
1499/*
1500 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1501 * case its z_vfs will be NULL, and it will be opened as the owner.
1502 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1503 * which prevents all vnode ops from running.
1504 */
1505static int
1506zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1507{
1508	int error = 0;
1509
1510	if (getzfsvfs(name, zfvp) != 0)
1511		error = zfsvfs_create(name, zfvp);
1512	if (error == 0) {
1513		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1514		    RW_READER, tag);
1515#ifdef illumos
1516		if ((*zfvp)->z_unmounted) {
1517			/*
1518			 * XXX we could probably try again, since the unmounting
1519			 * thread should be just about to disassociate the
1520			 * objset from the zfsvfs.
1521			 */
1522			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1523			return (SET_ERROR(EBUSY));
1524		}
1525#else
1526		/*
1527		 * vfs_busy() ensures that the filesystem is not and
1528		 * can not be unmounted.
1529		 */
1530		ASSERT(!(*zfvp)->z_unmounted);
1531#endif
1532	}
1533	return (error);
1534}
1535
1536static void
1537zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1538{
1539	rrm_exit(&zfsvfs->z_teardown_lock, tag);
1540
1541	if (zfsvfs->z_vfs) {
1542#ifdef illumos
1543		VFS_RELE(zfsvfs->z_vfs);
1544#else
1545		vfs_unbusy(zfsvfs->z_vfs);
1546#endif
1547	} else {
1548		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1549		zfsvfs_free(zfsvfs);
1550	}
1551}
1552
1553static int
1554zfs_ioc_pool_create(zfs_cmd_t *zc)
1555{
1556	int error;
1557	nvlist_t *config, *props = NULL;
1558	nvlist_t *rootprops = NULL;
1559	nvlist_t *zplprops = NULL;
1560
1561	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1562	    zc->zc_iflags, &config))
1563		return (error);
1564
1565	if (zc->zc_nvlist_src_size != 0 && (error =
1566	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1567	    zc->zc_iflags, &props))) {
1568		nvlist_free(config);
1569		return (error);
1570	}
1571
1572	if (props) {
1573		nvlist_t *nvl = NULL;
1574		uint64_t version = SPA_VERSION;
1575
1576		(void) nvlist_lookup_uint64(props,
1577		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1578		if (!SPA_VERSION_IS_SUPPORTED(version)) {
1579			error = SET_ERROR(EINVAL);
1580			goto pool_props_bad;
1581		}
1582		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1583		if (nvl) {
1584			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1585			if (error != 0) {
1586				nvlist_free(config);
1587				nvlist_free(props);
1588				return (error);
1589			}
1590			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1591		}
1592		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1593		error = zfs_fill_zplprops_root(version, rootprops,
1594		    zplprops, NULL);
1595		if (error != 0)
1596			goto pool_props_bad;
1597	}
1598
1599	error = spa_create(zc->zc_name, config, props, zplprops);
1600
1601	/*
1602	 * Set the remaining root properties
1603	 */
1604	if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1605	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1606		(void) spa_destroy(zc->zc_name);
1607
1608pool_props_bad:
1609	nvlist_free(rootprops);
1610	nvlist_free(zplprops);
1611	nvlist_free(config);
1612	nvlist_free(props);
1613
1614	return (error);
1615}
1616
1617static int
1618zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1619{
1620	int error;
1621	zfs_log_history(zc);
1622	error = spa_destroy(zc->zc_name);
1623	if (error == 0)
1624		zvol_remove_minors(zc->zc_name);
1625	return (error);
1626}
1627
1628static int
1629zfs_ioc_pool_import(zfs_cmd_t *zc)
1630{
1631	nvlist_t *config, *props = NULL;
1632	uint64_t guid;
1633	int error;
1634
1635	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1636	    zc->zc_iflags, &config)) != 0)
1637		return (error);
1638
1639	if (zc->zc_nvlist_src_size != 0 && (error =
1640	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1641	    zc->zc_iflags, &props))) {
1642		nvlist_free(config);
1643		return (error);
1644	}
1645
1646	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1647	    guid != zc->zc_guid)
1648		error = SET_ERROR(EINVAL);
1649	else
1650		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1651
1652	if (zc->zc_nvlist_dst != 0) {
1653		int err;
1654
1655		if ((err = put_nvlist(zc, config)) != 0)
1656			error = err;
1657	}
1658
1659	nvlist_free(config);
1660
1661	nvlist_free(props);
1662
1663	return (error);
1664}
1665
1666static int
1667zfs_ioc_pool_export(zfs_cmd_t *zc)
1668{
1669	int error;
1670	boolean_t force = (boolean_t)zc->zc_cookie;
1671	boolean_t hardforce = (boolean_t)zc->zc_guid;
1672
1673	zfs_log_history(zc);
1674	error = spa_export(zc->zc_name, NULL, force, hardforce);
1675	if (error == 0)
1676		zvol_remove_minors(zc->zc_name);
1677	return (error);
1678}
1679
1680static int
1681zfs_ioc_pool_configs(zfs_cmd_t *zc)
1682{
1683	nvlist_t *configs;
1684	int error;
1685
1686	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1687		return (SET_ERROR(EEXIST));
1688
1689	error = put_nvlist(zc, configs);
1690
1691	nvlist_free(configs);
1692
1693	return (error);
1694}
1695
1696/*
1697 * inputs:
1698 * zc_name		name of the pool
1699 *
1700 * outputs:
1701 * zc_cookie		real errno
1702 * zc_nvlist_dst	config nvlist
1703 * zc_nvlist_dst_size	size of config nvlist
1704 */
1705static int
1706zfs_ioc_pool_stats(zfs_cmd_t *zc)
1707{
1708	nvlist_t *config;
1709	int error;
1710	int ret = 0;
1711
1712	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1713	    sizeof (zc->zc_value));
1714
1715	if (config != NULL) {
1716		ret = put_nvlist(zc, config);
1717		nvlist_free(config);
1718
1719		/*
1720		 * The config may be present even if 'error' is non-zero.
1721		 * In this case we return success, and preserve the real errno
1722		 * in 'zc_cookie'.
1723		 */
1724		zc->zc_cookie = error;
1725	} else {
1726		ret = error;
1727	}
1728
1729	return (ret);
1730}
1731
1732/*
1733 * Try to import the given pool, returning pool stats as appropriate so that
1734 * user land knows which devices are available and overall pool health.
1735 */
1736static int
1737zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1738{
1739	nvlist_t *tryconfig, *config;
1740	int error;
1741
1742	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1743	    zc->zc_iflags, &tryconfig)) != 0)
1744		return (error);
1745
1746	config = spa_tryimport(tryconfig);
1747
1748	nvlist_free(tryconfig);
1749
1750	if (config == NULL)
1751		return (SET_ERROR(EINVAL));
1752
1753	error = put_nvlist(zc, config);
1754	nvlist_free(config);
1755
1756	return (error);
1757}
1758
1759/*
1760 * inputs:
1761 * zc_name              name of the pool
1762 * zc_cookie            scan func (pool_scan_func_t)
1763 * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
1764 */
1765static int
1766zfs_ioc_pool_scan(zfs_cmd_t *zc)
1767{
1768	spa_t *spa;
1769	int error;
1770
1771	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1772		return (error);
1773
1774	if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1775		return (SET_ERROR(EINVAL));
1776
1777	if (zc->zc_flags == POOL_SCRUB_PAUSE)
1778		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1779	else if (zc->zc_cookie == POOL_SCAN_NONE)
1780		error = spa_scan_stop(spa);
1781	else
1782		error = spa_scan(spa, zc->zc_cookie);
1783
1784	spa_close(spa, FTAG);
1785
1786	return (error);
1787}
1788
1789static int
1790zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1791{
1792	spa_t *spa;
1793	int error;
1794
1795	error = spa_open(zc->zc_name, &spa, FTAG);
1796	if (error == 0) {
1797		spa_freeze(spa);
1798		spa_close(spa, FTAG);
1799	}
1800	return (error);
1801}
1802
1803static int
1804zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1805{
1806	spa_t *spa;
1807	int error;
1808
1809	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1810		return (error);
1811
1812	if (zc->zc_cookie < spa_version(spa) ||
1813	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1814		spa_close(spa, FTAG);
1815		return (SET_ERROR(EINVAL));
1816	}
1817
1818	spa_upgrade(spa, zc->zc_cookie);
1819	spa_close(spa, FTAG);
1820
1821	return (error);
1822}
1823
1824static int
1825zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1826{
1827	spa_t *spa;
1828	char *hist_buf;
1829	uint64_t size;
1830	int error;
1831
1832	if ((size = zc->zc_history_len) == 0)
1833		return (SET_ERROR(EINVAL));
1834
1835	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1836		return (error);
1837
1838	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1839		spa_close(spa, FTAG);
1840		return (SET_ERROR(ENOTSUP));
1841	}
1842
1843	hist_buf = kmem_alloc(size, KM_SLEEP);
1844	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1845	    &zc->zc_history_len, hist_buf)) == 0) {
1846		error = ddi_copyout(hist_buf,
1847		    (void *)(uintptr_t)zc->zc_history,
1848		    zc->zc_history_len, zc->zc_iflags);
1849	}
1850
1851	spa_close(spa, FTAG);
1852	kmem_free(hist_buf, size);
1853	return (error);
1854}
1855
1856static int
1857zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1858{
1859	spa_t *spa;
1860	int error;
1861
1862	error = spa_open(zc->zc_name, &spa, FTAG);
1863	if (error == 0) {
1864		error = spa_change_guid(spa);
1865		spa_close(spa, FTAG);
1866	}
1867	return (error);
1868}
1869
1870static int
1871zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1872{
1873	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1874}
1875
1876/*
1877 * inputs:
1878 * zc_name		name of filesystem
1879 * zc_obj		object to find
1880 *
1881 * outputs:
1882 * zc_value		name of object
1883 */
1884static int
1885zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1886{
1887	objset_t *os;
1888	int error;
1889
1890	/* XXX reading from objset not owned */
1891	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1892		return (error);
1893	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1894		dmu_objset_rele(os, FTAG);
1895		return (SET_ERROR(EINVAL));
1896	}
1897	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1898	    sizeof (zc->zc_value));
1899	dmu_objset_rele(os, FTAG);
1900
1901	return (error);
1902}
1903
1904/*
1905 * inputs:
1906 * zc_name		name of filesystem
1907 * zc_obj		object to find
1908 *
1909 * outputs:
1910 * zc_stat		stats on object
1911 * zc_value		path to object
1912 */
1913static int
1914zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1915{
1916	objset_t *os;
1917	int error;
1918
1919	/* XXX reading from objset not owned */
1920	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1921		return (error);
1922	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1923		dmu_objset_rele(os, FTAG);
1924		return (SET_ERROR(EINVAL));
1925	}
1926	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1927	    sizeof (zc->zc_value));
1928	dmu_objset_rele(os, FTAG);
1929
1930	return (error);
1931}
1932
1933static int
1934zfs_ioc_vdev_add(zfs_cmd_t *zc)
1935{
1936	spa_t *spa;
1937	int error;
1938	nvlist_t *config, **l2cache, **spares;
1939	uint_t nl2cache = 0, nspares = 0;
1940
1941	error = spa_open(zc->zc_name, &spa, FTAG);
1942	if (error != 0)
1943		return (error);
1944
1945	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1946	    zc->zc_iflags, &config);
1947	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1948	    &l2cache, &nl2cache);
1949
1950	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1951	    &spares, &nspares);
1952
1953#ifdef illumos
1954	/*
1955	 * A root pool with concatenated devices is not supported.
1956	 * Thus, can not add a device to a root pool.
1957	 *
1958	 * Intent log device can not be added to a rootpool because
1959	 * during mountroot, zil is replayed, a seperated log device
1960	 * can not be accessed during the mountroot time.
1961	 *
1962	 * l2cache and spare devices are ok to be added to a rootpool.
1963	 */
1964	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1965		nvlist_free(config);
1966		spa_close(spa, FTAG);
1967		return (SET_ERROR(EDOM));
1968	}
1969#endif /* illumos */
1970
1971	if (error == 0) {
1972		error = spa_vdev_add(spa, config);
1973		nvlist_free(config);
1974	}
1975	spa_close(spa, FTAG);
1976	return (error);
1977}
1978
1979/*
1980 * inputs:
1981 * zc_name		name of the pool
1982 * zc_guid		guid of vdev to remove
1983 * zc_cookie		cancel removal
1984 */
1985static int
1986zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1987{
1988	spa_t *spa;
1989	int error;
1990
1991	error = spa_open(zc->zc_name, &spa, FTAG);
1992	if (error != 0)
1993		return (error);
1994	if (zc->zc_cookie != 0) {
1995		error = spa_vdev_remove_cancel(spa);
1996	} else {
1997		error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1998	}
1999	spa_close(spa, FTAG);
2000	return (error);
2001}
2002
2003static int
2004zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
2005{
2006	spa_t *spa;
2007	int error;
2008	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
2009
2010	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2011		return (error);
2012	switch (zc->zc_cookie) {
2013	case VDEV_STATE_ONLINE:
2014		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
2015		break;
2016
2017	case VDEV_STATE_OFFLINE:
2018		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
2019		break;
2020
2021	case VDEV_STATE_FAULTED:
2022		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2023		    zc->zc_obj != VDEV_AUX_EXTERNAL)
2024			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2025
2026		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
2027		break;
2028
2029	case VDEV_STATE_DEGRADED:
2030		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2031		    zc->zc_obj != VDEV_AUX_EXTERNAL)
2032			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2033
2034		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
2035		break;
2036
2037	default:
2038		error = SET_ERROR(EINVAL);
2039	}
2040	zc->zc_cookie = newstate;
2041	spa_close(spa, FTAG);
2042	return (error);
2043}
2044
2045static int
2046zfs_ioc_vdev_attach(zfs_cmd_t *zc)
2047{
2048	spa_t *spa;
2049	int replacing = zc->zc_cookie;
2050	nvlist_t *config;
2051	int error;
2052
2053	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2054		return (error);
2055
2056	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2057	    zc->zc_iflags, &config)) == 0) {
2058		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
2059		nvlist_free(config);
2060	}
2061
2062	spa_close(spa, FTAG);
2063	return (error);
2064}
2065
2066static int
2067zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2068{
2069	spa_t *spa;
2070	int error;
2071
2072	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2073		return (error);
2074
2075	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2076
2077	spa_close(spa, FTAG);
2078	return (error);
2079}
2080
2081static int
2082zfs_ioc_vdev_split(zfs_cmd_t *zc)
2083{
2084	spa_t *spa;
2085	nvlist_t *config, *props = NULL;
2086	int error;
2087	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2088
2089	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2090		return (error);
2091
2092	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2093	    zc->zc_iflags, &config)) {
2094		spa_close(spa, FTAG);
2095		return (error);
2096	}
2097
2098	if (zc->zc_nvlist_src_size != 0 && (error =
2099	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2100	    zc->zc_iflags, &props))) {
2101		spa_close(spa, FTAG);
2102		nvlist_free(config);
2103		return (error);
2104	}
2105
2106	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2107
2108	spa_close(spa, FTAG);
2109
2110	nvlist_free(config);
2111	nvlist_free(props);
2112
2113	return (error);
2114}
2115
2116static int
2117zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2118{
2119	spa_t *spa;
2120	char *path = zc->zc_value;
2121	uint64_t guid = zc->zc_guid;
2122	int error;
2123
2124	error = spa_open(zc->zc_name, &spa, FTAG);
2125	if (error != 0)
2126		return (error);
2127
2128	error = spa_vdev_setpath(spa, guid, path);
2129	spa_close(spa, FTAG);
2130	return (error);
2131}
2132
2133static int
2134zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2135{
2136	spa_t *spa;
2137	char *fru = zc->zc_value;
2138	uint64_t guid = zc->zc_guid;
2139	int error;
2140
2141	error = spa_open(zc->zc_name, &spa, FTAG);
2142	if (error != 0)
2143		return (error);
2144
2145	error = spa_vdev_setfru(spa, guid, fru);
2146	spa_close(spa, FTAG);
2147	return (error);
2148}
2149
2150static int
2151zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2152{
2153	int error = 0;
2154	nvlist_t *nv;
2155
2156	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2157
2158	if (zc->zc_nvlist_dst != 0 &&
2159	    (error = dsl_prop_get_all(os, &nv)) == 0) {
2160		dmu_objset_stats(os, nv);
2161		/*
2162		 * NB: zvol_get_stats() will read the objset contents,
2163		 * which we aren't supposed to do with a
2164		 * DS_MODE_USER hold, because it could be
2165		 * inconsistent.  So this is a bit of a workaround...
2166		 * XXX reading with out owning
2167		 */
2168		if (!zc->zc_objset_stats.dds_inconsistent &&
2169		    dmu_objset_type(os) == DMU_OST_ZVOL) {
2170			error = zvol_get_stats(os, nv);
2171			if (error == EIO)
2172				return (error);
2173			VERIFY0(error);
2174		}
2175		error = put_nvlist(zc, nv);
2176		nvlist_free(nv);
2177	}
2178
2179	return (error);
2180}
2181
2182/*
2183 * inputs:
2184 * zc_name		name of filesystem
2185 * zc_nvlist_dst_size	size of buffer for property nvlist
2186 *
2187 * outputs:
2188 * zc_objset_stats	stats
2189 * zc_nvlist_dst	property nvlist
2190 * zc_nvlist_dst_size	size of property nvlist
2191 */
2192static int
2193zfs_ioc_objset_stats(zfs_cmd_t *zc)
2194{
2195	objset_t *os;
2196	int error;
2197
2198	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2199	if (error == 0) {
2200		error = zfs_ioc_objset_stats_impl(zc, os);
2201		dmu_objset_rele(os, FTAG);
2202	}
2203
2204	if (error == ENOMEM)
2205		error = 0;
2206	return (error);
2207}
2208
2209/*
2210 * inputs:
2211 * zc_name		name of filesystem
2212 * zc_nvlist_dst_size	size of buffer for property nvlist
2213 *
2214 * outputs:
2215 * zc_nvlist_dst	received property nvlist
2216 * zc_nvlist_dst_size	size of received property nvlist
2217 *
2218 * Gets received properties (distinct from local properties on or after
2219 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2220 * local property values.
2221 */
2222static int
2223zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2224{
2225	int error = 0;
2226	nvlist_t *nv;
2227
2228	/*
2229	 * Without this check, we would return local property values if the
2230	 * caller has not already received properties on or after
2231	 * SPA_VERSION_RECVD_PROPS.
2232	 */
2233	if (!dsl_prop_get_hasrecvd(zc->zc_name))
2234		return (SET_ERROR(ENOTSUP));
2235
2236	if (zc->zc_nvlist_dst != 0 &&
2237	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2238		error = put_nvlist(zc, nv);
2239		nvlist_free(nv);
2240	}
2241
2242	return (error);
2243}
2244
2245static int
2246nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2247{
2248	uint64_t value;
2249	int error;
2250
2251	/*
2252	 * zfs_get_zplprop() will either find a value or give us
2253	 * the default value (if there is one).
2254	 */
2255	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2256		return (error);
2257	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2258	return (0);
2259}
2260
2261/*
2262 * inputs:
2263 * zc_name		name of filesystem
2264 * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2265 *
2266 * outputs:
2267 * zc_nvlist_dst	zpl property nvlist
2268 * zc_nvlist_dst_size	size of zpl property nvlist
2269 */
2270static int
2271zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2272{
2273	objset_t *os;
2274	int err;
2275
2276	/* XXX reading without owning */
2277	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2278		return (err);
2279
2280	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2281
2282	/*
2283	 * NB: nvl_add_zplprop() will read the objset contents,
2284	 * which we aren't supposed to do with a DS_MODE_USER
2285	 * hold, because it could be inconsistent.
2286	 */
2287	if (zc->zc_nvlist_dst != 0 &&
2288	    !zc->zc_objset_stats.dds_inconsistent &&
2289	    dmu_objset_type(os) == DMU_OST_ZFS) {
2290		nvlist_t *nv;
2291
2292		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2293		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2294		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2295		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2296		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2297			err = put_nvlist(zc, nv);
2298		nvlist_free(nv);
2299	} else {
2300		err = SET_ERROR(ENOENT);
2301	}
2302	dmu_objset_rele(os, FTAG);
2303	return (err);
2304}
2305
2306boolean_t
2307dataset_name_hidden(const char *name)
2308{
2309	/*
2310	 * Skip over datasets that are not visible in this zone,
2311	 * internal datasets (which have a $ in their name), and
2312	 * temporary datasets (which have a % in their name).
2313	 */
2314	if (strchr(name, '$') != NULL)
2315		return (B_TRUE);
2316	if (strchr(name, '%') != NULL)
2317		return (B_TRUE);
2318	if (!INGLOBALZONE(curthread) && !zone_dataset_visible(name, NULL))
2319		return (B_TRUE);
2320	return (B_FALSE);
2321}
2322
2323/*
2324 * inputs:
2325 * zc_name		name of filesystem
2326 * zc_cookie		zap cursor
2327 * zc_nvlist_dst_size	size of buffer for property nvlist
2328 *
2329 * outputs:
2330 * zc_name		name of next filesystem
2331 * zc_cookie		zap cursor
2332 * zc_objset_stats	stats
2333 * zc_nvlist_dst	property nvlist
2334 * zc_nvlist_dst_size	size of property nvlist
2335 */
2336static int
2337zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2338{
2339	objset_t *os;
2340	int error;
2341	char *p;
2342	size_t orig_len = strlen(zc->zc_name);
2343
2344top:
2345	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2346		if (error == ENOENT)
2347			error = SET_ERROR(ESRCH);
2348		return (error);
2349	}
2350
2351	p = strrchr(zc->zc_name, '/');
2352	if (p == NULL || p[1] != '\0')
2353		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2354	p = zc->zc_name + strlen(zc->zc_name);
2355
2356	do {
2357		error = dmu_dir_list_next(os,
2358		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2359		    NULL, &zc->zc_cookie);
2360		if (error == ENOENT)
2361			error = SET_ERROR(ESRCH);
2362	} while (error == 0 && dataset_name_hidden(zc->zc_name));
2363	dmu_objset_rele(os, FTAG);
2364
2365	/*
2366	 * If it's an internal dataset (ie. with a '$' in its name),
2367	 * don't try to get stats for it, otherwise we'll return ENOENT.
2368	 */
2369	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2370		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2371		if (error == ENOENT) {
2372			/* We lost a race with destroy, get the next one. */
2373			zc->zc_name[orig_len] = '\0';
2374			goto top;
2375		}
2376	}
2377	return (error);
2378}
2379
2380/*
2381 * inputs:
2382 * zc_name		name of filesystem
2383 * zc_cookie		zap cursor
2384 * zc_nvlist_dst_size	size of buffer for property nvlist
2385 * zc_simple		when set, only name is requested
2386 *
2387 * outputs:
2388 * zc_name		name of next snapshot
2389 * zc_objset_stats	stats
2390 * zc_nvlist_dst	property nvlist
2391 * zc_nvlist_dst_size	size of property nvlist
2392 */
2393static int
2394zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2395{
2396	objset_t *os;
2397	int error;
2398
2399	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2400	if (error != 0) {
2401		return (error == ENOENT ? ESRCH : error);
2402	}
2403
2404	/*
2405	 * A dataset name of maximum length cannot have any snapshots,
2406	 * so exit immediately.
2407	 */
2408	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2409	    ZFS_MAX_DATASET_NAME_LEN) {
2410		dmu_objset_rele(os, FTAG);
2411		return (SET_ERROR(ESRCH));
2412	}
2413
2414	error = dmu_snapshot_list_next(os,
2415	    sizeof (zc->zc_name) - strlen(zc->zc_name),
2416	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2417	    NULL);
2418
2419	if (error == 0 && !zc->zc_simple) {
2420		dsl_dataset_t *ds;
2421		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2422
2423		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2424		if (error == 0) {
2425			objset_t *ossnap;
2426
2427			error = dmu_objset_from_ds(ds, &ossnap);
2428			if (error == 0)
2429				error = zfs_ioc_objset_stats_impl(zc, ossnap);
2430			dsl_dataset_rele(ds, FTAG);
2431		}
2432	} else if (error == ENOENT) {
2433		error = SET_ERROR(ESRCH);
2434	}
2435
2436	dmu_objset_rele(os, FTAG);
2437	/* if we failed, undo the @ that we tacked on to zc_name */
2438	if (error != 0)
2439		*strchr(zc->zc_name, '@') = '\0';
2440	return (error);
2441}
2442
2443static int
2444zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2445{
2446	const char *propname = nvpair_name(pair);
2447	uint64_t *valary;
2448	unsigned int vallen;
2449	const char *domain;
2450	char *dash;
2451	zfs_userquota_prop_t type;
2452	uint64_t rid;
2453	uint64_t quota;
2454	zfsvfs_t *zfsvfs;
2455	int err;
2456
2457	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2458		nvlist_t *attrs;
2459		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2460		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2461		    &pair) != 0)
2462			return (SET_ERROR(EINVAL));
2463	}
2464
2465	/*
2466	 * A correctly constructed propname is encoded as
2467	 * userquota@<rid>-<domain>.
2468	 */
2469	if ((dash = strchr(propname, '-')) == NULL ||
2470	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2471	    vallen != 3)
2472		return (SET_ERROR(EINVAL));
2473
2474	domain = dash + 1;
2475	type = valary[0];
2476	rid = valary[1];
2477	quota = valary[2];
2478
2479	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2480	if (err == 0) {
2481		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2482		zfsvfs_rele(zfsvfs, FTAG);
2483	}
2484
2485	return (err);
2486}
2487
2488/*
2489 * If the named property is one that has a special function to set its value,
2490 * return 0 on success and a positive error code on failure; otherwise if it is
2491 * not one of the special properties handled by this function, return -1.
2492 *
2493 * XXX: It would be better for callers of the property interface if we handled
2494 * these special cases in dsl_prop.c (in the dsl layer).
2495 */
2496static int
2497zfs_prop_set_special(const char *dsname, zprop_source_t source,
2498    nvpair_t *pair)
2499{
2500	const char *propname = nvpair_name(pair);
2501	zfs_prop_t prop = zfs_name_to_prop(propname);
2502	uint64_t intval;
2503	int err = -1;
2504
2505	if (prop == ZPROP_INVAL) {
2506		if (zfs_prop_userquota(propname))
2507			return (zfs_prop_set_userquota(dsname, pair));
2508		return (-1);
2509	}
2510
2511	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2512		nvlist_t *attrs;
2513		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2514		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2515		    &pair) == 0);
2516	}
2517
2518	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2519		return (-1);
2520
2521	VERIFY(0 == nvpair_value_uint64(pair, &intval));
2522
2523	switch (prop) {
2524	case ZFS_PROP_QUOTA:
2525		err = dsl_dir_set_quota(dsname, source, intval);
2526		break;
2527	case ZFS_PROP_REFQUOTA:
2528		err = dsl_dataset_set_refquota(dsname, source, intval);
2529		break;
2530	case ZFS_PROP_FILESYSTEM_LIMIT:
2531	case ZFS_PROP_SNAPSHOT_LIMIT:
2532		if (intval == UINT64_MAX) {
2533			/* clearing the limit, just do it */
2534			err = 0;
2535		} else {
2536			err = dsl_dir_activate_fs_ss_limit(dsname);
2537		}
2538		/*
2539		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2540		 * default path to set the value in the nvlist.
2541		 */
2542		if (err == 0)
2543			err = -1;
2544		break;
2545	case ZFS_PROP_RESERVATION:
2546		err = dsl_dir_set_reservation(dsname, source, intval);
2547		break;
2548	case ZFS_PROP_REFRESERVATION:
2549		err = dsl_dataset_set_refreservation(dsname, source, intval);
2550		break;
2551	case ZFS_PROP_VOLSIZE:
2552		err = zvol_set_volsize(dsname, intval);
2553		break;
2554	case ZFS_PROP_VERSION:
2555	{
2556		zfsvfs_t *zfsvfs;
2557
2558		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2559			break;
2560
2561		err = zfs_set_version(zfsvfs, intval);
2562		zfsvfs_rele(zfsvfs, FTAG);
2563
2564		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2565			zfs_cmd_t *zc;
2566
2567			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2568			(void) strcpy(zc->zc_name, dsname);
2569			(void) zfs_ioc_userspace_upgrade(zc);
2570			kmem_free(zc, sizeof (zfs_cmd_t));
2571		}
2572		break;
2573	}
2574	default:
2575		err = -1;
2576	}
2577
2578	return (err);
2579}
2580
2581/*
2582 * This function is best effort. If it fails to set any of the given properties,
2583 * it continues to set as many as it can and returns the last error
2584 * encountered. If the caller provides a non-NULL errlist, it will be filled in
2585 * with the list of names of all the properties that failed along with the
2586 * corresponding error numbers.
2587 *
2588 * If every property is set successfully, zero is returned and errlist is not
2589 * modified.
2590 */
2591int
2592zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2593    nvlist_t *errlist)
2594{
2595	nvpair_t *pair;
2596	nvpair_t *propval;
2597	int rv = 0;
2598	uint64_t intval;
2599	char *strval;
2600	nvlist_t *genericnvl = fnvlist_alloc();
2601	nvlist_t *retrynvl = fnvlist_alloc();
2602
2603retry:
2604	pair = NULL;
2605	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2606		const char *propname = nvpair_name(pair);
2607		zfs_prop_t prop = zfs_name_to_prop(propname);
2608		int err = 0;
2609
2610		/* decode the property value */
2611		propval = pair;
2612		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2613			nvlist_t *attrs;
2614			attrs = fnvpair_value_nvlist(pair);
2615			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2616			    &propval) != 0)
2617				err = SET_ERROR(EINVAL);
2618		}
2619
2620		/* Validate value type */
2621		if (err == 0 && prop == ZPROP_INVAL) {
2622			if (zfs_prop_user(propname)) {
2623				if (nvpair_type(propval) != DATA_TYPE_STRING)
2624					err = SET_ERROR(EINVAL);
2625			} else if (zfs_prop_userquota(propname)) {
2626				if (nvpair_type(propval) !=
2627				    DATA_TYPE_UINT64_ARRAY)
2628					err = SET_ERROR(EINVAL);
2629			} else {
2630				err = SET_ERROR(EINVAL);
2631			}
2632		} else if (err == 0) {
2633			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2634				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2635					err = SET_ERROR(EINVAL);
2636			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2637				const char *unused;
2638
2639				intval = fnvpair_value_uint64(propval);
2640
2641				switch (zfs_prop_get_type(prop)) {
2642				case PROP_TYPE_NUMBER:
2643					break;
2644				case PROP_TYPE_STRING:
2645					err = SET_ERROR(EINVAL);
2646					break;
2647				case PROP_TYPE_INDEX:
2648					if (zfs_prop_index_to_string(prop,
2649					    intval, &unused) != 0)
2650						err = SET_ERROR(EINVAL);
2651					break;
2652				default:
2653					cmn_err(CE_PANIC,
2654					    "unknown property type");
2655				}
2656			} else {
2657				err = SET_ERROR(EINVAL);
2658			}
2659		}
2660
2661		/* Validate permissions */
2662		if (err == 0)
2663			err = zfs_check_settable(dsname, pair, CRED());
2664
2665		if (err == 0) {
2666			err = zfs_prop_set_special(dsname, source, pair);
2667			if (err == -1) {
2668				/*
2669				 * For better performance we build up a list of
2670				 * properties to set in a single transaction.
2671				 */
2672				err = nvlist_add_nvpair(genericnvl, pair);
2673			} else if (err != 0 && nvl != retrynvl) {
2674				/*
2675				 * This may be a spurious error caused by
2676				 * receiving quota and reservation out of order.
2677				 * Try again in a second pass.
2678				 */
2679				err = nvlist_add_nvpair(retrynvl, pair);
2680			}
2681		}
2682
2683		if (err != 0) {
2684			if (errlist != NULL)
2685				fnvlist_add_int32(errlist, propname, err);
2686			rv = err;
2687		}
2688	}
2689
2690	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2691		nvl = retrynvl;
2692		goto retry;
2693	}
2694
2695	if (!nvlist_empty(genericnvl) &&
2696	    dsl_props_set(dsname, source, genericnvl) != 0) {
2697		/*
2698		 * If this fails, we still want to set as many properties as we
2699		 * can, so try setting them individually.
2700		 */
2701		pair = NULL;
2702		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2703			const char *propname = nvpair_name(pair);
2704			int err = 0;
2705
2706			propval = pair;
2707			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2708				nvlist_t *attrs;
2709				attrs = fnvpair_value_nvlist(pair);
2710				propval = fnvlist_lookup_nvpair(attrs,
2711				    ZPROP_VALUE);
2712			}
2713
2714			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2715				strval = fnvpair_value_string(propval);
2716				err = dsl_prop_set_string(dsname, propname,
2717				    source, strval);
2718			} else {
2719				intval = fnvpair_value_uint64(propval);
2720				err = dsl_prop_set_int(dsname, propname, source,
2721				    intval);
2722			}
2723
2724			if (err != 0) {
2725				if (errlist != NULL) {
2726					fnvlist_add_int32(errlist, propname,
2727					    err);
2728				}
2729				rv = err;
2730			}
2731		}
2732	}
2733	nvlist_free(genericnvl);
2734	nvlist_free(retrynvl);
2735
2736	return (rv);
2737}
2738
2739/*
2740 * Check that all the properties are valid user properties.
2741 */
2742static int
2743zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2744{
2745	nvpair_t *pair = NULL;
2746	int error = 0;
2747
2748	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2749		const char *propname = nvpair_name(pair);
2750
2751		if (!zfs_prop_user(propname) ||
2752		    nvpair_type(pair) != DATA_TYPE_STRING)
2753			return (SET_ERROR(EINVAL));
2754
2755		if (error = zfs_secpolicy_write_perms(fsname,
2756		    ZFS_DELEG_PERM_USERPROP, CRED()))
2757			return (error);
2758
2759		if (strlen(propname) >= ZAP_MAXNAMELEN)
2760			return (SET_ERROR(ENAMETOOLONG));
2761
2762		if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2763			return (E2BIG);
2764	}
2765	return (0);
2766}
2767
2768static void
2769props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2770{
2771	nvpair_t *pair;
2772
2773	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2774
2775	pair = NULL;
2776	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2777		if (nvlist_exists(skipped, nvpair_name(pair)))
2778			continue;
2779
2780		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2781	}
2782}
2783
2784static int
2785clear_received_props(const char *dsname, nvlist_t *props,
2786    nvlist_t *skipped)
2787{
2788	int err = 0;
2789	nvlist_t *cleared_props = NULL;
2790	props_skip(props, skipped, &cleared_props);
2791	if (!nvlist_empty(cleared_props)) {
2792		/*
2793		 * Acts on local properties until the dataset has received
2794		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2795		 */
2796		zprop_source_t flags = (ZPROP_SRC_NONE |
2797		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2798		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2799	}
2800	nvlist_free(cleared_props);
2801	return (err);
2802}
2803
2804/*
2805 * inputs:
2806 * zc_name		name of filesystem
2807 * zc_value		name of property to set
2808 * zc_nvlist_src{_size}	nvlist of properties to apply
2809 * zc_cookie		received properties flag
2810 *
2811 * outputs:
2812 * zc_nvlist_dst{_size} error for each unapplied received property
2813 */
2814static int
2815zfs_ioc_set_prop(zfs_cmd_t *zc)
2816{
2817	nvlist_t *nvl;
2818	boolean_t received = zc->zc_cookie;
2819	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2820	    ZPROP_SRC_LOCAL);
2821	nvlist_t *errors;
2822	int error;
2823
2824	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2825	    zc->zc_iflags, &nvl)) != 0)
2826		return (error);
2827
2828	if (received) {
2829		nvlist_t *origprops;
2830
2831		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2832			(void) clear_received_props(zc->zc_name,
2833			    origprops, nvl);
2834			nvlist_free(origprops);
2835		}
2836
2837		error = dsl_prop_set_hasrecvd(zc->zc_name);
2838	}
2839
2840	errors = fnvlist_alloc();
2841	if (error == 0)
2842		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2843
2844	if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2845		(void) put_nvlist(zc, errors);
2846	}
2847
2848	nvlist_free(errors);
2849	nvlist_free(nvl);
2850	return (error);
2851}
2852
2853/*
2854 * inputs:
2855 * zc_name		name of filesystem
2856 * zc_value		name of property to inherit
2857 * zc_cookie		revert to received value if TRUE
2858 *
2859 * outputs:		none
2860 */
2861static int
2862zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2863{
2864	const char *propname = zc->zc_value;
2865	zfs_prop_t prop = zfs_name_to_prop(propname);
2866	boolean_t received = zc->zc_cookie;
2867	zprop_source_t source = (received
2868	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2869	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2870
2871	if (received) {
2872		nvlist_t *dummy;
2873		nvpair_t *pair;
2874		zprop_type_t type;
2875		int err;
2876
2877		/*
2878		 * zfs_prop_set_special() expects properties in the form of an
2879		 * nvpair with type info.
2880		 */
2881		if (prop == ZPROP_INVAL) {
2882			if (!zfs_prop_user(propname))
2883				return (SET_ERROR(EINVAL));
2884
2885			type = PROP_TYPE_STRING;
2886		} else if (prop == ZFS_PROP_VOLSIZE ||
2887		    prop == ZFS_PROP_VERSION) {
2888			return (SET_ERROR(EINVAL));
2889		} else {
2890			type = zfs_prop_get_type(prop);
2891		}
2892
2893		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2894
2895		switch (type) {
2896		case PROP_TYPE_STRING:
2897			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2898			break;
2899		case PROP_TYPE_NUMBER:
2900		case PROP_TYPE_INDEX:
2901			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2902			break;
2903		default:
2904			nvlist_free(dummy);
2905			return (SET_ERROR(EINVAL));
2906		}
2907
2908		pair = nvlist_next_nvpair(dummy, NULL);
2909		err = zfs_prop_set_special(zc->zc_name, source, pair);
2910		nvlist_free(dummy);
2911		if (err != -1)
2912			return (err); /* special property already handled */
2913	} else {
2914		/*
2915		 * Only check this in the non-received case. We want to allow
2916		 * 'inherit -S' to revert non-inheritable properties like quota
2917		 * and reservation to the received or default values even though
2918		 * they are not considered inheritable.
2919		 */
2920		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2921			return (SET_ERROR(EINVAL));
2922	}
2923
2924	/* property name has been validated by zfs_secpolicy_inherit_prop() */
2925	return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2926}
2927
2928static int
2929zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2930{
2931	nvlist_t *props;
2932	spa_t *spa;
2933	int error;
2934	nvpair_t *pair;
2935
2936	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2937	    zc->zc_iflags, &props))
2938		return (error);
2939
2940	/*
2941	 * If the only property is the configfile, then just do a spa_lookup()
2942	 * to handle the faulted case.
2943	 */
2944	pair = nvlist_next_nvpair(props, NULL);
2945	if (pair != NULL && strcmp(nvpair_name(pair),
2946	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2947	    nvlist_next_nvpair(props, pair) == NULL) {
2948		mutex_enter(&spa_namespace_lock);
2949		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2950			spa_configfile_set(spa, props, B_FALSE);
2951			spa_write_cachefile(spa, B_FALSE, B_TRUE);
2952		}
2953		mutex_exit(&spa_namespace_lock);
2954		if (spa != NULL) {
2955			nvlist_free(props);
2956			return (0);
2957		}
2958	}
2959
2960	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2961		nvlist_free(props);
2962		return (error);
2963	}
2964
2965	error = spa_prop_set(spa, props);
2966
2967	nvlist_free(props);
2968	spa_close(spa, FTAG);
2969
2970	return (error);
2971}
2972
2973static int
2974zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2975{
2976	spa_t *spa;
2977	int error;
2978	nvlist_t *nvp = NULL;
2979
2980	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2981		/*
2982		 * If the pool is faulted, there may be properties we can still
2983		 * get (such as altroot and cachefile), so attempt to get them
2984		 * anyway.
2985		 */
2986		mutex_enter(&spa_namespace_lock);
2987		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2988			error = spa_prop_get(spa, &nvp);
2989		mutex_exit(&spa_namespace_lock);
2990	} else {
2991		error = spa_prop_get(spa, &nvp);
2992		spa_close(spa, FTAG);
2993	}
2994
2995	if (error == 0 && zc->zc_nvlist_dst != 0)
2996		error = put_nvlist(zc, nvp);
2997	else
2998		error = SET_ERROR(EFAULT);
2999
3000	nvlist_free(nvp);
3001	return (error);
3002}
3003
3004/*
3005 * inputs:
3006 * zc_name		name of filesystem
3007 * zc_nvlist_src{_size}	nvlist of delegated permissions
3008 * zc_perm_action	allow/unallow flag
3009 *
3010 * outputs:		none
3011 */
3012static int
3013zfs_ioc_set_fsacl(zfs_cmd_t *zc)
3014{
3015	int error;
3016	nvlist_t *fsaclnv = NULL;
3017
3018	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3019	    zc->zc_iflags, &fsaclnv)) != 0)
3020		return (error);
3021
3022	/*
3023	 * Verify nvlist is constructed correctly
3024	 */
3025	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
3026		nvlist_free(fsaclnv);
3027		return (SET_ERROR(EINVAL));
3028	}
3029
3030	/*
3031	 * If we don't have PRIV_SYS_MOUNT, then validate
3032	 * that user is allowed to hand out each permission in
3033	 * the nvlist(s)
3034	 */
3035
3036	error = secpolicy_zfs(CRED());
3037	if (error != 0) {
3038		if (zc->zc_perm_action == B_FALSE) {
3039			error = dsl_deleg_can_allow(zc->zc_name,
3040			    fsaclnv, CRED());
3041		} else {
3042			error = dsl_deleg_can_unallow(zc->zc_name,
3043			    fsaclnv, CRED());
3044		}
3045	}
3046
3047	if (error == 0)
3048		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3049
3050	nvlist_free(fsaclnv);
3051	return (error);
3052}
3053
3054/*
3055 * inputs:
3056 * zc_name		name of filesystem
3057 *
3058 * outputs:
3059 * zc_nvlist_src{_size}	nvlist of delegated permissions
3060 */
3061static int
3062zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3063{
3064	nvlist_t *nvp;
3065	int error;
3066
3067	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3068		error = put_nvlist(zc, nvp);
3069		nvlist_free(nvp);
3070	}
3071
3072	return (error);
3073}
3074
3075/* ARGSUSED */
3076static void
3077zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3078{
3079	zfs_creat_t *zct = arg;
3080
3081	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3082}
3083
3084#define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
3085
3086/*
3087 * inputs:
3088 * os			parent objset pointer (NULL if root fs)
3089 * fuids_ok		fuids allowed in this version of the spa?
3090 * sa_ok		SAs allowed in this version of the spa?
3091 * createprops		list of properties requested by creator
3092 *
3093 * outputs:
3094 * zplprops	values for the zplprops we attach to the master node object
3095 * is_ci	true if requested file system will be purely case-insensitive
3096 *
3097 * Determine the settings for utf8only, normalization and
3098 * casesensitivity.  Specific values may have been requested by the
3099 * creator and/or we can inherit values from the parent dataset.  If
3100 * the file system is of too early a vintage, a creator can not
3101 * request settings for these properties, even if the requested
3102 * setting is the default value.  We don't actually want to create dsl
3103 * properties for these, so remove them from the source nvlist after
3104 * processing.
3105 */
3106static int
3107zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3108    boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3109    nvlist_t *zplprops, boolean_t *is_ci)
3110{
3111	uint64_t sense = ZFS_PROP_UNDEFINED;
3112	uint64_t norm = ZFS_PROP_UNDEFINED;
3113	uint64_t u8 = ZFS_PROP_UNDEFINED;
3114
3115	ASSERT(zplprops != NULL);
3116
3117	if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3118		return (SET_ERROR(EINVAL));
3119
3120	/*
3121	 * Pull out creator prop choices, if any.
3122	 */
3123	if (createprops) {
3124		(void) nvlist_lookup_uint64(createprops,
3125		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3126		(void) nvlist_lookup_uint64(createprops,
3127		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3128		(void) nvlist_remove_all(createprops,
3129		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3130		(void) nvlist_lookup_uint64(createprops,
3131		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3132		(void) nvlist_remove_all(createprops,
3133		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3134		(void) nvlist_lookup_uint64(createprops,
3135		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3136		(void) nvlist_remove_all(createprops,
3137		    zfs_prop_to_name(ZFS_PROP_CASE));
3138	}
3139
3140	/*
3141	 * If the zpl version requested is whacky or the file system
3142	 * or pool is version is too "young" to support normalization
3143	 * and the creator tried to set a value for one of the props,
3144	 * error out.
3145	 */
3146	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3147	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3148	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3149	    (zplver < ZPL_VERSION_NORMALIZATION &&
3150	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3151	    sense != ZFS_PROP_UNDEFINED)))
3152		return (SET_ERROR(ENOTSUP));
3153
3154	/*
3155	 * Put the version in the zplprops
3156	 */
3157	VERIFY(nvlist_add_uint64(zplprops,
3158	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3159
3160	if (norm == ZFS_PROP_UNDEFINED)
3161		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3162	VERIFY(nvlist_add_uint64(zplprops,
3163	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3164
3165	/*
3166	 * If we're normalizing, names must always be valid UTF-8 strings.
3167	 */
3168	if (norm)
3169		u8 = 1;
3170	if (u8 == ZFS_PROP_UNDEFINED)
3171		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3172	VERIFY(nvlist_add_uint64(zplprops,
3173	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3174
3175	if (sense == ZFS_PROP_UNDEFINED)
3176		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3177	VERIFY(nvlist_add_uint64(zplprops,
3178	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3179
3180	if (is_ci)
3181		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3182
3183	return (0);
3184}
3185
3186static int
3187zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3188    nvlist_t *zplprops, boolean_t *is_ci)
3189{
3190	boolean_t fuids_ok, sa_ok;
3191	uint64_t zplver = ZPL_VERSION;
3192	objset_t *os = NULL;
3193	char parentname[ZFS_MAX_DATASET_NAME_LEN];
3194	char *cp;
3195	spa_t *spa;
3196	uint64_t spa_vers;
3197	int error;
3198
3199	(void) strlcpy(parentname, dataset, sizeof (parentname));
3200	cp = strrchr(parentname, '/');
3201	ASSERT(cp != NULL);
3202	cp[0] = '\0';
3203
3204	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3205		return (error);
3206
3207	spa_vers = spa_version(spa);
3208	spa_close(spa, FTAG);
3209
3210	zplver = zfs_zpl_version_map(spa_vers);
3211	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3212	sa_ok = (zplver >= ZPL_VERSION_SA);
3213
3214	/*
3215	 * Open parent object set so we can inherit zplprop values.
3216	 */
3217	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3218		return (error);
3219
3220	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3221	    zplprops, is_ci);
3222	dmu_objset_rele(os, FTAG);
3223	return (error);
3224}
3225
3226static int
3227zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3228    nvlist_t *zplprops, boolean_t *is_ci)
3229{
3230	boolean_t fuids_ok;
3231	boolean_t sa_ok;
3232	uint64_t zplver = ZPL_VERSION;
3233	int error;
3234
3235	zplver = zfs_zpl_version_map(spa_vers);
3236	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3237	sa_ok = (zplver >= ZPL_VERSION_SA);
3238
3239	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3240	    createprops, zplprops, is_ci);
3241	return (error);
3242}
3243
3244/*
3245 * innvl: {
3246 *     "type" -> dmu_objset_type_t (int32)
3247 *     (optional) "props" -> { prop -> value }
3248 * }
3249 *
3250 * outnvl: propname -> error code (int32)
3251 */
3252static int
3253zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3254{
3255	int error = 0;
3256	zfs_creat_t zct = { 0 };
3257	nvlist_t *nvprops = NULL;
3258	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3259	int32_t type32;
3260	dmu_objset_type_t type;
3261	boolean_t is_insensitive = B_FALSE;
3262
3263	if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3264		return (SET_ERROR(EINVAL));
3265	type = type32;
3266	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3267
3268	switch (type) {
3269	case DMU_OST_ZFS:
3270		cbfunc = zfs_create_cb;
3271		break;
3272
3273	case DMU_OST_ZVOL:
3274		cbfunc = zvol_create_cb;
3275		break;
3276
3277	default:
3278		cbfunc = NULL;
3279		break;
3280	}
3281	if (strchr(fsname, '@') ||
3282	    strchr(fsname, '%'))
3283		return (SET_ERROR(EINVAL));
3284
3285	zct.zct_props = nvprops;
3286
3287	if (cbfunc == NULL)
3288		return (SET_ERROR(EINVAL));
3289
3290	if (type == DMU_OST_ZVOL) {
3291		uint64_t volsize, volblocksize;
3292
3293		if (nvprops == NULL)
3294			return (SET_ERROR(EINVAL));
3295		if (nvlist_lookup_uint64(nvprops,
3296		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3297			return (SET_ERROR(EINVAL));
3298
3299		if ((error = nvlist_lookup_uint64(nvprops,
3300		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3301		    &volblocksize)) != 0 && error != ENOENT)
3302			return (SET_ERROR(EINVAL));
3303
3304		if (error != 0)
3305			volblocksize = zfs_prop_default_numeric(
3306			    ZFS_PROP_VOLBLOCKSIZE);
3307
3308		if ((error = zvol_check_volblocksize(
3309		    volblocksize)) != 0 ||
3310		    (error = zvol_check_volsize(volsize,
3311		    volblocksize)) != 0)
3312			return (error);
3313	} else if (type == DMU_OST_ZFS) {
3314		int error;
3315
3316		/*
3317		 * We have to have normalization and
3318		 * case-folding flags correct when we do the
3319		 * file system creation, so go figure them out
3320		 * now.
3321		 */
3322		VERIFY(nvlist_alloc(&zct.zct_zplprops,
3323		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3324		error = zfs_fill_zplprops(fsname, nvprops,
3325		    zct.zct_zplprops, &is_insensitive);
3326		if (error != 0) {
3327			nvlist_free(zct.zct_zplprops);
3328			return (error);
3329		}
3330	}
3331
3332	error = dmu_objset_create(fsname, type,
3333	    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3334	nvlist_free(zct.zct_zplprops);
3335
3336	/*
3337	 * It would be nice to do this atomically.
3338	 */
3339	if (error == 0) {
3340		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3341		    nvprops, outnvl);
3342		if (error != 0)
3343			(void) dsl_destroy_head(fsname);
3344	}
3345#ifdef __FreeBSD__
3346	if (error == 0 && type == DMU_OST_ZVOL)
3347		zvol_create_minors(fsname);
3348#endif
3349	return (error);
3350}
3351
3352/*
3353 * innvl: {
3354 *     "origin" -> name of origin snapshot
3355 *     (optional) "props" -> { prop -> value }
3356 * }
3357 *
3358 * outnvl: propname -> error code (int32)
3359 */
3360static int
3361zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3362{
3363	int error = 0;
3364	nvlist_t *nvprops = NULL;
3365	char *origin_name;
3366
3367	if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3368		return (SET_ERROR(EINVAL));
3369	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3370
3371	if (strchr(fsname, '@') ||
3372	    strchr(fsname, '%'))
3373		return (SET_ERROR(EINVAL));
3374
3375	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3376		return (SET_ERROR(EINVAL));
3377	error = dmu_objset_clone(fsname, origin_name);
3378	if (error != 0)
3379		return (error);
3380
3381	/*
3382	 * It would be nice to do this atomically.
3383	 */
3384	if (error == 0) {
3385		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3386		    nvprops, outnvl);
3387		if (error != 0)
3388			(void) dsl_destroy_head(fsname);
3389	}
3390#ifdef __FreeBSD__
3391	if (error == 0)
3392		zvol_create_minors(fsname);
3393#endif
3394	return (error);
3395}
3396
3397/* ARGSUSED */
3398static int
3399zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3400{
3401	if (strchr(fsname, '@') ||
3402	    strchr(fsname, '%'))
3403		return (SET_ERROR(EINVAL));
3404
3405	return (dmu_objset_remap_indirects(fsname));
3406}
3407
3408/*
3409 * innvl: {
3410 *     "snaps" -> { snapshot1, snapshot2 }
3411 *     (optional) "props" -> { prop -> value (string) }
3412 * }
3413 *
3414 * outnvl: snapshot -> error code (int32)
3415 */
3416static int
3417zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3418{
3419	nvlist_t *snaps;
3420	nvlist_t *props = NULL;
3421	int error, poollen;
3422	nvpair_t *pair;
3423
3424	(void) nvlist_lookup_nvlist(innvl, "props", &props);
3425	if ((error = zfs_check_userprops(poolname, props)) != 0)
3426		return (error);
3427
3428	if (!nvlist_empty(props) &&
3429	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3430		return (SET_ERROR(ENOTSUP));
3431
3432	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3433		return (SET_ERROR(EINVAL));
3434	poollen = strlen(poolname);
3435	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3436	    pair = nvlist_next_nvpair(snaps, pair)) {
3437		const char *name = nvpair_name(pair);
3438		const char *cp = strchr(name, '@');
3439
3440		/*
3441		 * The snap name must contain an @, and the part after it must
3442		 * contain only valid characters.
3443		 */
3444		if (cp == NULL ||
3445		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3446			return (SET_ERROR(EINVAL));
3447
3448		/*
3449		 * The snap must be in the specified pool.
3450		 */
3451		if (strncmp(name, poolname, poollen) != 0 ||
3452		    (name[poollen] != '/' && name[poollen] != '@'))
3453			return (SET_ERROR(EXDEV));
3454
3455		/* This must be the only snap of this fs. */
3456		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3457		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3458			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3459			    == 0) {
3460				return (SET_ERROR(EXDEV));
3461			}
3462		}
3463	}
3464
3465	error = dsl_dataset_snapshot(snaps, props, outnvl);
3466	return (error);
3467}
3468
3469/*
3470 * innvl: "message" -> string
3471 */
3472/* ARGSUSED */
3473static int
3474zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3475{
3476	char *message;
3477	spa_t *spa;
3478	int error;
3479	char *poolname;
3480
3481	/*
3482	 * The poolname in the ioctl is not set, we get it from the TSD,
3483	 * which was set at the end of the last successful ioctl that allows
3484	 * logging.  The secpolicy func already checked that it is set.
3485	 * Only one log ioctl is allowed after each successful ioctl, so
3486	 * we clear the TSD here.
3487	 */
3488	poolname = tsd_get(zfs_allow_log_key);
3489	(void) tsd_set(zfs_allow_log_key, NULL);
3490	error = spa_open(poolname, &spa, FTAG);
3491	strfree(poolname);
3492	if (error != 0)
3493		return (error);
3494
3495	if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
3496		spa_close(spa, FTAG);
3497		return (SET_ERROR(EINVAL));
3498	}
3499
3500	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3501		spa_close(spa, FTAG);
3502		return (SET_ERROR(ENOTSUP));
3503	}
3504
3505	error = spa_history_log(spa, message);
3506	spa_close(spa, FTAG);
3507	return (error);
3508}
3509
#ifdef __FreeBSD__
/*
 * innvl: {
 *     ZPOOL_CONFIG_POOL_GUID -> pool guid (uint64)
 *     ZPOOL_CONFIG_GUID -> vdev guid (uint64)
 *     "command" -> string
 * }
 *
 * outnvl: unused
 *
 * Finds the pool matching (pool guid, vdev guid), passes the "command"
 * string to vdev_label_write_pad2() for that vdev and then waits for the
 * pool to sync.
 *
 * Returns EINVAL if any of the three inputs is missing, ENOENT if
 * spa_by_guid() finds no pool, ENODEV if the vdev is not found in the
 * opened pool, otherwise the error from spa_open() or
 * vdev_label_write_pad2().
 */
/* ARGSUSED */
static int
zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
{
	char name[MAXNAMELEN];
	spa_t *spa;
	vdev_t *vd;
	char *command;
	uint64_t pool_guid;
	uint64_t vdev_guid;
	int error;

	if (nvlist_lookup_uint64(innvl,
	    ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		return (SET_ERROR(EINVAL));
	if (nvlist_lookup_uint64(innvl,
	    ZPOOL_CONFIG_GUID, &vdev_guid) != 0)
		return (SET_ERROR(EINVAL));
	if (nvlist_lookup_string(innvl,
	    "command", &command) != 0)
		return (SET_ERROR(EINVAL));

	/*
	 * Only the pool's name is taken while the namespace lock is held;
	 * the pool itself is re-acquired by name through spa_open() below.
	 * The copy is bounded by the size of the local buffer.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_by_guid(pool_guid, vdev_guid);
	if (spa != NULL)
		(void) snprintf(name, sizeof (name), "%s", spa_name(spa));
	mutex_exit(&spa_namespace_lock);
	if (spa == NULL)
		return (SET_ERROR(ENOENT));

	if ((error = spa_open(name, &spa, FTAG)) != 0)
		return (error);
	spa_vdev_state_enter(spa, SCL_ALL);
	vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE);
	if (vd == NULL) {
		(void) spa_vdev_state_exit(spa, NULL, ENXIO);
		spa_close(spa, FTAG);
		return (SET_ERROR(ENODEV));
	}
	error = vdev_label_write_pad2(vd, command, strlen(command));
	(void) spa_vdev_state_exit(spa, NULL, 0);
	txg_wait_synced(spa->spa_dsl_pool, 0);
	spa_close(spa, FTAG);
	return (error);
}
#endif
3556
3557/*
3558 * The dp_config_rwlock must not be held when calling this, because the
3559 * unmount may need to write out data.
3560 *
3561 * This function is best-effort.  Callers must deal gracefully if it
3562 * remains mounted (or is remounted after this call).
3563 *
3564 * Returns 0 if the argument is not a snapshot, or it is not currently a
3565 * filesystem, or we were able to unmount it.  Returns error code otherwise.
3566 */
3567void
3568zfs_unmount_snap(const char *snapname)
3569{
3570	vfs_t *vfsp = NULL;
3571	zfsvfs_t *zfsvfs = NULL;
3572
3573	if (strchr(snapname, '@') == NULL)
3574		return;
3575
3576	int err = getzfsvfs(snapname, &zfsvfs);
3577	if (err != 0) {
3578		ASSERT3P(zfsvfs, ==, NULL);
3579		return;
3580	}
3581	vfsp = zfsvfs->z_vfs;
3582
3583	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
3584
3585#ifdef illumos
3586	err = vn_vfswlock(vfsp->vfs_vnodecovered);
3587	VFS_RELE(vfsp);
3588	if (err != 0)
3589		return;
3590#endif
3591
3592	/*
3593	 * Always force the unmount for snapshots.
3594	 */
3595#ifdef illumos
3596	(void) dounmount(vfsp, MS_FORCE, kcred);
3597#else
3598	vfs_ref(vfsp);
3599	vfs_unbusy(vfsp);
3600	(void) dounmount(vfsp, MS_FORCE, curthread);
3601#endif
3602}
3603
/*
 * Callback-shaped wrapper around zfs_unmount_snap().  arg is ignored and
 * the result is always 0.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	const int always_ok = 0;

	zfs_unmount_snap(snapname);
	return (always_ok);
}
3611
3612/*
3613 * When a clone is destroyed, its origin may also need to be destroyed,
3614 * in which case it must be unmounted.  This routine will do that unmount
3615 * if necessary.
3616 */
3617void
3618zfs_destroy_unmount_origin(const char *fsname)
3619{
3620	int error;
3621	objset_t *os;
3622	dsl_dataset_t *ds;
3623
3624	error = dmu_objset_hold(fsname, FTAG, &os);
3625	if (error != 0)
3626		return;
3627	ds = dmu_objset_ds(os);
3628	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3629		char originname[ZFS_MAX_DATASET_NAME_LEN];
3630		dsl_dataset_name(ds->ds_prev, originname);
3631		dmu_objset_rele(os, FTAG);
3632		zfs_unmount_snap(originname);
3633	} else {
3634		dmu_objset_rele(os, FTAG);
3635	}
3636}
3637
3638/*
3639 * innvl: {
3640 *     "snaps" -> { snapshot1, snapshot2 }
3641 *     (optional boolean) "defer"
3642 * }
3643 *
3644 * outnvl: snapshot -> error code (int32)
3645 *
3646 */
3647/* ARGSUSED */
3648static int
3649zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3650{
3651	int error, poollen;
3652	nvlist_t *snaps;
3653	nvpair_t *pair;
3654	boolean_t defer;
3655
3656	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3657		return (SET_ERROR(EINVAL));
3658	defer = nvlist_exists(innvl, "defer");
3659
3660	poollen = strlen(poolname);
3661	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3662	    pair = nvlist_next_nvpair(snaps, pair)) {
3663		const char *name = nvpair_name(pair);
3664
3665		/*
3666		 * The snap must be in the specified pool to prevent the
3667		 * invalid removal of zvol minors below.
3668		 */
3669		if (strncmp(name, poolname, poollen) != 0 ||
3670		    (name[poollen] != '/' && name[poollen] != '@'))
3671			return (SET_ERROR(EXDEV));
3672
3673		zfs_unmount_snap(nvpair_name(pair));
3674#if defined(__FreeBSD__)
3675		zvol_remove_minors(name);
3676#endif
3677	}
3678
3679	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3680}
3681
3682/*
3683 * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
3684 * All bookmarks must be in the same pool.
3685 *
3686 * innvl: {
3687 *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
3688 * }
3689 *
3690 * outnvl: bookmark -> error code (int32)
3691 *
3692 */
3693/* ARGSUSED */
3694static int
3695zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3696{
3697	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3698	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3699		char *snap_name;
3700
3701		/*
3702		 * Verify the snapshot argument.
3703		 */
3704		if (nvpair_value_string(pair, &snap_name) != 0)
3705			return (SET_ERROR(EINVAL));
3706
3707
3708		/* Verify that the keys (bookmarks) are unique */
3709		for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
3710		    pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
3711			if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
3712				return (SET_ERROR(EINVAL));
3713		}
3714	}
3715
3716	return (dsl_bookmark_create(innvl, outnvl));
3717}
3718
3719/*
3720 * innvl: {
3721 *     property 1, property 2, ...
3722 * }
3723 *
3724 * outnvl: {
3725 *     bookmark name 1 -> { property 1, property 2, ... },
3726 *     bookmark name 2 -> { property 1, property 2, ... }
3727 * }
3728 *
3729 */
3730static int
3731zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3732{
3733	return (dsl_get_bookmarks(fsname, innvl, outnvl));
3734}
3735
3736/*
3737 * innvl: {
3738 *     bookmark name 1, bookmark name 2
3739 * }
3740 *
3741 * outnvl: bookmark -> error code (int32)
3742 *
3743 */
3744static int
3745zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3746    nvlist_t *outnvl)
3747{
3748	int error, poollen;
3749
3750	poollen = strlen(poolname);
3751	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3752	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3753		const char *name = nvpair_name(pair);
3754		const char *cp = strchr(name, '#');
3755
3756		/*
3757		 * The bookmark name must contain an #, and the part after it
3758		 * must contain only valid characters.
3759		 */
3760		if (cp == NULL ||
3761		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3762			return (SET_ERROR(EINVAL));
3763
3764		/*
3765		 * The bookmark must be in the specified pool.
3766		 */
3767		if (strncmp(name, poolname, poollen) != 0 ||
3768		    (name[poollen] != '/' && name[poollen] != '#'))
3769			return (SET_ERROR(EXDEV));
3770	}
3771
3772	error = dsl_bookmark_destroy(innvl, outnvl);
3773	return (error);
3774}
3775
3776static int
3777zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
3778    nvlist_t *outnvl)
3779{
3780	char *program;
3781	uint64_t instrlimit, memlimit;
3782	boolean_t sync_flag;
3783	nvpair_t *nvarg = NULL;
3784
3785	if (0 != nvlist_lookup_string(innvl, ZCP_ARG_PROGRAM, &program)) {
3786		return (EINVAL);
3787	}
3788	if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
3789		sync_flag = B_TRUE;
3790	}
3791	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
3792		instrlimit = ZCP_DEFAULT_INSTRLIMIT;
3793	}
3794	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
3795		memlimit = ZCP_DEFAULT_MEMLIMIT;
3796	}
3797	if (0 != nvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST, &nvarg)) {
3798		return (EINVAL);
3799	}
3800
3801	if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
3802		return (EINVAL);
3803	if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
3804		return (EINVAL);
3805
3806	return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
3807	    nvarg, outnvl));
3808}
3809
3810/*
3811 * innvl: unused
3812 * outnvl: empty
3813 */
3814/* ARGSUSED */
3815static int
3816zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3817{
3818	return (spa_checkpoint(poolname));
3819}
3820
3821/*
3822 * innvl: unused
3823 * outnvl: empty
3824 */
3825/* ARGSUSED */
3826static int
3827zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
3828    nvlist_t *outnvl)
3829{
3830	return (spa_checkpoint_discard(poolname));
3831}
3832
3833/*
3834 * inputs:
3835 * zc_name		name of dataset to destroy
3836 * zc_objset_type	type of objset
3837 * zc_defer_destroy	mark for deferred destroy
3838 *
3839 * outputs:		none
3840 */
3841static int
3842zfs_ioc_destroy(zfs_cmd_t *zc)
3843{
3844	int err;
3845
3846	if (zc->zc_objset_type == DMU_OST_ZFS)
3847		zfs_unmount_snap(zc->zc_name);
3848
3849	if (strchr(zc->zc_name, '@'))
3850		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3851	else
3852		err = dsl_destroy_head(zc->zc_name);
3853	if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
3854#ifdef __FreeBSD__
3855		zvol_remove_minors(zc->zc_name);
3856#else
3857		(void) zvol_remove_minor(zc->zc_name);
3858#endif
3859	return (err);
3860}
3861
3862/*
3863 * fsname is name of dataset to rollback (to most recent snapshot)
3864 *
3865 * innvl may contain name of expected target snapshot
3866 *
3867 * outnvl: "target" -> name of most recent snapshot
3868 * }
3869 */
3870/* ARGSUSED */
3871static int
3872zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3873{
3874	zfsvfs_t *zfsvfs;
3875	char *target = NULL;
3876	int error;
3877
3878	(void) nvlist_lookup_string(innvl, "target", &target);
3879	if (target != NULL) {
3880		const char *cp = strchr(target, '@');
3881
3882		/*
3883		 * The snap name must contain an @, and the part after it must
3884		 * contain only valid characters.
3885		 */
3886		if (cp == NULL ||
3887		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3888			return (SET_ERROR(EINVAL));
3889	}
3890
3891	if (getzfsvfs(fsname, &zfsvfs) == 0) {
3892		dsl_dataset_t *ds;
3893
3894		ds = dmu_objset_ds(zfsvfs->z_os);
3895		error = zfs_suspend_fs(zfsvfs);
3896		if (error == 0) {
3897			int resume_err;
3898
3899			error = dsl_dataset_rollback(fsname, target, zfsvfs,
3900			    outnvl);
3901			resume_err = zfs_resume_fs(zfsvfs, ds);
3902			error = error ? error : resume_err;
3903		}
3904#ifdef illumos
3905		VFS_RELE(zfsvfs->z_vfs);
3906#else
3907		vfs_unbusy(zfsvfs->z_vfs);
3908#endif
3909	} else {
3910		error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
3911	}
3912	return (error);
3913}
3914
3915static int
3916recursive_unmount(const char *fsname, void *arg)
3917{
3918	const char *snapname = arg;
3919	char fullname[ZFS_MAX_DATASET_NAME_LEN];
3920
3921	(void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
3922	zfs_unmount_snap(fullname);
3923
3924	return (0);
3925}
3926
3927/*
3928 * inputs:
3929 * zc_name	old name of dataset
3930 * zc_value	new name of dataset
3931 * zc_cookie	recursive flag (only valid for snapshots)
3932 *
3933 * outputs:	none
3934 */
3935static int
3936zfs_ioc_rename(zfs_cmd_t *zc)
3937{
3938	boolean_t recursive = zc->zc_cookie & 1;
3939	char *at;
3940	boolean_t allow_mounted = B_TRUE;
3941
3942#ifdef __FreeBSD__
3943	allow_mounted = (zc->zc_cookie & 2) != 0;
3944#endif
3945
3946	/* "zfs rename" from and to ...%recv datasets should both fail */
3947	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
3948	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3949	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
3950	    dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3951	    strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
3952		return (SET_ERROR(EINVAL));
3953
3954	at = strchr(zc->zc_name, '@');
3955	if (at != NULL) {
3956		/* snaps must be in same fs */
3957		int error;
3958
3959		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
3960			return (SET_ERROR(EXDEV));
3961		*at = '\0';
3962		if (zc->zc_objset_type == DMU_OST_ZFS && !allow_mounted) {
3963			error = dmu_objset_find(zc->zc_name,
3964			    recursive_unmount, at + 1,
3965			    recursive ? DS_FIND_CHILDREN : 0);
3966			if (error != 0) {
3967				*at = '@';
3968				return (error);
3969			}
3970		}
3971		error = dsl_dataset_rename_snapshot(zc->zc_name,
3972		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
3973		*at = '@';
3974
3975		return (error);
3976	} else {
3977#ifdef illumos
3978		if (zc->zc_objset_type == DMU_OST_ZVOL)
3979			(void) zvol_remove_minor(zc->zc_name);
3980#endif
3981		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
3982	}
3983}
3984
3985static int
3986zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3987{
3988	const char *propname = nvpair_name(pair);
3989	boolean_t issnap = (strchr(dsname, '@') != NULL);
3990	zfs_prop_t prop = zfs_name_to_prop(propname);
3991	uint64_t intval;
3992	int err;
3993
3994	if (prop == ZPROP_INVAL) {
3995		if (zfs_prop_user(propname)) {
3996			if (err = zfs_secpolicy_write_perms(dsname,
3997			    ZFS_DELEG_PERM_USERPROP, cr))
3998				return (err);
3999			return (0);
4000		}
4001
4002		if (!issnap && zfs_prop_userquota(propname)) {
4003			const char *perm = NULL;
4004			const char *uq_prefix =
4005			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
4006			const char *gq_prefix =
4007			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
4008
4009			if (strncmp(propname, uq_prefix,
4010			    strlen(uq_prefix)) == 0) {
4011				perm = ZFS_DELEG_PERM_USERQUOTA;
4012			} else if (strncmp(propname, gq_prefix,
4013			    strlen(gq_prefix)) == 0) {
4014				perm = ZFS_DELEG_PERM_GROUPQUOTA;
4015			} else {
4016				/* USERUSED and GROUPUSED are read-only */
4017				return (SET_ERROR(EINVAL));
4018			}
4019
4020			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
4021				return (err);
4022			return (0);
4023		}
4024
4025		return (SET_ERROR(EINVAL));
4026	}
4027
4028	if (issnap)
4029		return (SET_ERROR(EINVAL));
4030
4031	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
4032		/*
4033		 * dsl_prop_get_all_impl() returns properties in this
4034		 * format.
4035		 */
4036		nvlist_t *attrs;
4037		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
4038		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4039		    &pair) == 0);
4040	}
4041
4042	/*
4043	 * Check that this value is valid for this pool version
4044	 */
4045	switch (prop) {
4046	case ZFS_PROP_COMPRESSION:
4047		/*
4048		 * If the user specified gzip compression, make sure
4049		 * the SPA supports it. We ignore any errors here since
4050		 * we'll catch them later.
4051		 */
4052		if (nvpair_value_uint64(pair, &intval) == 0) {
4053			if (intval >= ZIO_COMPRESS_GZIP_1 &&
4054			    intval <= ZIO_COMPRESS_GZIP_9 &&
4055			    zfs_earlier_version(dsname,
4056			    SPA_VERSION_GZIP_COMPRESSION)) {
4057				return (SET_ERROR(ENOTSUP));
4058			}
4059
4060			if (intval == ZIO_COMPRESS_ZLE &&
4061			    zfs_earlier_version(dsname,
4062			    SPA_VERSION_ZLE_COMPRESSION))
4063				return (SET_ERROR(ENOTSUP));
4064
4065			if (intval == ZIO_COMPRESS_LZ4) {
4066				spa_t *spa;
4067
4068				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4069					return (err);
4070
4071				if (!spa_feature_is_enabled(spa,
4072				    SPA_FEATURE_LZ4_COMPRESS)) {
4073					spa_close(spa, FTAG);
4074					return (SET_ERROR(ENOTSUP));
4075				}
4076				spa_close(spa, FTAG);
4077			}
4078
4079			/*
4080			 * If this is a bootable dataset then
4081			 * verify that the compression algorithm
4082			 * is supported for booting. We must return
4083			 * something other than ENOTSUP since it
4084			 * implies a downrev pool version.
4085			 */
4086			if (zfs_is_bootfs(dsname) &&
4087			    !BOOTFS_COMPRESS_VALID(intval)) {
4088				return (SET_ERROR(ERANGE));
4089			}
4090		}
4091		break;
4092
4093	case ZFS_PROP_COPIES:
4094		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4095			return (SET_ERROR(ENOTSUP));
4096		break;
4097
4098	case ZFS_PROP_RECORDSIZE:
4099		/* Record sizes above 128k need the feature to be enabled */
4100		if (nvpair_value_uint64(pair, &intval) == 0 &&
4101		    intval > SPA_OLD_MAXBLOCKSIZE) {
4102			spa_t *spa;
4103
4104			/*
4105			 * We don't allow setting the property above 1MB,
4106			 * unless the tunable has been changed.
4107			 */
4108			if (intval > zfs_max_recordsize ||
4109			    intval > SPA_MAXBLOCKSIZE)
4110				return (SET_ERROR(ERANGE));
4111
4112			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4113				return (err);
4114
4115			if (!spa_feature_is_enabled(spa,
4116			    SPA_FEATURE_LARGE_BLOCKS)) {
4117				spa_close(spa, FTAG);
4118				return (SET_ERROR(ENOTSUP));
4119			}
4120			spa_close(spa, FTAG);
4121		}
4122		break;
4123
4124	case ZFS_PROP_SHARESMB:
4125		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4126			return (SET_ERROR(ENOTSUP));
4127		break;
4128
4129	case ZFS_PROP_ACLINHERIT:
4130		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4131		    nvpair_value_uint64(pair, &intval) == 0) {
4132			if (intval == ZFS_ACL_PASSTHROUGH_X &&
4133			    zfs_earlier_version(dsname,
4134			    SPA_VERSION_PASSTHROUGH_X))
4135				return (SET_ERROR(ENOTSUP));
4136		}
4137		break;
4138
4139	case ZFS_PROP_CHECKSUM:
4140	case ZFS_PROP_DEDUP:
4141	{
4142		spa_feature_t feature;
4143		spa_t *spa;
4144
4145		/* dedup feature version checks */
4146		if (prop == ZFS_PROP_DEDUP &&
4147		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4148			return (SET_ERROR(ENOTSUP));
4149
4150		if (nvpair_value_uint64(pair, &intval) != 0)
4151			return (SET_ERROR(EINVAL));
4152
4153		/* check prop value is enabled in features */
4154		feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
4155		if (feature == SPA_FEATURE_NONE)
4156			break;
4157
4158		if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4159			return (err);
4160		/*
4161		 * Salted checksums are not supported on root pools.
4162		 */
4163		if (spa_bootfs(spa) != 0 &&
4164		    intval < ZIO_CHECKSUM_FUNCTIONS &&
4165		    (zio_checksum_table[intval].ci_flags &
4166		    ZCHECKSUM_FLAG_SALTED)) {
4167			spa_close(spa, FTAG);
4168			return (SET_ERROR(ERANGE));
4169		}
4170		if (!spa_feature_is_enabled(spa, feature)) {
4171			spa_close(spa, FTAG);
4172			return (SET_ERROR(ENOTSUP));
4173		}
4174		spa_close(spa, FTAG);
4175		break;
4176	}
4177	}
4178
4179	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4180}
4181
4182/*
4183 * Checks for a race condition to make sure we don't increment a feature flag
4184 * multiple times.
4185 */
4186static int
4187zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
4188{
4189	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4190	spa_feature_t *featurep = arg;
4191
4192	if (!spa_feature_is_active(spa, *featurep))
4193		return (0);
4194	else
4195		return (SET_ERROR(EBUSY));
4196}
4197
4198/*
4199 * The callback invoked on feature activation in the sync task caused by
4200 * zfs_prop_activate_feature.
4201 */
4202static void
4203zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
4204{
4205	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4206	spa_feature_t *featurep = arg;
4207
4208	spa_feature_incr(spa, *featurep, tx);
4209}
4210
4211/*
4212 * Activates a feature on a pool in response to a property setting. This
4213 * creates a new sync task which modifies the pool to reflect the feature
4214 * as being active.
4215 */
4216static int
4217zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
4218{
4219	int err;
4220
4221	/* EBUSY here indicates that the feature is already active */
4222	err = dsl_sync_task(spa_name(spa),
4223	    zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
4224	    &feature, 2, ZFS_SPACE_CHECK_RESERVED);
4225
4226	if (err != 0 && err != EBUSY)
4227		return (err);
4228	else
4229		return (0);
4230}
4231
4232/*
4233 * Removes properties from the given props list that fail permission checks
4234 * needed to clear them and to restore them in case of a receive error. For each
4235 * property, make sure we have both set and inherit permissions.
4236 *
4237 * Returns the first error encountered if any permission checks fail. If the
4238 * caller provides a non-NULL errlist, it also gives the complete list of names
4239 * of all the properties that failed a permission check along with the
4240 * corresponding error numbers. The caller is responsible for freeing the
4241 * returned errlist.
4242 *
4243 * If every property checks out successfully, zero is returned and the list
4244 * pointed at by errlist is NULL.
4245 */
4246static int
4247zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
4248{
4249	zfs_cmd_t *zc;
4250	nvpair_t *pair, *next_pair;
4251	nvlist_t *errors;
4252	int err, rv = 0;
4253
4254	if (props == NULL)
4255		return (0);
4256
4257	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4258
4259	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4260	(void) strcpy(zc->zc_name, dataset);
4261	pair = nvlist_next_nvpair(props, NULL);
4262	while (pair != NULL) {
4263		next_pair = nvlist_next_nvpair(props, pair);
4264
4265		(void) strcpy(zc->zc_value, nvpair_name(pair));
4266		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4267		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4268			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4269			VERIFY(nvlist_add_int32(errors,
4270			    zc->zc_value, err) == 0);
4271		}
4272		pair = next_pair;
4273	}
4274	kmem_free(zc, sizeof (zfs_cmd_t));
4275
4276	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4277		nvlist_free(errors);
4278		errors = NULL;
4279	} else {
4280		VERIFY(nvpair_value_int32(pair, &rv) == 0);
4281	}
4282
4283	if (errlist == NULL)
4284		nvlist_free(errors);
4285	else
4286		*errlist = errors;
4287
4288	return (rv);
4289}
4290
4291static boolean_t
4292propval_equals(nvpair_t *p1, nvpair_t *p2)
4293{
4294	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4295		/* dsl_prop_get_all_impl() format */
4296		nvlist_t *attrs;
4297		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4298		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4299		    &p1) == 0);
4300	}
4301
4302	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4303		nvlist_t *attrs;
4304		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4305		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4306		    &p2) == 0);
4307	}
4308
4309	if (nvpair_type(p1) != nvpair_type(p2))
4310		return (B_FALSE);
4311
4312	if (nvpair_type(p1) == DATA_TYPE_STRING) {
4313		char *valstr1, *valstr2;
4314
4315		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4316		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4317		return (strcmp(valstr1, valstr2) == 0);
4318	} else {
4319		uint64_t intval1, intval2;
4320
4321		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4322		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4323		return (intval1 == intval2);
4324	}
4325}
4326
4327/*
4328 * Remove properties from props if they are not going to change (as determined
4329 * by comparison with origprops). Remove them from origprops as well, since we
4330 * do not need to clear or restore properties that won't change.
4331 */
4332static void
4333props_reduce(nvlist_t *props, nvlist_t *origprops)
4334{
4335	nvpair_t *pair, *next_pair;
4336
4337	if (origprops == NULL)
4338		return; /* all props need to be received */
4339
4340	pair = nvlist_next_nvpair(props, NULL);
4341	while (pair != NULL) {
4342		const char *propname = nvpair_name(pair);
4343		nvpair_t *match;
4344
4345		next_pair = nvlist_next_nvpair(props, pair);
4346
4347		if ((nvlist_lookup_nvpair(origprops, propname,
4348		    &match) != 0) || !propval_equals(pair, match))
4349			goto next; /* need to set received value */
4350
4351		/* don't clear the existing received value */
4352		(void) nvlist_remove_nvpair(origprops, match);
4353		/* don't bother receiving the property */
4354		(void) nvlist_remove_nvpair(props, pair);
4355next:
4356		pair = next_pair;
4357	}
4358}
4359
4360/*
4361 * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4362 * For example, refquota cannot be set until after the receipt of a dataset,
4363 * because in replication streams, an older/earlier snapshot may exceed the
4364 * refquota.  We want to receive the older/earlier snapshot, but setting
4365 * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4366 * the older/earlier snapshot from being received (with EDQUOT).
4367 *
4368 * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4369 *
4370 * libzfs will need to be judicious handling errors encountered by props
4371 * extracted by this function.
4372 */
4373static nvlist_t *
4374extract_delay_props(nvlist_t *props)
4375{
4376	nvlist_t *delayprops;
4377	nvpair_t *nvp, *tmp;
4378	static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
4379	int i;
4380
4381	VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4382
4383	for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4384	    nvp = nvlist_next_nvpair(props, nvp)) {
4385		/*
4386		 * strcmp() is safe because zfs_prop_to_name() always returns
4387		 * a bounded string.
4388		 */
4389		for (i = 0; delayable[i] != 0; i++) {
4390			if (strcmp(zfs_prop_to_name(delayable[i]),
4391			    nvpair_name(nvp)) == 0) {
4392				break;
4393			}
4394		}
4395		if (delayable[i] != 0) {
4396			tmp = nvlist_prev_nvpair(props, nvp);
4397			VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4398			VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4399			nvp = tmp;
4400		}
4401	}
4402
4403	if (nvlist_empty(delayprops)) {
4404		nvlist_free(delayprops);
4405		delayprops = NULL;
4406	}
4407	return (delayprops);
4408}
4409
#ifdef	DEBUG
/*
 * DEBUG-only flag referenced by zfs_ioc_recv().
 * NOTE(review): presumably setting it makes the receive path simulate an
 * error for testing; confirm against its use in zfs_ioc_recv().
 */
static boolean_t zfs_ioc_recv_inject_err;
#endif
4413
4414/*
4415 * inputs:
4416 * zc_name		name of containing filesystem
4417 * zc_nvlist_src{_size}	nvlist of properties to apply
4418 * zc_value		name of snapshot to create
4419 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
4420 * zc_cookie		file descriptor to recv from
4421 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
4422 * zc_guid		force flag
4423 * zc_cleanup_fd	cleanup-on-exit file descriptor
4424 * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
4425 * zc_resumable		if data is incomplete assume sender will resume
4426 *
4427 * outputs:
4428 * zc_cookie		number of bytes read
4429 * zc_nvlist_dst{_size} error for each unapplied received property
4430 * zc_obj		zprop_errflags_t
4431 * zc_action_handle	handle for this guid/ds mapping
4432 */
4433static int
4434zfs_ioc_recv(zfs_cmd_t *zc)
4435{
4436	file_t *fp;
4437	dmu_recv_cookie_t drc;
4438	boolean_t force = (boolean_t)zc->zc_guid;
4439	int fd;
4440	int error = 0;
4441	int props_error = 0;
4442	nvlist_t *errors;
4443	offset_t off;
4444	nvlist_t *props = NULL; /* sent properties */
4445	nvlist_t *origprops = NULL; /* existing properties */
4446	nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
4447	char *origin = NULL;
4448	char *tosnap;
4449	char tofs[ZFS_MAX_DATASET_NAME_LEN];
4450	cap_rights_t rights;
4451	boolean_t first_recvd_props = B_FALSE;
4452
4453	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4454	    strchr(zc->zc_value, '@') == NULL ||
4455	    strchr(zc->zc_value, '%'))
4456		return (SET_ERROR(EINVAL));
4457
4458	(void) strcpy(tofs, zc->zc_value);
4459	tosnap = strchr(tofs, '@');
4460	*tosnap++ = '\0';
4461
4462	if (zc->zc_nvlist_src != 0 &&
4463	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
4464	    zc->zc_iflags, &props)) != 0)
4465		return (error);
4466
4467	fd = zc->zc_cookie;
4468#ifdef illumos
4469	fp = getf(fd);
4470#else
4471	fget_read(curthread, fd, cap_rights_init(&rights, CAP_PREAD), &fp);
4472#endif
4473	if (fp == NULL) {
4474		nvlist_free(props);
4475		return (SET_ERROR(EBADF));
4476	}
4477
4478	errors = fnvlist_alloc();
4479
4480	if (zc->zc_string[0])
4481		origin = zc->zc_string;
4482
4483	error = dmu_recv_begin(tofs, tosnap,
4484	    &zc->zc_begin_record, force, zc->zc_resumable, origin, &drc);
4485	if (error != 0)
4486		goto out;
4487
4488	/*
4489	 * Set properties before we receive the stream so that they are applied
4490	 * to the new data. Note that we must call dmu_recv_stream() if
4491	 * dmu_recv_begin() succeeds.
4492	 */
4493	if (props != NULL && !drc.drc_newfs) {
4494		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
4495		    SPA_VERSION_RECVD_PROPS &&
4496		    !dsl_prop_get_hasrecvd(tofs))
4497			first_recvd_props = B_TRUE;
4498
4499		/*
4500		 * If new received properties are supplied, they are to
4501		 * completely replace the existing received properties, so stash
4502		 * away the existing ones.
4503		 */
4504		if (dsl_prop_get_received(tofs, &origprops) == 0) {
4505			nvlist_t *errlist = NULL;
4506			/*
4507			 * Don't bother writing a property if its value won't
4508			 * change (and avoid the unnecessary security checks).
4509			 *
4510			 * The first receive after SPA_VERSION_RECVD_PROPS is a
4511			 * special case where we blow away all local properties
4512			 * regardless.
4513			 */
4514			if (!first_recvd_props)
4515				props_reduce(props, origprops);
4516			if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
4517				(void) nvlist_merge(errors, errlist, 0);
4518			nvlist_free(errlist);
4519
4520			if (clear_received_props(tofs, origprops,
4521			    first_recvd_props ? NULL : props) != 0)
4522				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
4523		} else {
4524			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
4525		}
4526	}
4527
4528	if (props != NULL) {
4529		props_error = dsl_prop_set_hasrecvd(tofs);
4530
4531		if (props_error == 0) {
4532			delayprops = extract_delay_props(props);
4533			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4534			    props, errors);
4535		}
4536	}
4537
4538	off = fp->f_offset;
4539	error = dmu_recv_stream(&drc, fp, &off, zc->zc_cleanup_fd,
4540	    &zc->zc_action_handle);
4541
4542	if (error == 0) {
4543		zfsvfs_t *zfsvfs = NULL;
4544
4545		if (getzfsvfs(tofs, &zfsvfs) == 0) {
4546			/* online recv */
4547			dsl_dataset_t *ds;
4548			int end_err;
4549
4550			ds = dmu_objset_ds(zfsvfs->z_os);
4551			error = zfs_suspend_fs(zfsvfs);
4552			/*
4553			 * If the suspend fails, then the recv_end will
4554			 * likely also fail, and clean up after itself.
4555			 */
4556			end_err = dmu_recv_end(&drc, zfsvfs);
4557			if (error == 0)
4558				error = zfs_resume_fs(zfsvfs, ds);
4559			error = error ? error : end_err;
4560#ifdef illumos
4561			VFS_RELE(zfsvfs->z_vfs);
4562#else
4563			vfs_unbusy(zfsvfs->z_vfs);
4564#endif
4565		} else {
4566			error = dmu_recv_end(&drc, NULL);
4567		}
4568
4569		/* Set delayed properties now, after we're done receiving. */
4570		if (delayprops != NULL && error == 0) {
4571			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4572			    delayprops, errors);
4573		}
4574	}
4575
4576	if (delayprops != NULL) {
4577		/*
4578		 * Merge delayed props back in with initial props, in case
4579		 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
4580		 * we have to make sure clear_received_props() includes
4581		 * the delayed properties).
4582		 *
4583		 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
4584		 * using ASSERT() will be just like a VERIFY.
4585		 */
4586		ASSERT(nvlist_merge(props, delayprops, 0) == 0);
4587		nvlist_free(delayprops);
4588	}
4589
4590	/*
4591	 * Now that all props, initial and delayed, are set, report the prop
4592	 * errors to the caller.
4593	 */
4594	if (zc->zc_nvlist_dst_size != 0 &&
4595	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
4596	    put_nvlist(zc, errors) != 0)) {
4597		/*
4598		 * Caller made zc->zc_nvlist_dst less than the minimum expected
4599		 * size or supplied an invalid address.
4600		 */
4601		props_error = SET_ERROR(EINVAL);
4602	}
4603
4604	zc->zc_cookie = off - fp->f_offset;
4605	if (off >= 0 && off <= MAXOFFSET_T)
4606		fp->f_offset = off;
4607
4608#ifdef	DEBUG
4609	if (zfs_ioc_recv_inject_err) {
4610		zfs_ioc_recv_inject_err = B_FALSE;
4611		error = 1;
4612	}
4613#endif
4614
4615#ifdef __FreeBSD__
4616	if (error == 0)
4617		zvol_create_minors(tofs);
4618#endif
4619
4620	/*
4621	 * On error, restore the original props.
4622	 */
4623	if (error != 0 && props != NULL && !drc.drc_newfs) {
4624		if (clear_received_props(tofs, props, NULL) != 0) {
4625			/*
4626			 * We failed to clear the received properties.
4627			 * Since we may have left a $recvd value on the
4628			 * system, we can't clear the $hasrecvd flag.
4629			 */
4630			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4631		} else if (first_recvd_props) {
4632			dsl_prop_unset_hasrecvd(tofs);
4633		}
4634
4635		if (origprops == NULL && !drc.drc_newfs) {
4636			/* We failed to stash the original properties. */
4637			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4638		}
4639
4640		/*
4641		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
4642		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
4643		 * explictly if we're restoring local properties cleared in the
4644		 * first new-style receive.
4645		 */
4646		if (origprops != NULL &&
4647		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
4648		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
4649		    origprops, NULL) != 0) {
4650			/*
4651			 * We stashed the original properties but failed to
4652			 * restore them.
4653			 */
4654			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4655		}
4656	}
4657out:
4658	nvlist_free(props);
4659	nvlist_free(origprops);
4660	nvlist_free(errors);
4661	releasef(fd);
4662
4663	if (error == 0)
4664		error = props_error;
4665
4666	return (error);
4667}
4668
4669/*
4670 * inputs:
4671 * zc_name	name of snapshot to send
4672 * zc_cookie	file descriptor to send stream to
4673 * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
4674 * zc_sendobj	objsetid of snapshot to send
4675 * zc_fromobj	objsetid of incremental fromsnap (may be zero)
4676 * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
4677 *		output size in zc_objset_type.
4678 * zc_flags	lzc_send_flags
4679 *
4680 * outputs:
4681 * zc_objset_type	estimated size, if zc_guid is set
4682 */
4683static int
4684zfs_ioc_send(zfs_cmd_t *zc)
4685{
4686	int error;
4687	offset_t off;
4688	boolean_t estimate = (zc->zc_guid != 0);
4689	boolean_t embedok = (zc->zc_flags & 0x1);
4690	boolean_t large_block_ok = (zc->zc_flags & 0x2);
4691	boolean_t compressok = (zc->zc_flags & 0x4);
4692
4693	if (zc->zc_obj != 0) {
4694		dsl_pool_t *dp;
4695		dsl_dataset_t *tosnap;
4696
4697		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4698		if (error != 0)
4699			return (error);
4700
4701		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4702		if (error != 0) {
4703			dsl_pool_rele(dp, FTAG);
4704			return (error);
4705		}
4706
4707		if (dsl_dir_is_clone(tosnap->ds_dir))
4708			zc->zc_fromobj =
4709			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
4710		dsl_dataset_rele(tosnap, FTAG);
4711		dsl_pool_rele(dp, FTAG);
4712	}
4713
4714	if (estimate) {
4715		dsl_pool_t *dp;
4716		dsl_dataset_t *tosnap;
4717		dsl_dataset_t *fromsnap = NULL;
4718
4719		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4720		if (error != 0)
4721			return (error);
4722
4723		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4724		if (error != 0) {
4725			dsl_pool_rele(dp, FTAG);
4726			return (error);
4727		}
4728
4729		if (zc->zc_fromobj != 0) {
4730			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
4731			    FTAG, &fromsnap);
4732			if (error != 0) {
4733				dsl_dataset_rele(tosnap, FTAG);
4734				dsl_pool_rele(dp, FTAG);
4735				return (error);
4736			}
4737		}
4738
4739		error = dmu_send_estimate(tosnap, fromsnap, compressok,
4740		    &zc->zc_objset_type);
4741
4742		if (fromsnap != NULL)
4743			dsl_dataset_rele(fromsnap, FTAG);
4744		dsl_dataset_rele(tosnap, FTAG);
4745		dsl_pool_rele(dp, FTAG);
4746	} else {
4747		file_t *fp;
4748		cap_rights_t rights;
4749
4750#ifdef illumos
4751		fp = getf(zc->zc_cookie);
4752#else
4753		fget_write(curthread, zc->zc_cookie,
4754		    cap_rights_init(&rights, CAP_WRITE), &fp);
4755#endif
4756		if (fp == NULL)
4757			return (SET_ERROR(EBADF));
4758
4759		off = fp->f_offset;
4760		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
4761		    zc->zc_fromobj, embedok, large_block_ok, compressok,
4762#ifdef illumos
4763		    zc->zc_cookie, fp->f_vnode, &off);
4764#else
4765		    zc->zc_cookie, fp, &off);
4766#endif
4767
4768		if (off >= 0 && off <= MAXOFFSET_T)
4769			fp->f_offset = off;
4770		releasef(zc->zc_cookie);
4771	}
4772	return (error);
4773}
4774
4775/*
4776 * inputs:
4777 * zc_name	name of snapshot on which to report progress
4778 * zc_cookie	file descriptor of send stream
4779 *
4780 * outputs:
4781 * zc_cookie	number of bytes written in send stream thus far
4782 */
4783static int
4784zfs_ioc_send_progress(zfs_cmd_t *zc)
4785{
4786	dsl_pool_t *dp;
4787	dsl_dataset_t *ds;
4788	dmu_sendarg_t *dsp = NULL;
4789	int error;
4790
4791	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4792	if (error != 0)
4793		return (error);
4794
4795	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4796	if (error != 0) {
4797		dsl_pool_rele(dp, FTAG);
4798		return (error);
4799	}
4800
4801	mutex_enter(&ds->ds_sendstream_lock);
4802
4803	/*
4804	 * Iterate over all the send streams currently active on this dataset.
4805	 * If there's one which matches the specified file descriptor _and_ the
4806	 * stream was started by the current process, return the progress of
4807	 * that stream.
4808	 */
4809	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
4810	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
4811		if (dsp->dsa_outfd == zc->zc_cookie &&
4812		    dsp->dsa_proc == curproc)
4813			break;
4814	}
4815
4816	if (dsp != NULL)
4817		zc->zc_cookie = *(dsp->dsa_off);
4818	else
4819		error = SET_ERROR(ENOENT);
4820
4821	mutex_exit(&ds->ds_sendstream_lock);
4822	dsl_dataset_rele(ds, FTAG);
4823	dsl_pool_rele(dp, FTAG);
4824	return (error);
4825}
4826
4827static int
4828zfs_ioc_inject_fault(zfs_cmd_t *zc)
4829{
4830	int id, error;
4831
4832	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
4833	    &zc->zc_inject_record);
4834
4835	if (error == 0)
4836		zc->zc_guid = (uint64_t)id;
4837
4838	return (error);
4839}
4840
4841static int
4842zfs_ioc_clear_fault(zfs_cmd_t *zc)
4843{
4844	return (zio_clear_fault((int)zc->zc_guid));
4845}
4846
4847static int
4848zfs_ioc_inject_list_next(zfs_cmd_t *zc)
4849{
4850	int id = (int)zc->zc_guid;
4851	int error;
4852
4853	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
4854	    &zc->zc_inject_record);
4855
4856	zc->zc_guid = id;
4857
4858	return (error);
4859}
4860
4861static int
4862zfs_ioc_error_log(zfs_cmd_t *zc)
4863{
4864	spa_t *spa;
4865	int error;
4866	size_t count = (size_t)zc->zc_nvlist_dst_size;
4867
4868	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4869		return (error);
4870
4871	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4872	    &count);
4873	if (error == 0)
4874		zc->zc_nvlist_dst_size = count;
4875	else
4876		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4877
4878	spa_close(spa, FTAG);
4879
4880	return (error);
4881}
4882
4883static int
4884zfs_ioc_clear(zfs_cmd_t *zc)
4885{
4886	spa_t *spa;
4887	vdev_t *vd;
4888	int error;
4889
4890	/*
4891	 * On zpool clear we also fix up missing slogs
4892	 */
4893	mutex_enter(&spa_namespace_lock);
4894	spa = spa_lookup(zc->zc_name);
4895	if (spa == NULL) {
4896		mutex_exit(&spa_namespace_lock);
4897		return (SET_ERROR(EIO));
4898	}
4899	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4900		/* we need to let spa_open/spa_load clear the chains */
4901		spa_set_log_state(spa, SPA_LOG_CLEAR);
4902	}
4903	spa->spa_last_open_failed = 0;
4904	mutex_exit(&spa_namespace_lock);
4905
4906	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4907		error = spa_open(zc->zc_name, &spa, FTAG);
4908	} else {
4909		nvlist_t *policy;
4910		nvlist_t *config = NULL;
4911
4912		if (zc->zc_nvlist_src == 0)
4913			return (SET_ERROR(EINVAL));
4914
4915		if ((error = get_nvlist(zc->zc_nvlist_src,
4916		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4917			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4918			    policy, &config);
4919			if (config != NULL) {
4920				int err;
4921
4922				if ((err = put_nvlist(zc, config)) != 0)
4923					error = err;
4924				nvlist_free(config);
4925			}
4926			nvlist_free(policy);
4927		}
4928	}
4929
4930	if (error != 0)
4931		return (error);
4932
4933	spa_vdev_state_enter(spa, SCL_NONE);
4934
4935	if (zc->zc_guid == 0) {
4936		vd = NULL;
4937	} else {
4938		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4939		if (vd == NULL) {
4940			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
4941			spa_close(spa, FTAG);
4942			return (SET_ERROR(ENODEV));
4943		}
4944	}
4945
4946	vdev_clear(spa, vd);
4947
4948	(void) spa_vdev_state_exit(spa, NULL, 0);
4949
4950	/*
4951	 * Resume any suspended I/Os.
4952	 */
4953	if (zio_resume(spa) != 0)
4954		error = SET_ERROR(EIO);
4955
4956	spa_close(spa, FTAG);
4957
4958	return (error);
4959}
4960
4961static int
4962zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4963{
4964	spa_t *spa;
4965	int error;
4966
4967	error = spa_open(zc->zc_name, &spa, FTAG);
4968	if (error != 0)
4969		return (error);
4970
4971	spa_vdev_state_enter(spa, SCL_NONE);
4972
4973	/*
4974	 * If a resilver is already in progress then set the
4975	 * spa_scrub_reopen flag to B_TRUE so that we don't restart
4976	 * the scan as a side effect of the reopen. Otherwise, let
4977	 * vdev_open() decided if a resilver is required.
4978	 */
4979	spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
4980	vdev_reopen(spa->spa_root_vdev);
4981	spa->spa_scrub_reopen = B_FALSE;
4982
4983	(void) spa_vdev_state_exit(spa, NULL, 0);
4984	spa_close(spa, FTAG);
4985	return (0);
4986}
4987/*
4988 * inputs:
4989 * zc_name	name of filesystem
4990 *
4991 * outputs:
4992 * zc_string	name of conflicting snapshot, if there is one
4993 */
4994static int
4995zfs_ioc_promote(zfs_cmd_t *zc)
4996{
4997	dsl_pool_t *dp;
4998	dsl_dataset_t *ds, *ods;
4999	char origin[ZFS_MAX_DATASET_NAME_LEN];
5000	char *cp;
5001	int error;
5002
5003	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5004	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
5005	    strchr(zc->zc_name, '%'))
5006		return (SET_ERROR(EINVAL));
5007
5008	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5009	if (error != 0)
5010		return (error);
5011
5012	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5013	if (error != 0) {
5014		dsl_pool_rele(dp, FTAG);
5015		return (error);
5016	}
5017
5018	if (!dsl_dir_is_clone(ds->ds_dir)) {
5019		dsl_dataset_rele(ds, FTAG);
5020		dsl_pool_rele(dp, FTAG);
5021		return (SET_ERROR(EINVAL));
5022	}
5023
5024	error = dsl_dataset_hold_obj(dp,
5025	    dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
5026	if (error != 0) {
5027		dsl_dataset_rele(ds, FTAG);
5028		dsl_pool_rele(dp, FTAG);
5029		return (error);
5030	}
5031
5032	dsl_dataset_name(ods, origin);
5033	dsl_dataset_rele(ods, FTAG);
5034	dsl_dataset_rele(ds, FTAG);
5035	dsl_pool_rele(dp, FTAG);
5036
5037	/*
5038	 * We don't need to unmount *all* the origin fs's snapshots, but
5039	 * it's easier.
5040	 */
5041	cp = strchr(origin, '@');
5042	if (cp)
5043		*cp = '\0';
5044	(void) dmu_objset_find(origin,
5045	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
5046	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
5047}
5048
5049/*
5050 * Retrieve a single {user|group}{used|quota}@... property.
5051 *
5052 * inputs:
5053 * zc_name	name of filesystem
5054 * zc_objset_type zfs_userquota_prop_t
5055 * zc_value	domain name (eg. "S-1-234-567-89")
5056 * zc_guid	RID/UID/GID
5057 *
5058 * outputs:
5059 * zc_cookie	property value
5060 */
5061static int
5062zfs_ioc_userspace_one(zfs_cmd_t *zc)
5063{
5064	zfsvfs_t *zfsvfs;
5065	int error;
5066
5067	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
5068		return (SET_ERROR(EINVAL));
5069
5070	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5071	if (error != 0)
5072		return (error);
5073
5074	error = zfs_userspace_one(zfsvfs,
5075	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
5076	zfsvfs_rele(zfsvfs, FTAG);
5077
5078	return (error);
5079}
5080
5081/*
5082 * inputs:
5083 * zc_name		name of filesystem
5084 * zc_cookie		zap cursor
5085 * zc_objset_type	zfs_userquota_prop_t
5086 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
5087 *
5088 * outputs:
5089 * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
5090 * zc_cookie	zap cursor
5091 */
5092static int
5093zfs_ioc_userspace_many(zfs_cmd_t *zc)
5094{
5095	zfsvfs_t *zfsvfs;
5096	int bufsize = zc->zc_nvlist_dst_size;
5097
5098	if (bufsize <= 0)
5099		return (SET_ERROR(ENOMEM));
5100
5101	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5102	if (error != 0)
5103		return (error);
5104
5105	void *buf = kmem_alloc(bufsize, KM_SLEEP);
5106
5107	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
5108	    buf, &zc->zc_nvlist_dst_size);
5109
5110	if (error == 0) {
5111		error = ddi_copyout(buf,
5112		    (void *)(uintptr_t)zc->zc_nvlist_dst,
5113		    zc->zc_nvlist_dst_size, zc->zc_iflags);
5114	}
5115	kmem_free(buf, bufsize);
5116	zfsvfs_rele(zfsvfs, FTAG);
5117
5118	return (error);
5119}
5120
5121/*
5122 * inputs:
5123 * zc_name		name of filesystem
5124 *
5125 * outputs:
5126 * none
5127 */
5128static int
5129zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
5130{
5131	objset_t *os;
5132	int error = 0;
5133	zfsvfs_t *zfsvfs;
5134
5135	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
5136		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
5137			/*
5138			 * If userused is not enabled, it may be because the
5139			 * objset needs to be closed & reopened (to grow the
5140			 * objset_phys_t).  Suspend/resume the fs will do that.
5141			 */
5142			dsl_dataset_t *ds, *newds;
5143
5144			ds = dmu_objset_ds(zfsvfs->z_os);
5145			error = zfs_suspend_fs(zfsvfs);
5146			if (error == 0) {
5147				dmu_objset_refresh_ownership(ds, &newds,
5148				    zfsvfs);
5149				error = zfs_resume_fs(zfsvfs, newds);
5150			}
5151		}
5152		if (error == 0)
5153			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
5154#ifdef illumos
5155		VFS_RELE(zfsvfs->z_vfs);
5156#else
5157		vfs_unbusy(zfsvfs->z_vfs);
5158#endif
5159	} else {
5160		/* XXX kind of reading contents without owning */
5161		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5162		if (error != 0)
5163			return (error);
5164
5165		error = dmu_objset_userspace_upgrade(os);
5166		dmu_objset_rele(os, FTAG);
5167	}
5168
5169	return (error);
5170}
5171
5172#ifdef illumos
5173/*
5174 * We don't want to have a hard dependency
5175 * against some special symbols in sharefs
5176 * nfs, and smbsrv.  Determine them if needed when
5177 * the first file system is shared.
5178 * Neither sharefs, nfs or smbsrv are unloadable modules.
5179 */
5180int (*znfsexport_fs)(void *arg);
5181int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
5182int (*zsmbexport_fs)(void *arg, boolean_t add_share);
5183
5184int zfs_nfsshare_inited;
5185int zfs_smbshare_inited;
5186
5187ddi_modhandle_t nfs_mod;
5188ddi_modhandle_t sharefs_mod;
5189ddi_modhandle_t smbsrv_mod;
5190#endif	/* illumos */
5191kmutex_t zfs_share_lock;
5192
#ifdef illumos
/*
 * Resolve the sharefs module and its "sharefs_impl" entry point, unless
 * that has already been done.  Both NFS and SMB shares require sharetab
 * support.  The caller must hold zfs_share_lock.
 *
 * Returns 0 on success, ENOSYS if the module or symbol is unavailable.
 */
static int
zfs_init_sharefs()
{
	int error;

	ASSERT(MUTEX_HELD(&zfs_share_lock));

	if (sharefs_mod == NULL) {
		sharefs_mod = ddi_modopen("fs/sharefs",
		    KRTLD_MODE_FIRST, &error);
		if (sharefs_mod == NULL)
			return (SET_ERROR(ENOSYS));
	}

	if (zshare_fs == NULL) {
		zshare_fs = (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
		    ddi_modsym(sharefs_mod, "sharefs_impl", &error);
		if (zshare_fs == NULL)
			return (SET_ERROR(ENOSYS));
	}

	return (0);
}
#endif	/* illumos */
5214
5215static int
5216zfs_ioc_share(zfs_cmd_t *zc)
5217{
5218#ifdef illumos
5219	int error;
5220	int opcode;
5221
5222	switch (zc->zc_share.z_sharetype) {
5223	case ZFS_SHARE_NFS:
5224	case ZFS_UNSHARE_NFS:
5225		if (zfs_nfsshare_inited == 0) {
5226			mutex_enter(&zfs_share_lock);
5227			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
5228			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5229				mutex_exit(&zfs_share_lock);
5230				return (SET_ERROR(ENOSYS));
5231			}
5232			if (znfsexport_fs == NULL &&
5233			    ((znfsexport_fs = (int (*)(void *))
5234			    ddi_modsym(nfs_mod,
5235			    "nfs_export", &error)) == NULL)) {
5236				mutex_exit(&zfs_share_lock);
5237				return (SET_ERROR(ENOSYS));
5238			}
5239			error = zfs_init_sharefs();
5240			if (error != 0) {
5241				mutex_exit(&zfs_share_lock);
5242				return (SET_ERROR(ENOSYS));
5243			}
5244			zfs_nfsshare_inited = 1;
5245			mutex_exit(&zfs_share_lock);
5246		}
5247		break;
5248	case ZFS_SHARE_SMB:
5249	case ZFS_UNSHARE_SMB:
5250		if (zfs_smbshare_inited == 0) {
5251			mutex_enter(&zfs_share_lock);
5252			if (smbsrv_mod == NULL && ((smbsrv_mod =
5253			    ddi_modopen("drv/smbsrv",
5254			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5255				mutex_exit(&zfs_share_lock);
5256				return (SET_ERROR(ENOSYS));
5257			}
5258			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
5259			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
5260			    "smb_server_share", &error)) == NULL)) {
5261				mutex_exit(&zfs_share_lock);
5262				return (SET_ERROR(ENOSYS));
5263			}
5264			error = zfs_init_sharefs();
5265			if (error != 0) {
5266				mutex_exit(&zfs_share_lock);
5267				return (SET_ERROR(ENOSYS));
5268			}
5269			zfs_smbshare_inited = 1;
5270			mutex_exit(&zfs_share_lock);
5271		}
5272		break;
5273	default:
5274		return (SET_ERROR(EINVAL));
5275	}
5276
5277	switch (zc->zc_share.z_sharetype) {
5278	case ZFS_SHARE_NFS:
5279	case ZFS_UNSHARE_NFS:
5280		if (error =
5281		    znfsexport_fs((void *)
5282		    (uintptr_t)zc->zc_share.z_exportdata))
5283			return (error);
5284		break;
5285	case ZFS_SHARE_SMB:
5286	case ZFS_UNSHARE_SMB:
5287		if (error = zsmbexport_fs((void *)
5288		    (uintptr_t)zc->zc_share.z_exportdata,
5289		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
5290		    B_TRUE: B_FALSE)) {
5291			return (error);
5292		}
5293		break;
5294	}
5295
5296	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
5297	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
5298	    SHAREFS_ADD : SHAREFS_REMOVE;
5299
5300	/*
5301	 * Add or remove share from sharetab
5302	 */
5303	error = zshare_fs(opcode,
5304	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
5305	    zc->zc_share.z_sharemax);
5306
5307	return (error);
5308
5309#else	/* !illumos */
5310	return (ENOSYS);
5311#endif	/* illumos */
5312}
5313
5314ace_t full_access[] = {
5315	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
5316};
5317
5318/*
5319 * inputs:
5320 * zc_name		name of containing filesystem
5321 * zc_obj		object # beyond which we want next in-use object #
5322 *
5323 * outputs:
5324 * zc_obj		next in-use object #
5325 */
5326static int
5327zfs_ioc_next_obj(zfs_cmd_t *zc)
5328{
5329	objset_t *os = NULL;
5330	int error;
5331
5332	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5333	if (error != 0)
5334		return (error);
5335
5336	error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
5337	    dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg);
5338
5339	dmu_objset_rele(os, FTAG);
5340	return (error);
5341}
5342
5343/*
5344 * inputs:
5345 * zc_name		name of filesystem
5346 * zc_value		prefix name for snapshot
5347 * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
5348 *
5349 * outputs:
5350 * zc_value		short name of new snapshot
5351 */
5352static int
5353zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5354{
5355	char *snap_name;
5356	char *hold_name;
5357	int error;
5358	minor_t minor;
5359
5360	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5361	if (error != 0)
5362		return (error);
5363
5364	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5365	    (u_longlong_t)ddi_get_lbolt64());
5366	hold_name = kmem_asprintf("%%%s", zc->zc_value);
5367
5368	error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5369	    hold_name);
5370	if (error == 0)
5371		(void) strcpy(zc->zc_value, snap_name);
5372	strfree(snap_name);
5373	strfree(hold_name);
5374	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5375	return (error);
5376}
5377
5378/*
5379 * inputs:
5380 * zc_name		name of "to" snapshot
5381 * zc_value		name of "from" snapshot
5382 * zc_cookie		file descriptor to write diff data on
5383 *
5384 * outputs:
5385 * dmu_diff_record_t's to the file descriptor
5386 */
5387static int
5388zfs_ioc_diff(zfs_cmd_t *zc)
5389{
5390	file_t *fp;
5391	cap_rights_t rights;
5392	offset_t off;
5393	int error;
5394
5395#ifdef illumos
5396	fp = getf(zc->zc_cookie);
5397#else
5398	fget_write(curthread, zc->zc_cookie,
5399		    cap_rights_init(&rights, CAP_WRITE), &fp);
5400#endif
5401	if (fp == NULL)
5402		return (SET_ERROR(EBADF));
5403
5404	off = fp->f_offset;
5405
5406#ifdef illumos
5407	error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5408#else
5409	error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
5410#endif
5411
5412	if (off >= 0 && off <= MAXOFFSET_T)
5413		fp->f_offset = off;
5414	releasef(zc->zc_cookie);
5415
5416	return (error);
5417}
5418
#ifdef illumos
/*
 * Remove every ACL file found in the shares directory `dzp'.
 *
 * The value returned is the error that ended the walk: either the
 * VOP_REMOVE() failure or whatever zap_cursor_retrieve() reported when it
 * stopped yielding entries.
 * NOTE(review): that means a walk which runs to completion returns the
 * cursor's end-of-iteration code rather than 0 -- confirm callers expect it.
 */
static int
zfs_smb_acl_purge(znode_t *dzp)
{
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zap_cursor_t	cursor;
	zap_attribute_t	entry;
	int error;

	zap_cursor_init(&cursor, zfsvfs->z_os, dzp->z_id);
	for (;;) {
		error = zap_cursor_retrieve(&cursor, &entry);
		if (error != 0)
			break;

		error = VOP_REMOVE(ZTOV(dzp), entry.za_name, kcred,
		    NULL, 0);
		if (error != 0)
			break;

		zap_cursor_advance(&cursor);
	}
	zap_cursor_fini(&cursor);
	return (error);
}
#endif	/* illumos */
5442
5443static int
5444zfs_ioc_smb_acl(zfs_cmd_t *zc)
5445{
5446#ifdef illumos
5447	vnode_t *vp;
5448	znode_t *dzp;
5449	vnode_t *resourcevp = NULL;
5450	znode_t *sharedir;
5451	zfsvfs_t *zfsvfs;
5452	nvlist_t *nvlist;
5453	char *src, *target;
5454	vattr_t vattr;
5455	vsecattr_t vsec;
5456	int error = 0;
5457
5458	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5459	    NO_FOLLOW, NULL, &vp)) != 0)
5460		return (error);
5461
5462	/* Now make sure mntpnt and dataset are ZFS */
5463
5464	if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
5465	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5466	    zc->zc_name) != 0)) {
5467		VN_RELE(vp);
5468		return (SET_ERROR(EINVAL));
5469	}
5470
5471	dzp = VTOZ(vp);
5472	zfsvfs = dzp->z_zfsvfs;
5473	ZFS_ENTER(zfsvfs);
5474
5475	/*
5476	 * Create share dir if its missing.
5477	 */
5478	mutex_enter(&zfsvfs->z_lock);
5479	if (zfsvfs->z_shares_dir == 0) {
5480		dmu_tx_t *tx;
5481
5482		tx = dmu_tx_create(zfsvfs->z_os);
5483		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5484		    ZFS_SHARES_DIR);
5485		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5486		error = dmu_tx_assign(tx, TXG_WAIT);
5487		if (error != 0) {
5488			dmu_tx_abort(tx);
5489		} else {
5490			error = zfs_create_share_dir(zfsvfs, tx);
5491			dmu_tx_commit(tx);
5492		}
5493		if (error != 0) {
5494			mutex_exit(&zfsvfs->z_lock);
5495			VN_RELE(vp);
5496			ZFS_EXIT(zfsvfs);
5497			return (error);
5498		}
5499	}
5500	mutex_exit(&zfsvfs->z_lock);
5501
5502	ASSERT(zfsvfs->z_shares_dir);
5503	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5504		VN_RELE(vp);
5505		ZFS_EXIT(zfsvfs);
5506		return (error);
5507	}
5508
5509	switch (zc->zc_cookie) {
5510	case ZFS_SMB_ACL_ADD:
5511		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5512		vattr.va_type = VREG;
5513		vattr.va_mode = S_IFREG|0777;
5514		vattr.va_uid = 0;
5515		vattr.va_gid = 0;
5516
5517		vsec.vsa_mask = VSA_ACE;
5518		vsec.vsa_aclentp = &full_access;
5519		vsec.vsa_aclentsz = sizeof (full_access);
5520		vsec.vsa_aclcnt = 1;
5521
5522		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5523		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5524		if (resourcevp)
5525			VN_RELE(resourcevp);
5526		break;
5527
5528	case ZFS_SMB_ACL_REMOVE:
5529		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5530		    NULL, 0);
5531		break;
5532
5533	case ZFS_SMB_ACL_RENAME:
5534		if ((error = get_nvlist(zc->zc_nvlist_src,
5535		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5536			VN_RELE(vp);
5537			VN_RELE(ZTOV(sharedir));
5538			ZFS_EXIT(zfsvfs);
5539			return (error);
5540		}
5541		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5542		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5543		    &target)) {
5544			VN_RELE(vp);
5545			VN_RELE(ZTOV(sharedir));
5546			ZFS_EXIT(zfsvfs);
5547			nvlist_free(nvlist);
5548			return (error);
5549		}
5550		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5551		    kcred, NULL, 0);
5552		nvlist_free(nvlist);
5553		break;
5554
5555	case ZFS_SMB_ACL_PURGE:
5556		error = zfs_smb_acl_purge(sharedir);
5557		break;
5558
5559	default:
5560		error = SET_ERROR(EINVAL);
5561		break;
5562	}
5563
5564	VN_RELE(vp);
5565	VN_RELE(ZTOV(sharedir));
5566
5567	ZFS_EXIT(zfsvfs);
5568
5569	return (error);
5570#else	/* !illumos */
5571	return (EOPNOTSUPP);
5572#endif	/* illumos */
5573}
5574
5575/*
5576 * innvl: {
5577 *     "holds" -> { snapname -> holdname (string), ... }
5578 *     (optional) "cleanup_fd" -> fd (int32)
5579 * }
5580 *
5581 * outnvl: {
5582 *     snapname -> error value (int32)
5583 *     ...
5584 * }
5585 */
5586/* ARGSUSED */
5587static int
5588zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5589{
5590	nvpair_t *pair;
5591	nvlist_t *holds;
5592	int cleanup_fd = -1;
5593	int error;
5594	minor_t minor = 0;
5595
5596	error = nvlist_lookup_nvlist(args, "holds", &holds);
5597	if (error != 0)
5598		return (SET_ERROR(EINVAL));
5599
5600	/* make sure the user didn't pass us any invalid (empty) tags */
5601	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
5602	    pair = nvlist_next_nvpair(holds, pair)) {
5603		char *htag;
5604
5605		error = nvpair_value_string(pair, &htag);
5606		if (error != 0)
5607			return (SET_ERROR(error));
5608
5609		if (strlen(htag) == 0)
5610			return (SET_ERROR(EINVAL));
5611	}
5612
5613	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
5614		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
5615		if (error != 0)
5616			return (error);
5617	}
5618
5619	error = dsl_dataset_user_hold(holds, minor, errlist);
5620	if (minor != 0)
5621		zfs_onexit_fd_rele(cleanup_fd);
5622	return (error);
5623}
5624
5625/*
5626 * innvl is not used.
5627 *
5628 * outnvl: {
5629 *    holdname -> time added (uint64 seconds since epoch)
5630 *    ...
5631 * }
5632 */
5633/* ARGSUSED */
5634static int
5635zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
5636{
5637	return (dsl_dataset_get_holds(snapname, outnvl));
5638}
5639
5640/*
5641 * innvl: {
5642 *     snapname -> { holdname, ... }
5643 *     ...
5644 * }
5645 *
5646 * outnvl: {
5647 *     snapname -> error value (int32)
5648 *     ...
5649 * }
5650 */
5651/* ARGSUSED */
5652static int
5653zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
5654{
5655	return (dsl_dataset_user_release(holds, errlist));
5656}
5657
5658/*
5659 * inputs:
5660 * zc_name		name of new filesystem or snapshot
5661 * zc_value		full name of old snapshot
5662 *
5663 * outputs:
5664 * zc_cookie		space in bytes
5665 * zc_objset_type	compressed space in bytes
5666 * zc_perm_action	uncompressed space in bytes
5667 */
5668static int
5669zfs_ioc_space_written(zfs_cmd_t *zc)
5670{
5671	int error;
5672	dsl_pool_t *dp;
5673	dsl_dataset_t *new, *old;
5674
5675	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5676	if (error != 0)
5677		return (error);
5678	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
5679	if (error != 0) {
5680		dsl_pool_rele(dp, FTAG);
5681		return (error);
5682	}
5683	error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
5684	if (error != 0) {
5685		dsl_dataset_rele(new, FTAG);
5686		dsl_pool_rele(dp, FTAG);
5687		return (error);
5688	}
5689
5690	error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
5691	    &zc->zc_objset_type, &zc->zc_perm_action);
5692	dsl_dataset_rele(old, FTAG);
5693	dsl_dataset_rele(new, FTAG);
5694	dsl_pool_rele(dp, FTAG);
5695	return (error);
5696}
5697
5698/*
5699 * innvl: {
5700 *     "firstsnap" -> snapshot name
5701 * }
5702 *
5703 * outnvl: {
5704 *     "used" -> space in bytes
5705 *     "compressed" -> compressed space in bytes
5706 *     "uncompressed" -> uncompressed space in bytes
5707 * }
5708 */
5709static int
5710zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
5711{
5712	int error;
5713	dsl_pool_t *dp;
5714	dsl_dataset_t *new, *old;
5715	char *firstsnap;
5716	uint64_t used, comp, uncomp;
5717
5718	if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
5719		return (SET_ERROR(EINVAL));
5720
5721	error = dsl_pool_hold(lastsnap, FTAG, &dp);
5722	if (error != 0)
5723		return (error);
5724
5725	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
5726	if (error == 0 && !new->ds_is_snapshot) {
5727		dsl_dataset_rele(new, FTAG);
5728		error = SET_ERROR(EINVAL);
5729	}
5730	if (error != 0) {
5731		dsl_pool_rele(dp, FTAG);
5732		return (error);
5733	}
5734	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
5735	if (error == 0 && !old->ds_is_snapshot) {
5736		dsl_dataset_rele(old, FTAG);
5737		error = SET_ERROR(EINVAL);
5738	}
5739	if (error != 0) {
5740		dsl_dataset_rele(new, FTAG);
5741		dsl_pool_rele(dp, FTAG);
5742		return (error);
5743	}
5744
5745	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
5746	dsl_dataset_rele(old, FTAG);
5747	dsl_dataset_rele(new, FTAG);
5748	dsl_pool_rele(dp, FTAG);
5749	fnvlist_add_uint64(outnvl, "used", used);
5750	fnvlist_add_uint64(outnvl, "compressed", comp);
5751	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
5752	return (error);
5753}
5754
5755static int
5756zfs_ioc_jail(zfs_cmd_t *zc)
5757{
5758
5759	return (zone_dataset_attach(curthread->td_ucred, zc->zc_name,
5760	    (int)zc->zc_jailid));
5761}
5762
5763static int
5764zfs_ioc_unjail(zfs_cmd_t *zc)
5765{
5766
5767	return (zone_dataset_detach(curthread->td_ucred, zc->zc_name,
5768	    (int)zc->zc_jailid));
5769}
5770
5771/*
5772 * innvl: {
5773 *     "fd" -> file descriptor to write stream to (int32)
5774 *     (optional) "fromsnap" -> full snap name to send an incremental from
5775 *     (optional) "largeblockok" -> (value ignored)
5776 *         indicates that blocks > 128KB are permitted
5777 *     (optional) "embedok" -> (value ignored)
5778 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
5779 *     (optional) "compressok" -> (value ignored)
5780 *         presence indicates compressed DRR_WRITE records are permitted
5781 *     (optional) "resume_object" and "resume_offset" -> (uint64)
5782 *         if present, resume send stream from specified object and offset.
5783 * }
5784 *
5785 * outnvl is unused
5786 */
5787/* ARGSUSED */
5788static int
5789zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5790{
5791	cap_rights_t rights;
5792	file_t *fp;
5793	int error;
5794	offset_t off;
5795	char *fromname = NULL;
5796	int fd;
5797	boolean_t largeblockok;
5798	boolean_t embedok;
5799	boolean_t compressok;
5800	uint64_t resumeobj = 0;
5801	uint64_t resumeoff = 0;
5802
5803	error = nvlist_lookup_int32(innvl, "fd", &fd);
5804	if (error != 0)
5805		return (SET_ERROR(EINVAL));
5806
5807	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
5808
5809	largeblockok = nvlist_exists(innvl, "largeblockok");
5810	embedok = nvlist_exists(innvl, "embedok");
5811	compressok = nvlist_exists(innvl, "compressok");
5812
5813	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
5814	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
5815
5816#ifdef illumos
5817	file_t *fp = getf(fd);
5818#else
5819	fget_write(curthread, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
5820#endif
5821	if (fp == NULL)
5822		return (SET_ERROR(EBADF));
5823
5824	off = fp->f_offset;
5825	error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
5826#ifdef illumos
5827	    fd, resumeobj, resumeoff, fp->f_vnode, &off);
5828#else
5829	    fd, resumeobj, resumeoff, fp, &off);
5830#endif
5831
5832#ifdef illumos
5833	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5834		fp->f_offset = off;
5835#else
5836	fp->f_offset = off;
5837#endif
5838
5839	releasef(fd);
5840	return (error);
5841}
5842
5843/*
5844 * Determine approximately how large a zfs send stream will be -- the number
5845 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
5846 *
5847 * innvl: {
5848 *     (optional) "from" -> full snap or bookmark name to send an incremental
5849 *                          from
5850 *     (optional) "largeblockok" -> (value ignored)
5851 *         indicates that blocks > 128KB are permitted
5852 *     (optional) "embedok" -> (value ignored)
5853 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
5854 *     (optional) "compressok" -> (value ignored)
5855 *         presence indicates compressed DRR_WRITE records are permitted
5856 * }
5857 *
5858 * outnvl: {
5859 *     "space" -> bytes of space (uint64)
5860 * }
5861 */
5862static int
5863zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5864{
5865	dsl_pool_t *dp;
5866	dsl_dataset_t *tosnap;
5867	int error;
5868	char *fromname;
5869	boolean_t compressok;
5870	uint64_t space;
5871
5872	error = dsl_pool_hold(snapname, FTAG, &dp);
5873	if (error != 0)
5874		return (error);
5875
5876	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
5877	if (error != 0) {
5878		dsl_pool_rele(dp, FTAG);
5879		return (error);
5880	}
5881
5882	compressok = nvlist_exists(innvl, "compressok");
5883
5884	error = nvlist_lookup_string(innvl, "from", &fromname);
5885	if (error == 0) {
5886		if (strchr(fromname, '@') != NULL) {
5887			/*
5888			 * If from is a snapshot, hold it and use the more
5889			 * efficient dmu_send_estimate to estimate send space
5890			 * size using deadlists.
5891			 */
5892			dsl_dataset_t *fromsnap;
5893			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
5894			if (error != 0)
5895				goto out;
5896			error = dmu_send_estimate(tosnap, fromsnap, compressok,
5897			    &space);
5898			dsl_dataset_rele(fromsnap, FTAG);
5899		} else if (strchr(fromname, '#') != NULL) {
5900			/*
5901			 * If from is a bookmark, fetch the creation TXG of the
5902			 * snapshot it was created from and use that to find
5903			 * blocks that were born after it.
5904			 */
5905			zfs_bookmark_phys_t frombm;
5906
5907			error = dsl_bookmark_lookup(dp, fromname, tosnap,
5908			    &frombm);
5909			if (error != 0)
5910				goto out;
5911			error = dmu_send_estimate_from_txg(tosnap,
5912			    frombm.zbm_creation_txg, compressok, &space);
5913		} else {
5914			/*
5915			 * from is not properly formatted as a snapshot or
5916			 * bookmark
5917			 */
5918			error = SET_ERROR(EINVAL);
5919			goto out;
5920		}
5921	} else {
5922		/*
5923		 * If estimating the size of a full send, use dmu_send_estimate.
5924		 */
5925		error = dmu_send_estimate(tosnap, NULL, compressok, &space);
5926	}
5927
5928	fnvlist_add_uint64(outnvl, "space", space);
5929
5930out:
5931	dsl_dataset_rele(tosnap, FTAG);
5932	dsl_pool_rele(dp, FTAG);
5933	return (error);
5934}
5935
/*
 * Table of ioctl handler descriptors, one slot per ioctl number in the
 * range [ZFS_IOC_FIRST, ZFS_IOC_LAST).  The registration functions index
 * it by (ioc - ZFS_IOC_FIRST).
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
5937
5938static void
5939zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5940    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5941    boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
5942{
5943	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5944
5945	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5946	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5947	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5948	ASSERT3P(vec->zvec_func, ==, NULL);
5949
5950	vec->zvec_legacy_func = func;
5951	vec->zvec_secpolicy = secpolicy;
5952	vec->zvec_namecheck = namecheck;
5953	vec->zvec_allow_log = log_history;
5954	vec->zvec_pool_check = pool_check;
5955}
5956
5957/*
5958 * See the block comment at the beginning of this file for details on
5959 * each argument to this function.
5960 */
5961static void
5962zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
5963    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5964    zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
5965    boolean_t allow_log)
5966{
5967	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5968
5969	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5970	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5971	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5972	ASSERT3P(vec->zvec_func, ==, NULL);
5973
5974	/* if we are logging, the name must be valid */
5975	ASSERT(!allow_log || namecheck != NO_NAME);
5976
5977	vec->zvec_name = name;
5978	vec->zvec_func = func;
5979	vec->zvec_secpolicy = secpolicy;
5980	vec->zvec_namecheck = namecheck;
5981	vec->zvec_pool_check = pool_check;
5982	vec->zvec_smush_outnvlist = smush_outnvlist;
5983	vec->zvec_allow_log = allow_log;
5984}
5985
5986static void
5987zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5988    zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
5989    zfs_ioc_poolcheck_t pool_check)
5990{
5991	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5992	    POOL_NAME, log_history, pool_check);
5993}
5994
5995static void
5996zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5997    zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
5998{
5999	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6000	    DATASET_NAME, B_FALSE, pool_check);
6001}
6002
6003static void
6004zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6005{
6006	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
6007	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6008}
6009
6010static void
6011zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6012    zfs_secpolicy_func_t *secpolicy)
6013{
6014	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6015	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
6016}
6017
6018static void
6019zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
6020    zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
6021{
6022	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6023	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
6024}
6025
6026static void
6027zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6028{
6029	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
6030	    zfs_secpolicy_read);
6031}
6032
6033static void
6034zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6035    zfs_secpolicy_func_t *secpolicy)
6036{
6037	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6038	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6039}
6040
/*
 * Populate zfs_ioc_vec with every supported ioctl.
 *
 * The first group uses zfs_ioctl_register() (nvlist-based handlers); the
 * remainder use the legacy zfs_cmd_t signature through
 * zfs_ioctl_register_legacy() and its convenience wrappers.  Each ioctl
 * number may be registered only once (the registration functions assert
 * that the slot is still empty).
 */
static void
zfs_ioctl_init(void)
{
	/*
	 * nvlist-based ioctls.  Argument order after the handler is:
	 * secpolicy, namecheck, pool_check, smush_outnvlist, allow_log.
	 */
	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);

	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("create", ZFS_IOC_CREATE,
	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("remap", ZFS_IOC_REMAP,
	    zfs_ioc_remap, zfs_secpolicy_remap, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);

	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);

	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
	    POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
	    zfs_ioc_channel_program, zfs_secpolicy_config,
	    POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
	    B_TRUE);

	zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
	    zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("zpool_discard_checkpoint",
	    ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
	    zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	/* IOCTLS that use the legacy function signature */

	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);

	/* Pool-name ioctls guarded by zfs_secpolicy_config. */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
	    zfs_ioc_pool_scan);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
	    zfs_ioc_pool_upgrade);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
	    zfs_ioc_vdev_add);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
	    zfs_ioc_vdev_remove);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
	    zfs_ioc_vdev_set_state);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
	    zfs_ioc_vdev_attach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
	    zfs_ioc_vdev_detach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
	    zfs_ioc_vdev_setpath);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
	    zfs_ioc_vdev_setfru);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
	    zfs_ioc_pool_set_props);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
	    zfs_ioc_vdev_split);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
	    zfs_ioc_pool_reguid);

	/* ioctls registered without a pool/dataset name (NO_NAME). */
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
	    zfs_ioc_pool_configs, zfs_secpolicy_none);
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);

	/*
	 * pool destroy, and export don't log the history as part of
	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
	 * does the logging of those commands.
	 */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
	    zfs_ioc_dsobj_to_dsname,
	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
	    zfs_ioc_pool_get_history,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);

	/* Dataset-name ioctls guarded by zfs_secpolicy_read, not logged. */
	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
	    zfs_ioc_space_written);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
	    zfs_ioc_objset_recvd_props);
	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
	    zfs_ioc_next_obj);
	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
	    zfs_ioc_get_fsacl);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
	    zfs_ioc_objset_stats);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
	    zfs_ioc_objset_zplprops);
	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
	    zfs_ioc_dataset_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
	    zfs_ioc_snapshot_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
	    zfs_ioc_send_progress);

	/* Same registration shape, but with a dedicated security policy. */
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
	    zfs_ioc_diff, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
	    zfs_ioc_send, zfs_secpolicy_send);

	/* Dataset-name ioctls registered as logged, modifying operations. */
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
	    zfs_secpolicy_none);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
	    zfs_secpolicy_destroy);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
	    zfs_secpolicy_rename);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
	    zfs_secpolicy_recv);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
	    zfs_secpolicy_promote);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
	    zfs_secpolicy_set_fsacl);

	/* Dataset-name ioctls that are never logged; pool checks vary. */
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
	    zfs_secpolicy_share, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);

#ifdef __FreeBSD__
	/* FreeBSD-only ioctls: jail attach/detach and next-boot setting. */
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_JAIL, zfs_ioc_jail,
	    zfs_secpolicy_config, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_UNJAIL, zfs_ioc_unjail,
	    zfs_secpolicy_config, POOL_CHECK_NONE);
	zfs_ioctl_register("fbsd_nextboot", ZFS_IOC_NEXTBOOT,
	    zfs_ioc_nextboot, zfs_secpolicy_config, NO_NAME,
	    POOL_CHECK_NONE, B_FALSE, B_FALSE);
#endif
}
6265
6266int
6267pool_status_check(const char *name, zfs_ioc_namecheck_t type,
6268    zfs_ioc_poolcheck_t check)
6269{
6270	spa_t *spa;
6271	int error;
6272
6273	ASSERT(type == POOL_NAME || type == DATASET_NAME);
6274
6275	if (check & POOL_CHECK_NONE)
6276		return (0);
6277
6278	error = spa_open(name, &spa, FTAG);
6279	if (error == 0) {
6280		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
6281			error = SET_ERROR(EAGAIN);
6282		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
6283			error = SET_ERROR(EROFS);
6284		spa_close(spa, FTAG);
6285	}
6286	return (error);
6287}
6288
6289/*
6290 * Find a free minor number.
6291 */
6292minor_t
6293zfsdev_minor_alloc(void)
6294{
6295	static minor_t last_minor;
6296	minor_t m;
6297
6298	ASSERT(MUTEX_HELD(&spa_namespace_lock));
6299
6300	for (m = last_minor + 1; m != last_minor; m++) {
6301		if (m > ZFSDEV_MAX_MINOR)
6302			m = 1;
6303		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
6304			last_minor = m;
6305			return (m);
6306		}
6307	}
6308
6309	return (0);
6310}
6311
6312static int
6313zfs_ctldev_init(struct cdev *devp)
6314{
6315	minor_t minor;
6316	zfs_soft_state_t *zs;
6317
6318	ASSERT(MUTEX_HELD(&spa_namespace_lock));
6319
6320	minor = zfsdev_minor_alloc();
6321	if (minor == 0)
6322		return (SET_ERROR(ENXIO));
6323
6324	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
6325		return (SET_ERROR(EAGAIN));
6326
6327	devfs_set_cdevpriv((void *)(uintptr_t)minor, zfsdev_close);
6328
6329	zs = ddi_get_soft_state(zfsdev_state, minor);
6330	zs->zss_type = ZSST_CTLDEV;
6331	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
6332
6333	return (0);
6334}
6335
6336static void
6337zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
6338{
6339	ASSERT(MUTEX_HELD(&spa_namespace_lock));
6340
6341	zfs_onexit_destroy(zo);
6342	ddi_soft_state_free(zfsdev_state, minor);
6343}
6344
6345void *
6346zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
6347{
6348	zfs_soft_state_t *zp;
6349
6350	zp = ddi_get_soft_state(zfsdev_state, minor);
6351	if (zp == NULL || zp->zss_type != which)
6352		return (NULL);
6353
6354	return (zp->zss_data);
6355}
6356
6357static int
6358zfsdev_open(struct cdev *devp, int flag, int mode, struct thread *td)
6359{
6360	int error = 0;
6361
6362#ifdef illumos
6363	if (getminor(*devp) != 0)
6364		return (zvol_open(devp, flag, otyp, cr));
6365#endif
6366
6367	/* This is the control device. Allocate a new minor if requested. */
6368	if (flag & FEXCL) {
6369		mutex_enter(&spa_namespace_lock);
6370		error = zfs_ctldev_init(devp);
6371		mutex_exit(&spa_namespace_lock);
6372	}
6373
6374	return (error);
6375}
6376
6377static void
6378zfsdev_close(void *data)
6379{
6380	zfs_onexit_t *zo;
6381	minor_t minor = (minor_t)(uintptr_t)data;
6382
6383	if (minor == 0)
6384		return;
6385
6386	mutex_enter(&spa_namespace_lock);
6387	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
6388	if (zo == NULL) {
6389		mutex_exit(&spa_namespace_lock);
6390		return;
6391	}
6392	zfs_ctldev_destroy(zo, minor);
6393	mutex_exit(&spa_namespace_lock);
6394}
6395
/*
 * ioctl entry point for the ZFS control device.
 *
 * Extracts the command number from zcmd, translates older zfs_cmd layouts
 * into the current zfs_cmd_t where necessary, unpacks the input nvlist,
 * validates the pool/dataset name and runs the registered security policy,
 * and then dispatches to either the nvlist-based handler (zvec_func) or the
 * legacy handler (zvec_legacy_func).  On the way out the zfs_cmd is copied
 * back to the caller, translated back to the caller's layout if needed.
 *
 * Returns 0 on success or an errno value.
 */
static int
zfsdev_ioctl(struct cdev *dev, u_long zcmd, caddr_t arg, int flag,
    struct thread *td)
{
	zfs_cmd_t *zc;
	uint_t vecnum;
	int error, rc, len;
#ifdef illumos
	minor_t minor = getminor(dev);
#else
	zfs_iocparm_t *zc_iocparm;
	int cflag, cmd, oldvecnum;
	boolean_t newioc, compat;
	void *compat_zc = NULL;
	cred_t *cr = td->td_ucred;
#endif
	const zfs_ioc_vec_t *vec;
	char *saved_poolname = NULL;
	nvlist_t *innvl = NULL;

	/* Defaults: current layout, passed via zfs_iocparm_t. */
	cflag = ZFS_CMD_COMPAT_NONE;
	compat = B_FALSE;
	newioc = B_TRUE;	/* "new" style (zfs_iocparm_t) ioctl */

	/* The low byte of zcmd selects the handler table entry. */
	len = IOCPARM_LEN(zcmd);
	vecnum = cmd = zcmd & 0xff;

	/*
	 * Check if we are talking to supported older binaries
	 * and translate zfs_cmd if necessary
	 */
	if (len != sizeof(zfs_iocparm_t)) {
		newioc = B_FALSE;
		compat = B_TRUE;

		vecnum = cmd;

		/* The argument size identifies which old layout was used. */
		switch (len) {
		case sizeof(zfs_cmd_zcmd_t):
			cflag = ZFS_CMD_COMPAT_LZC;
			break;
		case sizeof(zfs_cmd_deadman_t):
			cflag = ZFS_CMD_COMPAT_DEADMAN;
			break;
		case sizeof(zfs_cmd_v28_t):
			cflag = ZFS_CMD_COMPAT_V28;
			break;
		case sizeof(zfs_cmd_v15_t):
			cflag = ZFS_CMD_COMPAT_V15;
			/*
			 * NOTE(review): cmd can be any value up to 0xff here;
			 * confirm zfs_ioctl_v15_to_v28[] has an entry for
			 * every index userland can supply.
			 */
			vecnum = zfs_ioctl_v15_to_v28[cmd];

			/*
			 * Return without further handling
			 * if the command is blacklisted.
			 */
			if (vecnum == ZFS_IOC_COMPAT_PASS)
				return (0);
			else if (vecnum == ZFS_IOC_COMPAT_FAIL)
				return (ENOTSUP);
			break;
		default:
			return (EINVAL);
		}
	}

#ifdef illumos
	vecnum = cmd - ZFS_IOC_FIRST;
	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
#endif

	/* Reject command numbers that fall outside the handler table. */
	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
		return (SET_ERROR(EINVAL));
	vec = &zfs_ioc_vec[vecnum];

	zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);

#ifdef illumos
	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
	if (error != 0) {
		error = SET_ERROR(EFAULT);
		goto out;
	}
#else	/* !illumos */
	/* NOTE(review): presumably redundant after kmem_zalloc() above. */
	bzero(zc, sizeof(zfs_cmd_t));

	if (newioc) {
		/*
		 * arg is a zfs_iocparm_t describing the version, size and
		 * user address of the actual zfs_cmd.
		 */
		zc_iocparm = (void *)arg;

		/* Each known version must come with the matching size. */
		switch (zc_iocparm->zfs_ioctl_version) {
		case ZFS_IOCVER_CURRENT:
			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_t)) {
				error = SET_ERROR(EINVAL);
				goto out;
			}
			break;
		case ZFS_IOCVER_INLANES:
			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_inlanes_t)) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
			compat = B_TRUE;
			cflag = ZFS_CMD_COMPAT_INLANES;
			break;
		case ZFS_IOCVER_RESUME:
			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_resume_t)) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
			compat = B_TRUE;
			cflag = ZFS_CMD_COMPAT_RESUME;
			break;
		case ZFS_IOCVER_EDBP:
			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_edbp_t)) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
			compat = B_TRUE;
			cflag = ZFS_CMD_COMPAT_EDBP;
			break;
		case ZFS_IOCVER_ZCMD:
			/* This version accepts a range of sizes. */
			if (zc_iocparm->zfs_cmd_size > sizeof(zfs_cmd_t) ||
			    zc_iocparm->zfs_cmd_size < sizeof(zfs_cmd_zcmd_t)) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
			compat = B_TRUE;
			cflag = ZFS_CMD_COMPAT_ZCMD;
			break;
		default:
			error = SET_ERROR(EINVAL);
			goto out;
			/* NOTREACHED */
		}

		if (compat) {
			/*
			 * Old layout: copy it into a scratch buffer first; it
			 * is converted into zc further down.
			 */
			ASSERT(sizeof(zfs_cmd_t) >= zc_iocparm->zfs_cmd_size);
			compat_zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);
			bzero(compat_zc, sizeof(zfs_cmd_t));

			error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd,
			    compat_zc, zc_iocparm->zfs_cmd_size, flag);
			if (error != 0) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
		} else {
			/* Current layout: copy straight into zc. */
			error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd,
			    zc, zc_iocparm->zfs_cmd_size, flag);
			if (error != 0) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
		}
	}

	if (compat) {
		/*
		 * Convert the old layout into zc.  For old-style ioctls the
		 * zfs_cmd is arg itself rather than a separate buffer.
		 */
		if (newioc) {
			ASSERT(compat_zc != NULL);
			zfs_cmd_compat_get(zc, compat_zc, cflag);
		} else {
			ASSERT(compat_zc == NULL);
			zfs_cmd_compat_get(zc, arg, cflag);
		}
		oldvecnum = vecnum;
		error = zfs_ioctl_compat_pre(zc, &vecnum, cflag);
		if (error != 0)
			goto out;
		/*
		 * NOTE(review): vecnum is not re-checked against the size of
		 * zfs_ioc_vec after zfs_ioctl_compat_pre() may have changed
		 * it; confirm it can only yield in-range values.
		 */
		if (oldvecnum != vecnum)
			vec = &zfs_ioc_vec[vecnum];
	}
#endif	/* !illumos */

	/* Unpack the caller's input nvlist, if one was supplied. */
	zc->zc_iflags = flag & FKIOCTL;
	if (zc->zc_nvlist_src_size != 0) {
		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
		    zc->zc_iflags, &innvl);
		if (error != 0)
			goto out;
	}

	/* rewrite innvl for backwards compatibility */
	if (compat)
		innvl = zfs_ioctl_compat_innvl(zc, innvl, vecnum, cflag);

	/*
	 * Ensure that all pool/dataset names are valid before we pass down to
	 * the lower layers.
	 */
	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
	switch (vec->zvec_namecheck) {
	case POOL_NAME:
		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
			error = SET_ERROR(EINVAL);
		else
			error = pool_status_check(zc->zc_name,
			    vec->zvec_namecheck, vec->zvec_pool_check);
		break;

	case DATASET_NAME:
		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
			error = SET_ERROR(EINVAL);
		else
			error = pool_status_check(zc->zc_name,
			    vec->zvec_namecheck, vec->zvec_pool_check);
		break;

	case NO_NAME:
		break;
	}

	/* The security policy runs only if the name checks passed. */
	if (error == 0)
		error = vec->zvec_secpolicy(zc, innvl, cr);

	if (error != 0)
		goto out;

	/*
	 * Save just the pool component of the name (everything before the
	 * first '/', '@' or '#') now, because:
	 */
	/* legacy ioctls can modify zc_name */
	len = strcspn(zc->zc_name, "/@#") + 1;
	saved_poolname = kmem_alloc(len, KM_SLEEP);
	(void) strlcpy(saved_poolname, zc->zc_name, len);

	if (vec->zvec_func != NULL) {
		/* nvlist-based handler. */
		nvlist_t *outnvl;
		int puterror = 0;
		spa_t *spa;
		nvlist_t *lognv = NULL;

		ASSERT(vec->zvec_legacy_func == NULL);

		/*
		 * Add the innvl to the lognv before calling the func,
		 * in case the func changes the innvl.
		 */
		if (vec->zvec_allow_log) {
			lognv = fnvlist_alloc();
			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
			    vec->zvec_name);
			if (!nvlist_empty(innvl)) {
				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
				    innvl);
			}
		}

		outnvl = fnvlist_alloc();
		error = vec->zvec_func(zc->zc_name, innvl, outnvl);

		/*
		 * Some commands can partially execute, modify state, and still
		 * return an error.  In these cases, attempt to record what
		 * was modified.
		 */
		if ((error == 0 ||
		    (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
		    vec->zvec_allow_log &&
		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
			if (!nvlist_empty(outnvl)) {
				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
				    outnvl);
			}
			if (error != 0) {
				fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
				    error);
			}
			(void) spa_history_log_nvl(spa, lognv);
			spa_close(spa, FTAG);
		}
		fnvlist_free(lognv);

		/* rewrite outnvl for backwards compatibility */
		if (compat)
			outnvl = zfs_ioctl_compat_outnvl(zc, outnvl, vecnum,
			    cflag);

		/*
		 * Hand the output nvlist back to the caller.  If the handler
		 * was registered with smush_outnvlist, the list is first
		 * passed through nvlist_smush() with the caller's buffer size,
		 * and is only copied out if that succeeds.
		 */
		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
			int smusherror = 0;
			if (vec->zvec_smush_outnvlist) {
				smusherror = nvlist_smush(outnvl,
				    zc->zc_nvlist_dst_size);
			}
			if (smusherror == 0)
				puterror = put_nvlist(zc, outnvl);
		}

		/* A failure to return the output overrides the handler result. */
		if (puterror != 0)
			error = puterror;

		nvlist_free(outnvl);
	} else {
		/* Legacy handler: operates on the zfs_cmd_t directly. */
		error = vec->zvec_legacy_func(zc);
	}

out:
	nvlist_free(innvl);

	/*
	 * Copy the zfs_cmd back to the caller.  This also runs when we
	 * jumped here from one of the early failure paths above.
	 */
#ifdef illumos
	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
	if (error == 0 && rc != 0)
		error = SET_ERROR(EFAULT);
#else
	if (compat) {
		zfs_ioctl_compat_post(zc, cmd, cflag);
		if (newioc) {
			ASSERT(compat_zc != NULL);
			ASSERT(sizeof(zfs_cmd_t) >= zc_iocparm->zfs_cmd_size);

			/* Convert back to the caller's layout, then copy out. */
			zfs_cmd_compat_put(zc, compat_zc, vecnum, cflag);
			rc = ddi_copyout(compat_zc,
			    (void *)(uintptr_t)zc_iocparm->zfs_cmd,
			    zc_iocparm->zfs_cmd_size, flag);
			if (error == 0 && rc != 0)
				error = SET_ERROR(EFAULT);
			kmem_free(compat_zc, sizeof (zfs_cmd_t));
		} else {
			/* Old-style ioctl: convert in place into arg. */
			zfs_cmd_compat_put(zc, arg, vecnum, cflag);
		}
	} else {
		ASSERT(newioc);

		/*
		 * NOTE(review): this copies out sizeof (zfs_cmd_t) bytes even
		 * when we got here because the version or size check failed
		 * before anything was copied in (zc is then still all
		 * zeroes, and zfs_cmd_size may differ) -- confirm that is
		 * intended.
		 */
		rc = ddi_copyout(zc, (void *)(uintptr_t)zc_iocparm->zfs_cmd,
		    sizeof (zfs_cmd_t), flag);
		if (error == 0 && rc != 0)
			error = SET_ERROR(EFAULT);
	}
#endif
	/*
	 * On success of a loggable ioctl, store the saved pool name in
	 * thread-specific data under zfs_allow_log_key, freeing any previous
	 * value; otherwise free the saved name.
	 */
	if (error == 0 && vec->zvec_allow_log) {
		char *s = tsd_get(zfs_allow_log_key);
		if (s != NULL)
			strfree(s);
		(void) tsd_set(zfs_allow_log_key, saved_poolname);
	} else {
		if (saved_poolname != NULL)
			strfree(saved_poolname);
	}

	kmem_free(zc, sizeof (zfs_cmd_t));
	return (error);
}
6733
6734#ifdef illumos
6735static int
6736zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
6737{
6738	if (cmd != DDI_ATTACH)
6739		return (DDI_FAILURE);
6740
6741	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
6742	    DDI_PSEUDO, 0) == DDI_FAILURE)
6743		return (DDI_FAILURE);
6744
6745	zfs_dip = dip;
6746
6747	ddi_report_dev(dip);
6748
6749	return (DDI_SUCCESS);
6750}
6751
6752static int
6753zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6754{
6755	if (spa_busy() || zfs_busy() || zvol_busy())
6756		return (DDI_FAILURE);
6757
6758	if (cmd != DDI_DETACH)
6759		return (DDI_FAILURE);
6760
6761	zfs_dip = NULL;
6762
6763	ddi_prop_remove_all(dip);
6764	ddi_remove_minor_node(dip, NULL);
6765
6766	return (DDI_SUCCESS);
6767}
6768
6769/*ARGSUSED*/
6770static int
6771zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
6772{
6773	switch (infocmd) {
6774	case DDI_INFO_DEVT2DEVINFO:
6775		*result = zfs_dip;
6776		return (DDI_SUCCESS);
6777
6778	case DDI_INFO_DEVT2INSTANCE:
6779		*result = (void *)0;
6780		return (DDI_SUCCESS);
6781	}
6782
6783	return (DDI_FAILURE);
6784}
6785#endif	/* illumos */
6786
6787/*
6788 * OK, so this is a little weird.
6789 *
6790 * /dev/zfs is the control node, i.e. minor 0.
6791 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
6792 *
6793 * /dev/zfs has basically nothing to do except serve up ioctls,
6794 * so most of the standard driver entry points are in zvol.c.
6795 */
6796#ifdef illumos
/*
 * Character/block entry points of the ZFS driver (illumos only).
 *
 * The control-node operations (open, close, ioctl) are the zfsdev_* routines;
 * the strategy, dump, read and write slots are filled with zvol_* routines.
 * The initializer is positional, so the order of the entries below must
 * match the field order of struct cb_ops.
 */
static struct cb_ops zfs_cb_ops = {
	zfsdev_open,	/* open */
	zfsdev_close,	/* close */
	zvol_strategy,	/* strategy */
	nodev,		/* print */
	zvol_dump,	/* dump */
	zvol_read,	/* read */
	zvol_write,	/* write */
	zfsdev_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
	CB_REV,		/* version */
	nodev,		/* async read */
	nodev,		/* async write */
};
6817
/*
 * Device operations vector of the ZFS driver (illumos only).
 *
 * Wires up zfs_info(), zfs_attach() and zfs_detach() and points at
 * zfs_cb_ops for the character/block entry points. The initializer is
 * positional, so entry order must match the field order of struct dev_ops.
 */
static struct dev_ops zfs_dev_ops = {
	DEVO_REV,	/* version */
	0,		/* refcnt */
	zfs_info,	/* info */
	nulldev,	/* identify */
	nulldev,	/* probe */
	zfs_attach,	/* attach */
	zfs_detach,	/* detach */
	nodev,		/* reset */
	&zfs_cb_ops,	/* driver operations */
	NULL,		/* no bus operations */
	NULL,		/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
6832
/*
 * Driver linkage element (illumos only): ties the "ZFS storage pool" driver
 * description to zfs_dev_ops. Referenced from modlinkage below.
 */
static struct modldrv zfs_modldrv = {
	&mod_driverops,
	"ZFS storage pool",
	&zfs_dev_ops
};
6838
/*
 * Module linkage (illumos only) passed to mod_install(), mod_remove() and
 * mod_info() by _init(), _fini() and _info(). It lists two linkage elements,
 * zfs_modlfs and zfs_modldrv, followed by the NULL terminator.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&zfs_modlfs,
	(void *)&zfs_modldrv,
	NULL
};
6845#endif	/* illumos */
6846
/*
 * Character device switch for the ZFS control device created by
 * zfsdev_init(). Only the open and ioctl entry points are supplied; the
 * device is named ZFS_DEV_NAME.
 */
static struct cdevsw zfs_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	zfsdev_open,
	.d_ioctl =	zfsdev_ioctl,
	.d_name =	ZFS_DEV_NAME
};
6853
/*
 * Destructor registered for zfs_allow_log_key by tsd_create(): frees the
 * string held in the thread-specific slot (the pool name saved by
 * zfsdev_ioctl()).
 */
static void
zfs_allow_log_destroy(void *arg)
{
	strfree((char *)arg);
}
6860
6861static void
6862zfsdev_init(void)
6863{
6864	zfsdev = make_dev(&zfs_cdevsw, 0x0, UID_ROOT, GID_OPERATOR, 0666,
6865	    ZFS_DEV_NAME);
6866}
6867
6868static void
6869zfsdev_fini(void)
6870{
6871	if (zfsdev != NULL)
6872		destroy_dev(zfsdev);
6873}
6874
/*
 * Token returned by root_mount_hold("ZFS") in zfs__init() and handed back to
 * root_mount_rel() once initialization has finished.
 */
static struct root_hold_token *zfs_root_token;
/*
 * NOTE(review): not assigned or read in this part of the file; presumably
 * the process that owns ZFS kernel threads -- confirm against its users.
 */
struct proc *zfsproc;
6877
6878#ifdef illumos
6879int
6880_init(void)
6881{
6882	int error;
6883
6884	spa_init(FREAD | FWRITE);
6885	zfs_init();
6886	zvol_init();
6887	zfs_ioctl_init();
6888
6889	if ((error = mod_install(&modlinkage)) != 0) {
6890		zvol_fini();
6891		zfs_fini();
6892		spa_fini();
6893		return (error);
6894	}
6895
6896	tsd_create(&zfs_fsyncer_key, NULL);
6897	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
6898	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
6899
6900	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
6901	ASSERT(error == 0);
6902	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
6903
6904	return (0);
6905}
6906
6907int
6908_fini(void)
6909{
6910	int error;
6911
6912	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
6913		return (SET_ERROR(EBUSY));
6914
6915	if ((error = mod_remove(&modlinkage)) != 0)
6916		return (error);
6917
6918	zvol_fini();
6919	zfs_fini();
6920	spa_fini();
6921	if (zfs_nfsshare_inited)
6922		(void) ddi_modclose(nfs_mod);
6923	if (zfs_smbshare_inited)
6924		(void) ddi_modclose(smbsrv_mod);
6925	if (zfs_nfsshare_inited || zfs_smbshare_inited)
6926		(void) ddi_modclose(sharefs_mod);
6927
6928	tsd_destroy(&zfs_fsyncer_key);
6929	ldi_ident_release(zfs_li);
6930	zfs_li = NULL;
6931	mutex_destroy(&zfs_share_lock);
6932
6933	return (error);
6934}
6935
6936int
6937_info(struct modinfo *modinfop)
6938{
6939	return (mod_info(&modlinkage, modinfop));
6940}
6941#endif	/* illumos */
6942
/* Module lifecycle routines, defined below and driven by zfs_modevent(). */
static int zfs__init(void);
static int zfs__fini(void);
static void zfs_shutdown(void *, int);

/*
 * Tag returned by EVENTHANDLER_REGISTER() for the shutdown_post_sync handler
 * zfs_shutdown(); zfs_modevent() uses it to deregister the handler on unload.
 */
static eventhandler_tag zfs_shutdown_event_tag;
6948
#ifdef __FreeBSD__
/*
 * Threshold for KSTACK_PAGES: zfs__init() prints a stack-overflow notice when
 * the kernel was built with fewer pages than this.
 */
#define ZFS_MIN_KSTACK_PAGES 4
#endif
6952
6953int
6954zfs__init(void)
6955{
6956
6957#ifdef __FreeBSD__
6958#if KSTACK_PAGES < ZFS_MIN_KSTACK_PAGES
6959	printf("ZFS NOTICE: KSTACK_PAGES is %d which could result in stack "
6960	    "overflow panic!\nPlease consider adding "
6961	    "'options KSTACK_PAGES=%d' to your kernel config\n", KSTACK_PAGES,
6962	    ZFS_MIN_KSTACK_PAGES);
6963#endif
6964#endif
6965	zfs_root_token = root_mount_hold("ZFS");
6966
6967	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
6968
6969	spa_init(FREAD | FWRITE);
6970	zfs_init();
6971	zvol_init();
6972	zfs_ioctl_init();
6973
6974	tsd_create(&zfs_fsyncer_key, NULL);
6975	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
6976	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
6977	tsd_create(&zfs_geom_probe_vdev_key, NULL);
6978
6979	printf("ZFS storage pool version: features support (" SPA_VERSION_STRING ")\n");
6980	root_mount_rel(zfs_root_token);
6981
6982	zfsdev_init();
6983
6984	return (0);
6985}
6986
6987int
6988zfs__fini(void)
6989{
6990	if (spa_busy() || zfs_busy() || zvol_busy() ||
6991	    zio_injection_enabled) {
6992		return (EBUSY);
6993	}
6994
6995	zfsdev_fini();
6996	zvol_fini();
6997	zfs_fini();
6998	spa_fini();
6999
7000	tsd_destroy(&zfs_fsyncer_key);
7001	tsd_destroy(&rrw_tsd_key);
7002	tsd_destroy(&zfs_allow_log_key);
7003
7004	mutex_destroy(&zfs_share_lock);
7005
7006	return (0);
7007}
7008
7009static void
7010zfs_shutdown(void *arg __unused, int howto __unused)
7011{
7012
7013	/*
7014	 * ZFS fini routines can not properly work in a panic-ed system.
7015	 */
7016	if (panicstr == NULL)
7017		(void)zfs__fini();
7018}
7019
7020
7021static int
7022zfs_modevent(module_t mod, int type, void *unused __unused)
7023{
7024	int err;
7025
7026	switch (type) {
7027	case MOD_LOAD:
7028		err = zfs__init();
7029		if (err == 0)
7030			zfs_shutdown_event_tag = EVENTHANDLER_REGISTER(
7031			    shutdown_post_sync, zfs_shutdown, NULL,
7032			    SHUTDOWN_PRI_FIRST);
7033		return (err);
7034	case MOD_UNLOAD:
7035		err = zfs__fini();
7036		if (err == 0 && zfs_shutdown_event_tag != NULL)
7037			EVENTHANDLER_DEREGISTER(shutdown_post_sync,
7038			    zfs_shutdown_event_tag);
7039		return (err);
7040	case MOD_SHUTDOWN:
7041		return (0);
7042	default:
7043		break;
7044	}
7045	return (EOPNOTSUPP);
7046}
7047
/*
 * Kernel module glue for "zfsctrl": zfs_modevent() is the event handler and
 * no private data is passed to it. The module is declared at SI_SUB_VFS with
 * SI_ORDER_ANY, carries version 1, and depends on the opensolaris, krpc and
 * acl_nfs4 modules (each with minimum, preferred and maximum version 1).
 */
static moduledata_t zfs_mod = {
	"zfsctrl",
	zfs_modevent,
	0
};
DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_VFS, SI_ORDER_ANY);
MODULE_VERSION(zfsctrl, 1);
MODULE_DEPEND(zfsctrl, opensolaris, 1, 1, 1);
MODULE_DEPEND(zfsctrl, krpc, 1, 1, 1);
MODULE_DEPEND(zfsctrl, acl_nfs4, 1, 1, 1);
7058