1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Portions Copyright 2011 Martin Matuska
25 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
26 * Portions Copyright 2012 Pawel Jakub Dawidek <pawel@dawidek.net>
27 * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
28 * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
29 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
30 * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
31 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
32 * Copyright (c) 2013 Steven Hartland. All rights reserved.
33 * Copyright (c) 2014 Integros [integros.com]
34 * Copyright 2016 Toomas Soome <tsoome@me.com>
35 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
36 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
37 * Copyright 2017 RackTop Systems.
38 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
39 * Copyright (c) 2019 Datto Inc.
40 * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
41 * Copyright (c) 2019, Klara Inc.
42 * Copyright (c) 2019, Allan Jude
43 */
44
45/*
46 * ZFS ioctls.
47 *
48 * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
49 * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
50 *
51 * There are two ways that we handle ioctls: the legacy way where almost
52 * all of the logic is in the ioctl callback, and the new way where most
53 * of the marshalling is handled in the common entry point, zfsdev_ioctl().
54 *
55 * Non-legacy ioctls should be registered by calling
56 * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
57 * from userland by lzc_ioctl().
58 *
59 * The registration arguments are as follows:
60 *
61 * const char *name
62 *   The name of the ioctl.  This is used for history logging.  If the
63 *   ioctl returns successfully (the callback returns 0), and allow_log
64 *   is true, then a history log entry will be recorded with the input &
65 *   output nvlists.  The log entry can be printed with "zpool history -i".
66 *
67 * zfs_ioc_t ioc
68 *   The ioctl request number, which userland will pass to ioctl(2).
69 *   We want newer versions of libzfs and libzfs_core to run against
70 *   existing zfs kernel modules (i.e. a deferred reboot after an update).
71 *   Therefore the ioctl numbers cannot change from release to release.
72 *
73 * zfs_secpolicy_func_t *secpolicy
74 *   This function will be called before the zfs_ioc_func_t, to
75 *   determine if this operation is permitted.  It should return EPERM
76 *   on failure, and 0 on success.  Checks include determining if the
77 *   dataset is visible in this zone, and if the user has either all
78 *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
79 *   to do this operation on this dataset with "zfs allow".
80 *
81 * zfs_ioc_namecheck_t namecheck
82 *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
83 *   name, a dataset name, or nothing.  If the name is not well-formed,
84 *   the ioctl will fail and the callback will not be called.
85 *   Therefore, the callback can assume that the name is well-formed
86 *   (e.g. is null-terminated, doesn't have more than one '@' character,
87 *   doesn't have invalid characters).
88 *
89 * zfs_ioc_poolcheck_t pool_check
90 *   This specifies requirements on the pool state.  If the pool does
91 *   not meet them (is suspended or is readonly), the ioctl will fail
92 *   and the callback will not be called.  If any checks are specified
93 *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
94 *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
95 *   POOL_CHECK_READONLY).
96 *
97 * zfs_ioc_key_t *nvl_keys
98 *  The list of expected/allowable innvl input keys. This list is used
99 *  to validate the nvlist input to the ioctl.
100 *
101 * boolean_t smush_outnvlist
102 *   If smush_outnvlist is true, then the output is presumed to be a
103 *   list of errors, and it will be "smushed" down to fit into the
104 *   caller's buffer, by removing some entries and replacing them with a
105 *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
106 *   nvlist_smush() for details.  If smush_outnvlist is false, and the
107 *   outnvlist does not fit into the userland-provided buffer, then the
108 *   ioctl will fail with ENOMEM.
109 *
110 * zfs_ioc_func_t *func
111 *   The callback function that will perform the operation.
112 *
113 *   The callback should return 0 on success, or an error number on
114 *   failure.  If the function fails, the userland ioctl will return -1,
115 *   and errno will be set to the callback's return value.  The callback
116 *   will be called with the following arguments:
117 *
118 *   const char *name
119 *     The name of the pool or dataset to operate on, from
120 *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
121 *     expected type (pool, dataset, or none).
122 *
123 *   nvlist_t *innvl
124 *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
125 *     NULL if no input nvlist was provided.  Changes to this nvlist are
126 *     ignored.  If the input nvlist could not be deserialized, the
127 *     ioctl will fail and the callback will not be called.
128 *
129 *   nvlist_t *outnvl
130 *     The output nvlist, initially empty.  The callback can fill it in,
131 *     and it will be returned to userland by serializing it into
132 *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
133 *     fails (e.g. because the caller didn't supply a large enough
134 *     buffer), then the overall ioctl will fail.  See the
 *     'smush_outnvlist' argument above for additional behaviors.
136 *
137 *     There are two typical uses of the output nvlist:
138 *       - To return state, e.g. property values.  In this case,
139 *         smush_outnvlist should be false.  If the buffer was not large
140 *         enough, the caller will reallocate a larger buffer and try
141 *         the ioctl again.
142 *
143 *       - To return multiple errors from an ioctl which makes on-disk
144 *         changes.  In this case, smush_outnvlist should be true.
145 *         Ioctls which make on-disk modifications should generally not
146 *         use the outnvl if they succeed, because the caller can not
147 *         distinguish between the operation failing, and
148 *         deserialization failing.
149 *
150 * IOCTL Interface Errors
151 *
152 * The following ioctl input errors can be returned:
153 *   ZFS_ERR_IOC_CMD_UNAVAIL	the ioctl number is not supported by kernel
154 *   ZFS_ERR_IOC_ARG_UNAVAIL	an input argument is not supported by kernel
155 *   ZFS_ERR_IOC_ARG_REQUIRED	a required input argument is missing
156 *   ZFS_ERR_IOC_ARG_BADTYPE	an input argument has an invalid type
157 */
158
159#include <sys/types.h>
160#include <sys/param.h>
161#include <sys/errno.h>
162#include <sys/uio_impl.h>
163#include <sys/file.h>
164#include <sys/kmem.h>
165#include <sys/cmn_err.h>
166#include <sys/stat.h>
167#include <sys/zfs_ioctl.h>
168#include <sys/zfs_quota.h>
169#include <sys/zfs_vfsops.h>
170#include <sys/zfs_znode.h>
171#include <sys/zap.h>
172#include <sys/spa.h>
173#include <sys/spa_impl.h>
174#include <sys/vdev.h>
175#include <sys/vdev_impl.h>
176#include <sys/dmu.h>
177#include <sys/dsl_dir.h>
178#include <sys/dsl_dataset.h>
179#include <sys/dsl_prop.h>
180#include <sys/dsl_deleg.h>
181#include <sys/dmu_objset.h>
182#include <sys/dmu_impl.h>
183#include <sys/dmu_redact.h>
184#include <sys/dmu_tx.h>
185#include <sys/sunddi.h>
186#include <sys/policy.h>
187#include <sys/zone.h>
188#include <sys/nvpair.h>
189#include <sys/pathname.h>
190#include <sys/fs/zfs.h>
191#include <sys/zfs_ctldir.h>
192#include <sys/zfs_dir.h>
193#include <sys/zfs_onexit.h>
194#include <sys/zvol.h>
195#include <sys/dsl_scan.h>
196#include <sys/fm/util.h>
197#include <sys/dsl_crypt.h>
198#include <sys/rrwlock.h>
199#include <sys/zfs_file.h>
200
201#include <sys/dmu_recv.h>
202#include <sys/dmu_send.h>
203#include <sys/dmu_recv.h>
204#include <sys/dsl_destroy.h>
205#include <sys/dsl_bookmark.h>
206#include <sys/dsl_userhold.h>
207#include <sys/zfeature.h>
208#include <sys/zcp.h>
209#include <sys/zio_checksum.h>
210#include <sys/vdev_removal.h>
211#include <sys/vdev_impl.h>
212#include <sys/vdev_initialize.h>
213#include <sys/vdev_trim.h>
214
215#include "zfs_namecheck.h"
216#include "zfs_prop.h"
217#include "zfs_deleg.h"
218#include "zfs_comutil.h"
219
220#include <sys/lua/lua.h>
221#include <sys/lua/lauxlib.h>
222#include <sys/zfs_ioctl_impl.h>
223
/*
 * Global /dev/zfs state.
 * NOTE(review): presumably zfsdev_state_lock serializes updates to
 * zfsdev_state_list; neither is referenced in this part of the file, so
 * confirm against the code that manipulates them.
 */
kmutex_t zfsdev_state_lock;
zfsdev_state_t *zfsdev_state_list;

/*
 * Limit maximum nvlist size.  We don't want users passing in insane values
 * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
 * Defaults to 0=auto which is handled by platform code.
 */
unsigned long zfs_max_nvlist_src_size = 0;

/*
 * When logging the output nvlist of an ioctl in the on-disk history, limit
 * the logged size to this many bytes.  This must be less than DMU_MAX_ACCESS.
 * This applies primarily to zfs_ioc_channel_program().
 */
unsigned long zfs_history_output_max = 1024 * 1024;

/*
 * Thread-specific-data keys.  zfs_allow_log_key is read with tsd_get() by
 * zfs_secpolicy_log_history(), which refuses the request when the calling
 * thread has no value stored under it.
 * NOTE(review): zfs_fsyncer_key is presumably a TSD key as well -- confirm.
 */
uint_t zfs_fsyncer_key;
uint_t zfs_allow_log_key;

/* DATA_TYPE_ANY is used when zkey_type can vary. */
#define	DATA_TYPE_ANY	DATA_TYPE_UNKNOWN
246
/*
 * Per-ioctl registration record.  The fields mirror the registration
 * arguments described in the comment at the top of this file.
 */
typedef struct zfs_ioc_vec {
	/* callback for ioctls handled the legacy way */
	zfs_ioc_legacy_func_t	*zvec_legacy_func;
	/* callback for ioctls handled the new (nvlist-based) way */
	zfs_ioc_func_t		*zvec_func;
	/* permission check, called before the callback */
	zfs_secpolicy_func_t	*zvec_secpolicy;
	/* what to expect in zc_name: pool name, dataset name, or nothing */
	zfs_ioc_namecheck_t	zvec_namecheck;
	/* whether a successful call is recorded in the history log */
	boolean_t		zvec_allow_log;
	/* requirements on the pool state (suspended / readonly checks) */
	zfs_ioc_poolcheck_t	zvec_pool_check;
	/* whether an oversized output nvlist is "smushed" to fit */
	boolean_t		zvec_smush_outnvlist;
	/* name of the ioctl, used for history logging */
	const char		*zvec_name;
	/* expected/allowable input nvlist keys */
	const zfs_ioc_key_t	*zvec_nvl_keys;
	/* presumably the number of entries in zvec_nvl_keys -- confirm */
	size_t			zvec_nvl_key_count;
} zfs_ioc_vec_t;
259
/*
 * Delegated-permission names for the user/group/project space accounting
 * properties.
 *
 * This array is indexed by zfs_userquota_prop_t, so the order of the
 * entries below must match the order of that enumeration.
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
	ZFS_DELEG_PERM_USEROBJUSED,
	ZFS_DELEG_PERM_USEROBJQUOTA,
	ZFS_DELEG_PERM_GROUPOBJUSED,
	ZFS_DELEG_PERM_GROUPOBJQUOTA,
	ZFS_DELEG_PERM_PROJECTUSED,
	ZFS_DELEG_PERM_PROJECTQUOTA,
	ZFS_DELEG_PERM_PROJECTOBJUSED,
	ZFS_DELEG_PERM_PROJECTOBJQUOTA,
};
275
/*
 * Forward declarations for routines that are referenced before their
 * definitions.  All are file-local except zfs_set_prop_nvlist(), which has
 * external linkage.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(const char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
286
287static void
288history_str_free(char *buf)
289{
290	kmem_free(buf, HIS_MAX_RECORD_LEN);
291}
292
293static char *
294history_str_get(zfs_cmd_t *zc)
295{
296	char *buf;
297
298	if (zc->zc_history == 0)
299		return (NULL);
300
301	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
302	if (copyinstr((void *)(uintptr_t)zc->zc_history,
303	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
304		history_str_free(buf);
305		return (NULL);
306	}
307
308	buf[HIS_MAX_RECORD_LEN -1] = '\0';
309
310	return (buf);
311}
312
313/*
314 * Return non-zero if the spa version is less than requested version.
315 */
316static int
317zfs_earlier_version(const char *name, int version)
318{
319	spa_t *spa;
320
321	if (spa_open(name, &spa, FTAG) == 0) {
322		if (spa_version(spa) < version) {
323			spa_close(spa, FTAG);
324			return (1);
325		}
326		spa_close(spa, FTAG);
327	}
328	return (0);
329}
330
331/*
332 * Return TRUE if the ZPL version is less than requested version.
333 */
334static boolean_t
335zpl_earlier_version(const char *name, int version)
336{
337	objset_t *os;
338	boolean_t rc = B_TRUE;
339
340	if (dmu_objset_hold(name, FTAG, &os) == 0) {
341		uint64_t zplversion;
342
343		if (dmu_objset_type(os) != DMU_OST_ZFS) {
344			dmu_objset_rele(os, FTAG);
345			return (B_TRUE);
346		}
347		/* XXX reading from non-owned objset */
348		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
349			rc = zplversion < version;
350		dmu_objset_rele(os, FTAG);
351	}
352	return (rc);
353}
354
355static void
356zfs_log_history(zfs_cmd_t *zc)
357{
358	spa_t *spa;
359	char *buf;
360
361	if ((buf = history_str_get(zc)) == NULL)
362		return;
363
364	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
365		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
366			(void) spa_history_log(spa, buf);
367		spa_close(spa, FTAG);
368	}
369	history_str_free(buf);
370}
371
372/*
373 * Policy for top-level read operations (list pools).  Requires no privileges,
374 * and can be used in the local zone, as there is no associated dataset.
375 */
376/* ARGSUSED */
377static int
378zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
379{
380	return (0);
381}
382
383/*
384 * Policy for dataset read operations (list children, get statistics).  Requires
385 * no privileges, but must be visible in the local zone.
386 */
387/* ARGSUSED */
388static int
389zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
390{
391	if (INGLOBALZONE(curproc) ||
392	    zone_dataset_visible(zc->zc_name, NULL))
393		return (0);
394
395	return (SET_ERROR(ENOENT));
396}
397
398static int
399zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
400{
401	int writable = 1;
402
403	/*
404	 * The dataset must be visible by this zone -- check this first
405	 * so they don't see EPERM on something they shouldn't know about.
406	 */
407	if (!INGLOBALZONE(curproc) &&
408	    !zone_dataset_visible(dataset, &writable))
409		return (SET_ERROR(ENOENT));
410
411	if (INGLOBALZONE(curproc)) {
412		/*
413		 * If the fs is zoned, only root can access it from the
414		 * global zone.
415		 */
416		if (secpolicy_zfs(cr) && zoned)
417			return (SET_ERROR(EPERM));
418	} else {
419		/*
420		 * If we are in a local zone, the 'zoned' property must be set.
421		 */
422		if (!zoned)
423			return (SET_ERROR(EPERM));
424
425		/* must be writable by this zone */
426		if (!writable)
427			return (SET_ERROR(EPERM));
428	}
429	return (0);
430}
431
432static int
433zfs_dozonecheck(const char *dataset, cred_t *cr)
434{
435	uint64_t zoned;
436
437	if (dsl_prop_get_integer(dataset, zfs_prop_to_name(ZFS_PROP_ZONED),
438	    &zoned, NULL))
439		return (SET_ERROR(ENOENT));
440
441	return (zfs_dozonecheck_impl(dataset, zoned, cr));
442}
443
444static int
445zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
446{
447	uint64_t zoned;
448
449	if (dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned))
450		return (SET_ERROR(ENOENT));
451
452	return (zfs_dozonecheck_impl(dataset, zoned, cr));
453}
454
455static int
456zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
457    const char *perm, cred_t *cr)
458{
459	int error;
460
461	error = zfs_dozonecheck_ds(name, ds, cr);
462	if (error == 0) {
463		error = secpolicy_zfs(cr);
464		if (error != 0)
465			error = dsl_deleg_access_impl(ds, perm, cr);
466	}
467	return (error);
468}
469
470static int
471zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
472{
473	int error;
474	dsl_dataset_t *ds;
475	dsl_pool_t *dp;
476
477	/*
478	 * First do a quick check for root in the global zone, which
479	 * is allowed to do all write_perms.  This ensures that zfs_ioc_*
480	 * will get to handle nonexistent datasets.
481	 */
482	if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
483		return (0);
484
485	error = dsl_pool_hold(name, FTAG, &dp);
486	if (error != 0)
487		return (error);
488
489	error = dsl_dataset_hold(dp, name, FTAG, &ds);
490	if (error != 0) {
491		dsl_pool_rele(dp, FTAG);
492		return (error);
493	}
494
495	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
496
497	dsl_dataset_rele(ds, FTAG);
498	dsl_pool_rele(dp, FTAG);
499	return (error);
500}
501
502/*
503 * Policy for setting the security label property.
504 *
505 * Returns 0 for success, non-zero for access and other errors.
506 */
507static int
508zfs_set_slabel_policy(const char *name, const char *strval, cred_t *cr)
509{
510#ifdef HAVE_MLSLABEL
511	char		ds_hexsl[MAXNAMELEN];
512	bslabel_t	ds_sl, new_sl;
513	boolean_t	new_default = FALSE;
514	uint64_t	zoned;
515	int		needed_priv = -1;
516	int		error;
517
518	/* First get the existing dataset label. */
519	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
520	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
521	if (error != 0)
522		return (SET_ERROR(EPERM));
523
524	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
525		new_default = TRUE;
526
527	/* The label must be translatable */
528	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
529		return (SET_ERROR(EINVAL));
530
531	/*
532	 * In a non-global zone, disallow attempts to set a label that
533	 * doesn't match that of the zone; otherwise no other checks
534	 * are needed.
535	 */
536	if (!INGLOBALZONE(curproc)) {
537		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
538			return (SET_ERROR(EPERM));
539		return (0);
540	}
541
542	/*
543	 * For global-zone datasets (i.e., those whose zoned property is
544	 * "off", verify that the specified new label is valid for the
545	 * global zone.
546	 */
547	if (dsl_prop_get_integer(name,
548	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
549		return (SET_ERROR(EPERM));
550	if (!zoned) {
551		if (zfs_check_global_label(name, strval) != 0)
552			return (SET_ERROR(EPERM));
553	}
554
555	/*
556	 * If the existing dataset label is nondefault, check if the
557	 * dataset is mounted (label cannot be changed while mounted).
558	 * Get the zfsvfs_t; if there isn't one, then the dataset isn't
559	 * mounted (or isn't a dataset, doesn't exist, ...).
560	 */
561	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
562		objset_t *os;
563		static const char *setsl_tag = "setsl_tag";
564
565		/*
566		 * Try to own the dataset; abort if there is any error,
567		 * (e.g., already mounted, in use, or other error).
568		 */
569		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
570		    setsl_tag, &os);
571		if (error != 0)
572			return (SET_ERROR(EPERM));
573
574		dmu_objset_disown(os, B_TRUE, setsl_tag);
575
576		if (new_default) {
577			needed_priv = PRIV_FILE_DOWNGRADE_SL;
578			goto out_check;
579		}
580
581		if (hexstr_to_label(strval, &new_sl) != 0)
582			return (SET_ERROR(EPERM));
583
584		if (blstrictdom(&ds_sl, &new_sl))
585			needed_priv = PRIV_FILE_DOWNGRADE_SL;
586		else if (blstrictdom(&new_sl, &ds_sl))
587			needed_priv = PRIV_FILE_UPGRADE_SL;
588	} else {
589		/* dataset currently has a default label */
590		if (!new_default)
591			needed_priv = PRIV_FILE_UPGRADE_SL;
592	}
593
594out_check:
595	if (needed_priv != -1)
596		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
597	return (0);
598#else
599	return (SET_ERROR(ENOTSUP));
600#endif /* HAVE_MLSLABEL */
601}
602
603static int
604zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
605    cred_t *cr)
606{
607	char *strval;
608
609	/*
610	 * Check permissions for special properties.
611	 */
612	switch (prop) {
613	default:
614		break;
615	case ZFS_PROP_ZONED:
616		/*
617		 * Disallow setting of 'zoned' from within a local zone.
618		 */
619		if (!INGLOBALZONE(curproc))
620			return (SET_ERROR(EPERM));
621		break;
622
623	case ZFS_PROP_QUOTA:
624	case ZFS_PROP_FILESYSTEM_LIMIT:
625	case ZFS_PROP_SNAPSHOT_LIMIT:
626		if (!INGLOBALZONE(curproc)) {
627			uint64_t zoned;
628			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
629			/*
630			 * Unprivileged users are allowed to modify the
631			 * limit on things *under* (ie. contained by)
632			 * the thing they own.
633			 */
634			if (dsl_prop_get_integer(dsname,
635			    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, setpoint))
636				return (SET_ERROR(EPERM));
637			if (!zoned || strlen(dsname) <= strlen(setpoint))
638				return (SET_ERROR(EPERM));
639		}
640		break;
641
642	case ZFS_PROP_MLSLABEL:
643		if (!is_system_labeled())
644			return (SET_ERROR(EPERM));
645
646		if (nvpair_value_string(propval, &strval) == 0) {
647			int err;
648
649			err = zfs_set_slabel_policy(dsname, strval, CRED());
650			if (err != 0)
651				return (err);
652		}
653		break;
654	}
655
656	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
657}
658
659/* ARGSUSED */
660static int
661zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
662{
663	int error;
664
665	error = zfs_dozonecheck(zc->zc_name, cr);
666	if (error != 0)
667		return (error);
668
669	/*
670	 * permission to set permissions will be evaluated later in
671	 * dsl_deleg_can_allow()
672	 */
673	return (0);
674}
675
676/* ARGSUSED */
677static int
678zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
679{
680	return (zfs_secpolicy_write_perms(zc->zc_name,
681	    ZFS_DELEG_PERM_ROLLBACK, cr));
682}
683
684/* ARGSUSED */
685static int
686zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
687{
688	dsl_pool_t *dp;
689	dsl_dataset_t *ds;
690	const char *cp;
691	int error;
692
693	/*
694	 * Generate the current snapshot name from the given objsetid, then
695	 * use that name for the secpolicy/zone checks.
696	 */
697	cp = strchr(zc->zc_name, '@');
698	if (cp == NULL)
699		return (SET_ERROR(EINVAL));
700	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
701	if (error != 0)
702		return (error);
703
704	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
705	if (error != 0) {
706		dsl_pool_rele(dp, FTAG);
707		return (error);
708	}
709
710	dsl_dataset_name(ds, zc->zc_name);
711
712	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
713	    ZFS_DELEG_PERM_SEND, cr);
714	dsl_dataset_rele(ds, FTAG);
715	dsl_pool_rele(dp, FTAG);
716
717	return (error);
718}
719
720/* ARGSUSED */
721static int
722zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
723{
724	return (zfs_secpolicy_write_perms(zc->zc_name,
725	    ZFS_DELEG_PERM_SEND, cr));
726}
727
728static int
729zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
730{
731	return (SET_ERROR(ENOTSUP));
732}
733
734static int
735zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
736{
737	return (SET_ERROR(ENOTSUP));
738}
739
/*
 * Compute the parent of a dataset name.
 *
 * Copies 'datasetname' into the caller-supplied buffer 'parent' (which is
 * 'parentsize' bytes long) and cuts the copy off at the last '@' if there
 * is one, otherwise at the last '/'.  A name that does not fit in the
 * buffer is truncated; the buffer is always left NUL-terminated.
 *
 * Returns 0 on success, ENOENT if the name contains neither '@' nor '/'
 * (it has no parent), or EINVAL if parentsize is not positive.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0)
		return (SET_ERROR(EINVAL));

	/*
	 * strncpy() does not terminate the destination when the source is
	 * parentsize bytes or longer, so terminate it explicitly before
	 * searching it.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	parent[parentsize - 1] = '\0';

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp != NULL) {
		cp[0] = '\0';
	} else {
		cp = strrchr(parent, '/');
		if (cp == NULL)
			return (SET_ERROR(ENOENT));
		cp[0] = '\0';
	}

	return (0);
}
761
762int
763zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
764{
765	int error;
766
767	if ((error = zfs_secpolicy_write_perms(name,
768	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
769		return (error);
770
771	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
772}
773
774/* ARGSUSED */
775static int
776zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
777{
778	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
779}
780
781/*
782 * Destroying snapshots with delegated permissions requires
783 * descendant mount and destroy permissions.
784 */
785/* ARGSUSED */
786static int
787zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
788{
789	nvlist_t *snaps;
790	nvpair_t *pair, *nextpair;
791	int error = 0;
792
793	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
794
795	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
796	    pair = nextpair) {
797		nextpair = nvlist_next_nvpair(snaps, pair);
798		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
799		if (error == ENOENT) {
800			/*
801			 * Ignore any snapshots that don't exist (we consider
802			 * them "already destroyed").  Remove the name from the
803			 * nvl here in case the snapshot is created between
804			 * now and when we try to destroy it (in which case
805			 * we don't want to destroy it since we haven't
806			 * checked for permission).
807			 */
808			fnvlist_remove_nvpair(snaps, pair);
809			error = 0;
810		}
811		if (error != 0)
812			break;
813	}
814
815	return (error);
816}
817
818int
819zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
820{
821	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
822	int	error;
823
824	if ((error = zfs_secpolicy_write_perms(from,
825	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
826		return (error);
827
828	if ((error = zfs_secpolicy_write_perms(from,
829	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
830		return (error);
831
832	if ((error = zfs_get_parent(to, parentname,
833	    sizeof (parentname))) != 0)
834		return (error);
835
836	if ((error = zfs_secpolicy_write_perms(parentname,
837	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
838		return (error);
839
840	if ((error = zfs_secpolicy_write_perms(parentname,
841	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
842		return (error);
843
844	return (error);
845}
846
847/* ARGSUSED */
848static int
849zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
850{
851	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
852}
853
854/* ARGSUSED */
855static int
856zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
857{
858	dsl_pool_t *dp;
859	dsl_dataset_t *clone;
860	int error;
861
862	error = zfs_secpolicy_write_perms(zc->zc_name,
863	    ZFS_DELEG_PERM_PROMOTE, cr);
864	if (error != 0)
865		return (error);
866
867	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
868	if (error != 0)
869		return (error);
870
871	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
872
873	if (error == 0) {
874		char parentname[ZFS_MAX_DATASET_NAME_LEN];
875		dsl_dataset_t *origin = NULL;
876		dsl_dir_t *dd;
877		dd = clone->ds_dir;
878
879		error = dsl_dataset_hold_obj(dd->dd_pool,
880		    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
881		if (error != 0) {
882			dsl_dataset_rele(clone, FTAG);
883			dsl_pool_rele(dp, FTAG);
884			return (error);
885		}
886
887		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
888		    ZFS_DELEG_PERM_MOUNT, cr);
889
890		dsl_dataset_name(origin, parentname);
891		if (error == 0) {
892			error = zfs_secpolicy_write_perms_ds(parentname, origin,
893			    ZFS_DELEG_PERM_PROMOTE, cr);
894		}
895		dsl_dataset_rele(clone, FTAG);
896		dsl_dataset_rele(origin, FTAG);
897	}
898	dsl_pool_rele(dp, FTAG);
899	return (error);
900}
901
902/* ARGSUSED */
903static int
904zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
905{
906	int error;
907
908	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
909	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
910		return (error);
911
912	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
913	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
914		return (error);
915
916	return (zfs_secpolicy_write_perms(zc->zc_name,
917	    ZFS_DELEG_PERM_CREATE, cr));
918}
919
920/* ARGSUSED */
921static int
922zfs_secpolicy_recv_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
923{
924	return (zfs_secpolicy_recv(zc, innvl, cr));
925}
926
927int
928zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
929{
930	return (zfs_secpolicy_write_perms(name,
931	    ZFS_DELEG_PERM_SNAPSHOT, cr));
932}
933
934/*
935 * Check for permission to create each snapshot in the nvlist.
936 */
937/* ARGSUSED */
938static int
939zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
940{
941	nvlist_t *snaps;
942	int error = 0;
943	nvpair_t *pair;
944
945	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
946
947	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
948	    pair = nvlist_next_nvpair(snaps, pair)) {
949		char *name = nvpair_name(pair);
950		char *atp = strchr(name, '@');
951
952		if (atp == NULL) {
953			error = SET_ERROR(EINVAL);
954			break;
955		}
956		*atp = '\0';
957		error = zfs_secpolicy_snapshot_perms(name, cr);
958		*atp = '@';
959		if (error != 0)
960			break;
961	}
962	return (error);
963}
964
965/*
966 * Check for permission to create each bookmark in the nvlist.
967 */
968/* ARGSUSED */
969static int
970zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
971{
972	int error = 0;
973
974	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
975	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
976		char *name = nvpair_name(pair);
977		char *hashp = strchr(name, '#');
978
979		if (hashp == NULL) {
980			error = SET_ERROR(EINVAL);
981			break;
982		}
983		*hashp = '\0';
984		error = zfs_secpolicy_write_perms(name,
985		    ZFS_DELEG_PERM_BOOKMARK, cr);
986		*hashp = '#';
987		if (error != 0)
988			break;
989	}
990	return (error);
991}
992
993/* ARGSUSED */
994static int
995zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
996{
997	nvpair_t *pair, *nextpair;
998	int error = 0;
999
1000	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1001	    pair = nextpair) {
1002		char *name = nvpair_name(pair);
1003		char *hashp = strchr(name, '#');
1004		nextpair = nvlist_next_nvpair(innvl, pair);
1005
1006		if (hashp == NULL) {
1007			error = SET_ERROR(EINVAL);
1008			break;
1009		}
1010
1011		*hashp = '\0';
1012		error = zfs_secpolicy_write_perms(name,
1013		    ZFS_DELEG_PERM_DESTROY, cr);
1014		*hashp = '#';
1015		if (error == ENOENT) {
1016			/*
1017			 * Ignore any filesystems that don't exist (we consider
1018			 * their bookmarks "already destroyed").  Remove
1019			 * the name from the nvl here in case the filesystem
1020			 * is created between now and when we try to destroy
1021			 * the bookmark (in which case we don't want to
1022			 * destroy it since we haven't checked for permission).
1023			 */
1024			fnvlist_remove_nvpair(innvl, pair);
1025			error = 0;
1026		}
1027		if (error != 0)
1028			break;
1029	}
1030
1031	return (error);
1032}
1033
1034/* ARGSUSED */
1035static int
1036zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1037{
1038	/*
1039	 * Even root must have a proper TSD so that we know what pool
1040	 * to log to.
1041	 */
1042	if (tsd_get(zfs_allow_log_key) == NULL)
1043		return (SET_ERROR(EPERM));
1044	return (0);
1045}
1046
1047static int
1048zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1049{
1050	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
1051	int	error;
1052	char	*origin;
1053
1054	if ((error = zfs_get_parent(zc->zc_name, parentname,
1055	    sizeof (parentname))) != 0)
1056		return (error);
1057
1058	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1059	    (error = zfs_secpolicy_write_perms(origin,
1060	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1061		return (error);
1062
1063	if ((error = zfs_secpolicy_write_perms(parentname,
1064	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1065		return (error);
1066
1067	return (zfs_secpolicy_write_perms(parentname,
1068	    ZFS_DELEG_PERM_MOUNT, cr));
1069}
1070
1071/*
1072 * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1073 * SYS_CONFIG privilege, which is not available in a local zone.
1074 */
1075/* ARGSUSED */
1076int
1077zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1078{
1079	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1080		return (SET_ERROR(EPERM));
1081
1082	return (0);
1083}
1084
1085/*
1086 * Policy for object to name lookups.
1087 */
1088/* ARGSUSED */
1089static int
1090zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1091{
1092	int error;
1093
1094	if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1095		return (0);
1096
1097	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1098	return (error);
1099}
1100
1101/*
1102 * Policy for fault injection.  Requires all privileges.
1103 */
1104/* ARGSUSED */
1105static int
1106zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1107{
1108	return (secpolicy_zinject(cr));
1109}
1110
1111/* ARGSUSED */
1112static int
1113zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1114{
1115	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1116
1117	if (prop == ZPROP_INVAL) {
1118		if (!zfs_prop_user(zc->zc_value))
1119			return (SET_ERROR(EINVAL));
1120		return (zfs_secpolicy_write_perms(zc->zc_name,
1121		    ZFS_DELEG_PERM_USERPROP, cr));
1122	} else {
1123		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1124		    NULL, cr));
1125	}
1126}
1127
1128static int
1129zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1130{
1131	int err = zfs_secpolicy_read(zc, innvl, cr);
1132	if (err)
1133		return (err);
1134
1135	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1136		return (SET_ERROR(EINVAL));
1137
1138	if (zc->zc_value[0] == 0) {
1139		/*
1140		 * They are asking about a posix uid/gid.  If it's
1141		 * themself, allow it.
1142		 */
1143		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1144		    zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
1145		    zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
1146		    zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
1147			if (zc->zc_guid == crgetuid(cr))
1148				return (0);
1149		} else if (zc->zc_objset_type == ZFS_PROP_GROUPUSED ||
1150		    zc->zc_objset_type == ZFS_PROP_GROUPQUOTA ||
1151		    zc->zc_objset_type == ZFS_PROP_GROUPOBJUSED ||
1152		    zc->zc_objset_type == ZFS_PROP_GROUPOBJQUOTA) {
1153			if (groupmember(zc->zc_guid, cr))
1154				return (0);
1155		}
1156		/* else is for project quota/used */
1157	}
1158
1159	return (zfs_secpolicy_write_perms(zc->zc_name,
1160	    userquota_perms[zc->zc_objset_type], cr));
1161}
1162
1163static int
1164zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1165{
1166	int err = zfs_secpolicy_read(zc, innvl, cr);
1167	if (err)
1168		return (err);
1169
1170	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1171		return (SET_ERROR(EINVAL));
1172
1173	return (zfs_secpolicy_write_perms(zc->zc_name,
1174	    userquota_perms[zc->zc_objset_type], cr));
1175}
1176
1177/* ARGSUSED */
1178static int
1179zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1180{
1181	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1182	    NULL, cr));
1183}
1184
1185/* ARGSUSED */
1186static int
1187zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1188{
1189	nvpair_t *pair;
1190	nvlist_t *holds;
1191	int error;
1192
1193	holds = fnvlist_lookup_nvlist(innvl, "holds");
1194
1195	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1196	    pair = nvlist_next_nvpair(holds, pair)) {
1197		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1198		error = dmu_fsname(nvpair_name(pair), fsname);
1199		if (error != 0)
1200			return (error);
1201		error = zfs_secpolicy_write_perms(fsname,
1202		    ZFS_DELEG_PERM_HOLD, cr);
1203		if (error != 0)
1204			return (error);
1205	}
1206	return (0);
1207}
1208
1209/* ARGSUSED */
1210static int
1211zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1212{
1213	nvpair_t *pair;
1214	int error;
1215
1216	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1217	    pair = nvlist_next_nvpair(innvl, pair)) {
1218		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1219		error = dmu_fsname(nvpair_name(pair), fsname);
1220		if (error != 0)
1221			return (error);
1222		error = zfs_secpolicy_write_perms(fsname,
1223		    ZFS_DELEG_PERM_RELEASE, cr);
1224		if (error != 0)
1225			return (error);
1226	}
1227	return (0);
1228}
1229
1230/*
1231 * Policy for allowing temporary snapshots to be taken or released
1232 */
1233static int
1234zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1235{
1236	/*
1237	 * A temporary snapshot is the same as a snapshot,
1238	 * hold, destroy and release all rolled into one.
1239	 * Delegated diff alone is sufficient that we allow this.
1240	 */
1241	int error;
1242
1243	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1244	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
1245		return (0);
1246
1247	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1248
1249	if (innvl != NULL) {
1250		if (error == 0)
1251			error = zfs_secpolicy_hold(zc, innvl, cr);
1252		if (error == 0)
1253			error = zfs_secpolicy_release(zc, innvl, cr);
1254		if (error == 0)
1255			error = zfs_secpolicy_destroy(zc, innvl, cr);
1256	}
1257	return (error);
1258}
1259
1260static int
1261zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1262{
1263	return (zfs_secpolicy_write_perms(zc->zc_name,
1264	    ZFS_DELEG_PERM_LOAD_KEY, cr));
1265}
1266
1267static int
1268zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1269{
1270	return (zfs_secpolicy_write_perms(zc->zc_name,
1271	    ZFS_DELEG_PERM_CHANGE_KEY, cr));
1272}
1273
1274/*
1275 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1276 */
1277static int
1278get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1279{
1280	char *packed;
1281	int error;
1282	nvlist_t *list = NULL;
1283
1284	/*
1285	 * Read in and unpack the user-supplied nvlist.
1286	 */
1287	if (size == 0)
1288		return (SET_ERROR(EINVAL));
1289
1290	packed = vmem_alloc(size, KM_SLEEP);
1291
1292	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1293	    iflag)) != 0) {
1294		vmem_free(packed, size);
1295		return (SET_ERROR(EFAULT));
1296	}
1297
1298	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1299		vmem_free(packed, size);
1300		return (error);
1301	}
1302
1303	vmem_free(packed, size);
1304
1305	*nvp = list;
1306	return (0);
1307}
1308
1309/*
1310 * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1311 * Entries will be removed from the end of the nvlist, and one int32 entry
1312 * named "N_MORE_ERRORS" will be added indicating how many entries were
1313 * removed.
1314 */
1315static int
1316nvlist_smush(nvlist_t *errors, size_t max)
1317{
1318	size_t size;
1319
1320	size = fnvlist_size(errors);
1321
1322	if (size > max) {
1323		nvpair_t *more_errors;
1324		int n = 0;
1325
1326		if (max < 1024)
1327			return (SET_ERROR(ENOMEM));
1328
1329		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1330		more_errors = nvlist_prev_nvpair(errors, NULL);
1331
1332		do {
1333			nvpair_t *pair = nvlist_prev_nvpair(errors,
1334			    more_errors);
1335			fnvlist_remove_nvpair(errors, pair);
1336			n++;
1337			size = fnvlist_size(errors);
1338		} while (size > max);
1339
1340		fnvlist_remove_nvpair(errors, more_errors);
1341		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1342		ASSERT3U(fnvlist_size(errors), <=, max);
1343	}
1344
1345	return (0);
1346}
1347
1348static int
1349put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1350{
1351	char *packed = NULL;
1352	int error = 0;
1353	size_t size;
1354
1355	size = fnvlist_size(nvl);
1356
1357	if (size > zc->zc_nvlist_dst_size) {
1358		error = SET_ERROR(ENOMEM);
1359	} else {
1360		packed = fnvlist_pack(nvl, &size);
1361		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1362		    size, zc->zc_iflags) != 0)
1363			error = SET_ERROR(EFAULT);
1364		fnvlist_pack_free(packed, size);
1365	}
1366
1367	zc->zc_nvlist_dst_size = size;
1368	zc->zc_nvlist_dst_filled = B_TRUE;
1369	return (error);
1370}
1371
1372int
1373getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
1374{
1375	int error = 0;
1376	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1377		return (SET_ERROR(EINVAL));
1378	}
1379
1380	mutex_enter(&os->os_user_ptr_lock);
1381	*zfvp = dmu_objset_get_user(os);
1382	/* bump s_active only when non-zero to prevent umount race */
1383	error = zfs_vfs_ref(zfvp);
1384	mutex_exit(&os->os_user_ptr_lock);
1385	return (error);
1386}
1387
1388int
1389getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1390{
1391	objset_t *os;
1392	int error;
1393
1394	error = dmu_objset_hold(dsname, FTAG, &os);
1395	if (error != 0)
1396		return (error);
1397
1398	error = getzfsvfs_impl(os, zfvp);
1399	dmu_objset_rele(os, FTAG);
1400	return (error);
1401}
1402
1403/*
1404 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1405 * case its z_sb will be NULL, and it will be opened as the owner.
1406 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1407 * which prevents all inode ops from running.
1408 */
1409static int
1410zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1411{
1412	int error = 0;
1413
1414	if (getzfsvfs(name, zfvp) != 0)
1415		error = zfsvfs_create(name, B_FALSE, zfvp);
1416	if (error == 0) {
1417		if (writer)
1418			ZFS_TEARDOWN_ENTER_WRITE(*zfvp, tag);
1419		else
1420			ZFS_TEARDOWN_ENTER_READ(*zfvp, tag);
1421		if ((*zfvp)->z_unmounted) {
1422			/*
1423			 * XXX we could probably try again, since the unmounting
1424			 * thread should be just about to disassociate the
1425			 * objset from the zfsvfs.
1426			 */
1427			ZFS_TEARDOWN_EXIT(*zfvp, tag);
1428			return (SET_ERROR(EBUSY));
1429		}
1430	}
1431	return (error);
1432}
1433
1434static void
1435zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1436{
1437	ZFS_TEARDOWN_EXIT(zfsvfs, tag);
1438
1439	if (zfs_vfs_held(zfsvfs)) {
1440		zfs_vfs_rele(zfsvfs);
1441	} else {
1442		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
1443		zfsvfs_free(zfsvfs);
1444	}
1445}
1446
1447static int
1448zfs_ioc_pool_create(zfs_cmd_t *zc)
1449{
1450	int error;
1451	nvlist_t *config, *props = NULL;
1452	nvlist_t *rootprops = NULL;
1453	nvlist_t *zplprops = NULL;
1454	dsl_crypto_params_t *dcp = NULL;
1455	const char *spa_name = zc->zc_name;
1456	boolean_t unload_wkey = B_TRUE;
1457
1458	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1459	    zc->zc_iflags, &config)))
1460		return (error);
1461
1462	if (zc->zc_nvlist_src_size != 0 && (error =
1463	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1464	    zc->zc_iflags, &props))) {
1465		nvlist_free(config);
1466		return (error);
1467	}
1468
1469	if (props) {
1470		nvlist_t *nvl = NULL;
1471		nvlist_t *hidden_args = NULL;
1472		uint64_t version = SPA_VERSION;
1473		char *tname;
1474
1475		(void) nvlist_lookup_uint64(props,
1476		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1477		if (!SPA_VERSION_IS_SUPPORTED(version)) {
1478			error = SET_ERROR(EINVAL);
1479			goto pool_props_bad;
1480		}
1481		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1482		if (nvl) {
1483			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1484			if (error != 0)
1485				goto pool_props_bad;
1486			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1487		}
1488
1489		(void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
1490		    &hidden_args);
1491		error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
1492		    rootprops, hidden_args, &dcp);
1493		if (error != 0)
1494			goto pool_props_bad;
1495		(void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
1496
1497		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1498		error = zfs_fill_zplprops_root(version, rootprops,
1499		    zplprops, NULL);
1500		if (error != 0)
1501			goto pool_props_bad;
1502
1503		if (nvlist_lookup_string(props,
1504		    zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
1505			spa_name = tname;
1506	}
1507
1508	error = spa_create(zc->zc_name, config, props, zplprops, dcp);
1509
1510	/*
1511	 * Set the remaining root properties
1512	 */
1513	if (!error && (error = zfs_set_prop_nvlist(spa_name,
1514	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) {
1515		(void) spa_destroy(spa_name);
1516		unload_wkey = B_FALSE; /* spa_destroy() unloads wrapping keys */
1517	}
1518
1519pool_props_bad:
1520	nvlist_free(rootprops);
1521	nvlist_free(zplprops);
1522	nvlist_free(config);
1523	nvlist_free(props);
1524	dsl_crypto_params_free(dcp, unload_wkey && !!error);
1525
1526	return (error);
1527}
1528
1529static int
1530zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1531{
1532	int error;
1533	zfs_log_history(zc);
1534	error = spa_destroy(zc->zc_name);
1535
1536	return (error);
1537}
1538
1539static int
1540zfs_ioc_pool_import(zfs_cmd_t *zc)
1541{
1542	nvlist_t *config, *props = NULL;
1543	uint64_t guid;
1544	int error;
1545
1546	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1547	    zc->zc_iflags, &config)) != 0)
1548		return (error);
1549
1550	if (zc->zc_nvlist_src_size != 0 && (error =
1551	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1552	    zc->zc_iflags, &props))) {
1553		nvlist_free(config);
1554		return (error);
1555	}
1556
1557	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1558	    guid != zc->zc_guid)
1559		error = SET_ERROR(EINVAL);
1560	else
1561		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1562
1563	if (zc->zc_nvlist_dst != 0) {
1564		int err;
1565
1566		if ((err = put_nvlist(zc, config)) != 0)
1567			error = err;
1568	}
1569
1570	nvlist_free(config);
1571	nvlist_free(props);
1572
1573	return (error);
1574}
1575
1576static int
1577zfs_ioc_pool_export(zfs_cmd_t *zc)
1578{
1579	int error;
1580	boolean_t force = (boolean_t)zc->zc_cookie;
1581	boolean_t hardforce = (boolean_t)zc->zc_guid;
1582
1583	zfs_log_history(zc);
1584	error = spa_export(zc->zc_name, NULL, force, hardforce);
1585
1586	return (error);
1587}
1588
1589static int
1590zfs_ioc_pool_configs(zfs_cmd_t *zc)
1591{
1592	nvlist_t *configs;
1593	int error;
1594
1595	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1596		return (SET_ERROR(EEXIST));
1597
1598	error = put_nvlist(zc, configs);
1599
1600	nvlist_free(configs);
1601
1602	return (error);
1603}
1604
1605/*
1606 * inputs:
1607 * zc_name		name of the pool
1608 *
1609 * outputs:
1610 * zc_cookie		real errno
1611 * zc_nvlist_dst	config nvlist
1612 * zc_nvlist_dst_size	size of config nvlist
1613 */
1614static int
1615zfs_ioc_pool_stats(zfs_cmd_t *zc)
1616{
1617	nvlist_t *config;
1618	int error;
1619	int ret = 0;
1620
1621	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1622	    sizeof (zc->zc_value));
1623
1624	if (config != NULL) {
1625		ret = put_nvlist(zc, config);
1626		nvlist_free(config);
1627
1628		/*
1629		 * The config may be present even if 'error' is non-zero.
1630		 * In this case we return success, and preserve the real errno
1631		 * in 'zc_cookie'.
1632		 */
1633		zc->zc_cookie = error;
1634	} else {
1635		ret = error;
1636	}
1637
1638	return (ret);
1639}
1640
1641/*
1642 * Try to import the given pool, returning pool stats as appropriate so that
1643 * user land knows which devices are available and overall pool health.
1644 */
1645static int
1646zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1647{
1648	nvlist_t *tryconfig, *config = NULL;
1649	int error;
1650
1651	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1652	    zc->zc_iflags, &tryconfig)) != 0)
1653		return (error);
1654
1655	config = spa_tryimport(tryconfig);
1656
1657	nvlist_free(tryconfig);
1658
1659	if (config == NULL)
1660		return (SET_ERROR(EINVAL));
1661
1662	error = put_nvlist(zc, config);
1663	nvlist_free(config);
1664
1665	return (error);
1666}
1667
1668/*
1669 * inputs:
1670 * zc_name              name of the pool
1671 * zc_cookie            scan func (pool_scan_func_t)
1672 * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
1673 */
1674static int
1675zfs_ioc_pool_scan(zfs_cmd_t *zc)
1676{
1677	spa_t *spa;
1678	int error;
1679
1680	if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1681		return (SET_ERROR(EINVAL));
1682
1683	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1684		return (error);
1685
1686	if (zc->zc_flags == POOL_SCRUB_PAUSE)
1687		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1688	else if (zc->zc_cookie == POOL_SCAN_NONE)
1689		error = spa_scan_stop(spa);
1690	else
1691		error = spa_scan(spa, zc->zc_cookie);
1692
1693	spa_close(spa, FTAG);
1694
1695	return (error);
1696}
1697
1698static int
1699zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1700{
1701	spa_t *spa;
1702	int error;
1703
1704	error = spa_open(zc->zc_name, &spa, FTAG);
1705	if (error == 0) {
1706		spa_freeze(spa);
1707		spa_close(spa, FTAG);
1708	}
1709	return (error);
1710}
1711
1712static int
1713zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1714{
1715	spa_t *spa;
1716	int error;
1717
1718	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1719		return (error);
1720
1721	if (zc->zc_cookie < spa_version(spa) ||
1722	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1723		spa_close(spa, FTAG);
1724		return (SET_ERROR(EINVAL));
1725	}
1726
1727	spa_upgrade(spa, zc->zc_cookie);
1728	spa_close(spa, FTAG);
1729
1730	return (error);
1731}
1732
1733static int
1734zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1735{
1736	spa_t *spa;
1737	char *hist_buf;
1738	uint64_t size;
1739	int error;
1740
1741	if ((size = zc->zc_history_len) == 0)
1742		return (SET_ERROR(EINVAL));
1743
1744	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1745		return (error);
1746
1747	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1748		spa_close(spa, FTAG);
1749		return (SET_ERROR(ENOTSUP));
1750	}
1751
1752	hist_buf = vmem_alloc(size, KM_SLEEP);
1753	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1754	    &zc->zc_history_len, hist_buf)) == 0) {
1755		error = ddi_copyout(hist_buf,
1756		    (void *)(uintptr_t)zc->zc_history,
1757		    zc->zc_history_len, zc->zc_iflags);
1758	}
1759
1760	spa_close(spa, FTAG);
1761	vmem_free(hist_buf, size);
1762	return (error);
1763}
1764
1765static int
1766zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1767{
1768	spa_t *spa;
1769	int error;
1770
1771	error = spa_open(zc->zc_name, &spa, FTAG);
1772	if (error == 0) {
1773		error = spa_change_guid(spa);
1774		spa_close(spa, FTAG);
1775	}
1776	return (error);
1777}
1778
1779static int
1780zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1781{
1782	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1783}
1784
1785/*
1786 * inputs:
1787 * zc_name		name of filesystem
1788 * zc_obj		object to find
1789 *
1790 * outputs:
1791 * zc_value		name of object
1792 */
1793static int
1794zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1795{
1796	objset_t *os;
1797	int error;
1798
1799	/* XXX reading from objset not owned */
1800	if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1801	    FTAG, &os)) != 0)
1802		return (error);
1803	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1804		dmu_objset_rele_flags(os, B_TRUE, FTAG);
1805		return (SET_ERROR(EINVAL));
1806	}
1807	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1808	    sizeof (zc->zc_value));
1809	dmu_objset_rele_flags(os, B_TRUE, FTAG);
1810
1811	return (error);
1812}
1813
1814/*
1815 * inputs:
1816 * zc_name		name of filesystem
1817 * zc_obj		object to find
1818 *
1819 * outputs:
1820 * zc_stat		stats on object
1821 * zc_value		path to object
1822 */
1823static int
1824zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1825{
1826	objset_t *os;
1827	int error;
1828
1829	/* XXX reading from objset not owned */
1830	if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1831	    FTAG, &os)) != 0)
1832		return (error);
1833	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1834		dmu_objset_rele_flags(os, B_TRUE, FTAG);
1835		return (SET_ERROR(EINVAL));
1836	}
1837	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1838	    sizeof (zc->zc_value));
1839	dmu_objset_rele_flags(os, B_TRUE, FTAG);
1840
1841	return (error);
1842}
1843
1844static int
1845zfs_ioc_vdev_add(zfs_cmd_t *zc)
1846{
1847	spa_t *spa;
1848	int error;
1849	nvlist_t *config;
1850
1851	error = spa_open(zc->zc_name, &spa, FTAG);
1852	if (error != 0)
1853		return (error);
1854
1855	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1856	    zc->zc_iflags, &config);
1857	if (error == 0) {
1858		error = spa_vdev_add(spa, config);
1859		nvlist_free(config);
1860	}
1861	spa_close(spa, FTAG);
1862	return (error);
1863}
1864
1865/*
1866 * inputs:
1867 * zc_name		name of the pool
1868 * zc_guid		guid of vdev to remove
1869 * zc_cookie		cancel removal
1870 */
1871static int
1872zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1873{
1874	spa_t *spa;
1875	int error;
1876
1877	error = spa_open(zc->zc_name, &spa, FTAG);
1878	if (error != 0)
1879		return (error);
1880	if (zc->zc_cookie != 0) {
1881		error = spa_vdev_remove_cancel(spa);
1882	} else {
1883		error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1884	}
1885	spa_close(spa, FTAG);
1886	return (error);
1887}
1888
1889static int
1890zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1891{
1892	spa_t *spa;
1893	int error;
1894	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1895
1896	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1897		return (error);
1898	switch (zc->zc_cookie) {
1899	case VDEV_STATE_ONLINE:
1900		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1901		break;
1902
1903	case VDEV_STATE_OFFLINE:
1904		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1905		break;
1906
1907	case VDEV_STATE_FAULTED:
1908		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1909		    zc->zc_obj != VDEV_AUX_EXTERNAL &&
1910		    zc->zc_obj != VDEV_AUX_EXTERNAL_PERSIST)
1911			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1912
1913		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1914		break;
1915
1916	case VDEV_STATE_DEGRADED:
1917		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1918		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1919			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1920
1921		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1922		break;
1923
1924	default:
1925		error = SET_ERROR(EINVAL);
1926	}
1927	zc->zc_cookie = newstate;
1928	spa_close(spa, FTAG);
1929	return (error);
1930}
1931
1932static int
1933zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1934{
1935	spa_t *spa;
1936	nvlist_t *config;
1937	int replacing = zc->zc_cookie;
1938	int rebuild = zc->zc_simple;
1939	int error;
1940
1941	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1942		return (error);
1943
1944	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1945	    zc->zc_iflags, &config)) == 0) {
1946		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing,
1947		    rebuild);
1948		nvlist_free(config);
1949	}
1950
1951	spa_close(spa, FTAG);
1952	return (error);
1953}
1954
1955static int
1956zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1957{
1958	spa_t *spa;
1959	int error;
1960
1961	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1962		return (error);
1963
1964	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1965
1966	spa_close(spa, FTAG);
1967	return (error);
1968}
1969
1970static int
1971zfs_ioc_vdev_split(zfs_cmd_t *zc)
1972{
1973	spa_t *spa;
1974	nvlist_t *config, *props = NULL;
1975	int error;
1976	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1977
1978	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1979		return (error);
1980
1981	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1982	    zc->zc_iflags, &config))) {
1983		spa_close(spa, FTAG);
1984		return (error);
1985	}
1986
1987	if (zc->zc_nvlist_src_size != 0 && (error =
1988	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1989	    zc->zc_iflags, &props))) {
1990		spa_close(spa, FTAG);
1991		nvlist_free(config);
1992		return (error);
1993	}
1994
1995	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1996
1997	spa_close(spa, FTAG);
1998
1999	nvlist_free(config);
2000	nvlist_free(props);
2001
2002	return (error);
2003}
2004
2005static int
2006zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2007{
2008	spa_t *spa;
2009	const char *path = zc->zc_value;
2010	uint64_t guid = zc->zc_guid;
2011	int error;
2012
2013	error = spa_open(zc->zc_name, &spa, FTAG);
2014	if (error != 0)
2015		return (error);
2016
2017	error = spa_vdev_setpath(spa, guid, path);
2018	spa_close(spa, FTAG);
2019	return (error);
2020}
2021
2022static int
2023zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2024{
2025	spa_t *spa;
2026	const char *fru = zc->zc_value;
2027	uint64_t guid = zc->zc_guid;
2028	int error;
2029
2030	error = spa_open(zc->zc_name, &spa, FTAG);
2031	if (error != 0)
2032		return (error);
2033
2034	error = spa_vdev_setfru(spa, guid, fru);
2035	spa_close(spa, FTAG);
2036	return (error);
2037}
2038
2039static int
2040zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2041{
2042	int error = 0;
2043	nvlist_t *nv;
2044
2045	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2046
2047	if (zc->zc_nvlist_dst != 0 &&
2048	    (error = dsl_prop_get_all(os, &nv)) == 0) {
2049		dmu_objset_stats(os, nv);
2050		/*
2051		 * NB: zvol_get_stats() will read the objset contents,
2052		 * which we aren't supposed to do with a
2053		 * DS_MODE_USER hold, because it could be
2054		 * inconsistent.  So this is a bit of a workaround...
2055		 * XXX reading without owning
2056		 */
2057		if (!zc->zc_objset_stats.dds_inconsistent &&
2058		    dmu_objset_type(os) == DMU_OST_ZVOL) {
2059			error = zvol_get_stats(os, nv);
2060			if (error == EIO) {
2061				nvlist_free(nv);
2062				return (error);
2063			}
2064			VERIFY0(error);
2065		}
2066		if (error == 0)
2067			error = put_nvlist(zc, nv);
2068		nvlist_free(nv);
2069	}
2070
2071	return (error);
2072}
2073
2074/*
2075 * inputs:
2076 * zc_name		name of filesystem
2077 * zc_nvlist_dst_size	size of buffer for property nvlist
2078 *
2079 * outputs:
2080 * zc_objset_stats	stats
2081 * zc_nvlist_dst	property nvlist
2082 * zc_nvlist_dst_size	size of property nvlist
2083 */
2084static int
2085zfs_ioc_objset_stats(zfs_cmd_t *zc)
2086{
2087	objset_t *os;
2088	int error;
2089
2090	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2091	if (error == 0) {
2092		error = zfs_ioc_objset_stats_impl(zc, os);
2093		dmu_objset_rele(os, FTAG);
2094	}
2095
2096	return (error);
2097}
2098
2099/*
2100 * inputs:
2101 * zc_name		name of filesystem
2102 * zc_nvlist_dst_size	size of buffer for property nvlist
2103 *
2104 * outputs:
2105 * zc_nvlist_dst	received property nvlist
2106 * zc_nvlist_dst_size	size of received property nvlist
2107 *
2108 * Gets received properties (distinct from local properties on or after
2109 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2110 * local property values.
2111 */
2112static int
2113zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2114{
2115	int error = 0;
2116	nvlist_t *nv;
2117
2118	/*
2119	 * Without this check, we would return local property values if the
2120	 * caller has not already received properties on or after
2121	 * SPA_VERSION_RECVD_PROPS.
2122	 */
2123	if (!dsl_prop_get_hasrecvd(zc->zc_name))
2124		return (SET_ERROR(ENOTSUP));
2125
2126	if (zc->zc_nvlist_dst != 0 &&
2127	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2128		error = put_nvlist(zc, nv);
2129		nvlist_free(nv);
2130	}
2131
2132	return (error);
2133}
2134
2135static int
2136nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2137{
2138	uint64_t value;
2139	int error;
2140
2141	/*
2142	 * zfs_get_zplprop() will either find a value or give us
2143	 * the default value (if there is one).
2144	 */
2145	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2146		return (error);
2147	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2148	return (0);
2149}
2150
2151/*
2152 * inputs:
2153 * zc_name		name of filesystem
2154 * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2155 *
2156 * outputs:
2157 * zc_nvlist_dst	zpl property nvlist
2158 * zc_nvlist_dst_size	size of zpl property nvlist
2159 */
2160static int
2161zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2162{
2163	objset_t *os;
2164	int err;
2165
2166	/* XXX reading without owning */
2167	if ((err = dmu_objset_hold(zc->zc_name, FTAG, &os)))
2168		return (err);
2169
2170	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2171
2172	/*
2173	 * NB: nvl_add_zplprop() will read the objset contents,
2174	 * which we aren't supposed to do with a DS_MODE_USER
2175	 * hold, because it could be inconsistent.
2176	 */
2177	if (zc->zc_nvlist_dst != 0 &&
2178	    !zc->zc_objset_stats.dds_inconsistent &&
2179	    dmu_objset_type(os) == DMU_OST_ZFS) {
2180		nvlist_t *nv;
2181
2182		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2183		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2184		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2185		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2186		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2187			err = put_nvlist(zc, nv);
2188		nvlist_free(nv);
2189	} else {
2190		err = SET_ERROR(ENOENT);
2191	}
2192	dmu_objset_rele(os, FTAG);
2193	return (err);
2194}
2195
2196/*
2197 * inputs:
2198 * zc_name		name of filesystem
2199 * zc_cookie		zap cursor
2200 * zc_nvlist_dst_size	size of buffer for property nvlist
2201 *
2202 * outputs:
2203 * zc_name		name of next filesystem
2204 * zc_cookie		zap cursor
2205 * zc_objset_stats	stats
2206 * zc_nvlist_dst	property nvlist
2207 * zc_nvlist_dst_size	size of property nvlist
2208 */
2209static int
2210zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2211{
2212	objset_t *os;
2213	int error;
2214	char *p;
2215	size_t orig_len = strlen(zc->zc_name);
2216
2217top:
2218	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) {
2219		if (error == ENOENT)
2220			error = SET_ERROR(ESRCH);
2221		return (error);
2222	}
2223
2224	p = strrchr(zc->zc_name, '/');
2225	if (p == NULL || p[1] != '\0')
2226		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2227	p = zc->zc_name + strlen(zc->zc_name);
2228
2229	do {
2230		error = dmu_dir_list_next(os,
2231		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2232		    NULL, &zc->zc_cookie);
2233		if (error == ENOENT)
2234			error = SET_ERROR(ESRCH);
2235	} while (error == 0 && zfs_dataset_name_hidden(zc->zc_name));
2236	dmu_objset_rele(os, FTAG);
2237
2238	/*
2239	 * If it's an internal dataset (ie. with a '$' in its name),
2240	 * don't try to get stats for it, otherwise we'll return ENOENT.
2241	 */
2242	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2243		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2244		if (error == ENOENT) {
2245			/* We lost a race with destroy, get the next one. */
2246			zc->zc_name[orig_len] = '\0';
2247			goto top;
2248		}
2249	}
2250	return (error);
2251}
2252
2253/*
2254 * inputs:
2255 * zc_name		name of filesystem
2256 * zc_cookie		zap cursor
2257 * zc_nvlist_src	iteration range nvlist
2258 * zc_nvlist_src_size	size of iteration range nvlist
2259 *
2260 * outputs:
2261 * zc_name		name of next snapshot
2262 * zc_objset_stats	stats
2263 * zc_nvlist_dst	property nvlist
2264 * zc_nvlist_dst_size	size of property nvlist
2265 */
2266static int
2267zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2268{
2269	int error;
2270	objset_t *os, *ossnap;
2271	dsl_dataset_t *ds;
2272	uint64_t min_txg = 0, max_txg = 0;
2273
2274	if (zc->zc_nvlist_src_size != 0) {
2275		nvlist_t *props = NULL;
2276		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2277		    zc->zc_iflags, &props);
2278		if (error != 0)
2279			return (error);
2280		(void) nvlist_lookup_uint64(props, SNAP_ITER_MIN_TXG,
2281		    &min_txg);
2282		(void) nvlist_lookup_uint64(props, SNAP_ITER_MAX_TXG,
2283		    &max_txg);
2284		nvlist_free(props);
2285	}
2286
2287	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2288	if (error != 0) {
2289		return (error == ENOENT ? SET_ERROR(ESRCH) : error);
2290	}
2291
2292	/*
2293	 * A dataset name of maximum length cannot have any snapshots,
2294	 * so exit immediately.
2295	 */
2296	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2297	    ZFS_MAX_DATASET_NAME_LEN) {
2298		dmu_objset_rele(os, FTAG);
2299		return (SET_ERROR(ESRCH));
2300	}
2301
2302	while (error == 0) {
2303		if (issig(JUSTLOOKING) && issig(FORREAL)) {
2304			error = SET_ERROR(EINTR);
2305			break;
2306		}
2307
2308		error = dmu_snapshot_list_next(os,
2309		    sizeof (zc->zc_name) - strlen(zc->zc_name),
2310		    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj,
2311		    &zc->zc_cookie, NULL);
2312		if (error == ENOENT) {
2313			error = SET_ERROR(ESRCH);
2314			break;
2315		} else if (error != 0) {
2316			break;
2317		}
2318
2319		error = dsl_dataset_hold_obj(dmu_objset_pool(os), zc->zc_obj,
2320		    FTAG, &ds);
2321		if (error != 0)
2322			break;
2323
2324		if ((min_txg != 0 && dsl_get_creationtxg(ds) < min_txg) ||
2325		    (max_txg != 0 && dsl_get_creationtxg(ds) > max_txg)) {
2326			dsl_dataset_rele(ds, FTAG);
2327			/* undo snapshot name append */
2328			*(strchr(zc->zc_name, '@') + 1) = '\0';
2329			/* skip snapshot */
2330			continue;
2331		}
2332
2333		if (zc->zc_simple) {
2334			dsl_dataset_rele(ds, FTAG);
2335			break;
2336		}
2337
2338		if ((error = dmu_objset_from_ds(ds, &ossnap)) != 0) {
2339			dsl_dataset_rele(ds, FTAG);
2340			break;
2341		}
2342		if ((error = zfs_ioc_objset_stats_impl(zc, ossnap)) != 0) {
2343			dsl_dataset_rele(ds, FTAG);
2344			break;
2345		}
2346		dsl_dataset_rele(ds, FTAG);
2347		break;
2348	}
2349
2350	dmu_objset_rele(os, FTAG);
2351	/* if we failed, undo the @ that we tacked on to zc_name */
2352	if (error != 0)
2353		*strchr(zc->zc_name, '@') = '\0';
2354	return (error);
2355}
2356
2357static int
2358zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2359{
2360	const char *propname = nvpair_name(pair);
2361	uint64_t *valary;
2362	unsigned int vallen;
2363	const char *dash, *domain;
2364	zfs_userquota_prop_t type;
2365	uint64_t rid;
2366	uint64_t quota;
2367	zfsvfs_t *zfsvfs;
2368	int err;
2369
2370	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2371		nvlist_t *attrs;
2372		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2373		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2374		    &pair) != 0)
2375			return (SET_ERROR(EINVAL));
2376	}
2377
2378	/*
2379	 * A correctly constructed propname is encoded as
2380	 * userquota@<rid>-<domain>.
2381	 */
2382	if ((dash = strchr(propname, '-')) == NULL ||
2383	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2384	    vallen != 3)
2385		return (SET_ERROR(EINVAL));
2386
2387	domain = dash + 1;
2388	type = valary[0];
2389	rid = valary[1];
2390	quota = valary[2];
2391
2392	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2393	if (err == 0) {
2394		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2395		zfsvfs_rele(zfsvfs, FTAG);
2396	}
2397
2398	return (err);
2399}
2400
2401/*
2402 * If the named property is one that has a special function to set its value,
2403 * return 0 on success and a positive error code on failure; otherwise if it is
2404 * not one of the special properties handled by this function, return -1.
2405 *
2406 * XXX: It would be better for callers of the property interface if we handled
2407 * these special cases in dsl_prop.c (in the dsl layer).
2408 */
2409static int
2410zfs_prop_set_special(const char *dsname, zprop_source_t source,
2411    nvpair_t *pair)
2412{
2413	const char *propname = nvpair_name(pair);
2414	zfs_prop_t prop = zfs_name_to_prop(propname);
2415	uint64_t intval = 0;
2416	const char *strval = NULL;
2417	int err = -1;
2418
2419	if (prop == ZPROP_INVAL) {
2420		if (zfs_prop_userquota(propname))
2421			return (zfs_prop_set_userquota(dsname, pair));
2422		return (-1);
2423	}
2424
2425	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2426		nvlist_t *attrs;
2427		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2428		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2429		    &pair) == 0);
2430	}
2431
2432	/* all special properties are numeric except for keylocation */
2433	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
2434		strval = fnvpair_value_string(pair);
2435	} else {
2436		intval = fnvpair_value_uint64(pair);
2437	}
2438
2439	switch (prop) {
2440	case ZFS_PROP_QUOTA:
2441		err = dsl_dir_set_quota(dsname, source, intval);
2442		break;
2443	case ZFS_PROP_REFQUOTA:
2444		err = dsl_dataset_set_refquota(dsname, source, intval);
2445		break;
2446	case ZFS_PROP_FILESYSTEM_LIMIT:
2447	case ZFS_PROP_SNAPSHOT_LIMIT:
2448		if (intval == UINT64_MAX) {
2449			/* clearing the limit, just do it */
2450			err = 0;
2451		} else {
2452			err = dsl_dir_activate_fs_ss_limit(dsname);
2453		}
2454		/*
2455		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2456		 * default path to set the value in the nvlist.
2457		 */
2458		if (err == 0)
2459			err = -1;
2460		break;
2461	case ZFS_PROP_KEYLOCATION:
2462		err = dsl_crypto_can_set_keylocation(dsname, strval);
2463
2464		/*
2465		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2466		 * default path to set the value in the nvlist.
2467		 */
2468		if (err == 0)
2469			err = -1;
2470		break;
2471	case ZFS_PROP_RESERVATION:
2472		err = dsl_dir_set_reservation(dsname, source, intval);
2473		break;
2474	case ZFS_PROP_REFRESERVATION:
2475		err = dsl_dataset_set_refreservation(dsname, source, intval);
2476		break;
2477	case ZFS_PROP_COMPRESSION:
2478		err = dsl_dataset_set_compression(dsname, source, intval);
2479		/*
2480		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2481		 * default path to set the value in the nvlist.
2482		 */
2483		if (err == 0)
2484			err = -1;
2485		break;
2486	case ZFS_PROP_VOLSIZE:
2487		err = zvol_set_volsize(dsname, intval);
2488		break;
2489	case ZFS_PROP_SNAPDEV:
2490		err = zvol_set_snapdev(dsname, source, intval);
2491		break;
2492	case ZFS_PROP_VOLMODE:
2493		err = zvol_set_volmode(dsname, source, intval);
2494		break;
2495	case ZFS_PROP_VERSION:
2496	{
2497		zfsvfs_t *zfsvfs;
2498
2499		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2500			break;
2501
2502		err = zfs_set_version(zfsvfs, intval);
2503		zfsvfs_rele(zfsvfs, FTAG);
2504
2505		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2506			zfs_cmd_t *zc;
2507
2508			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2509			(void) strlcpy(zc->zc_name, dsname,
2510			    sizeof (zc->zc_name));
2511			(void) zfs_ioc_userspace_upgrade(zc);
2512			(void) zfs_ioc_id_quota_upgrade(zc);
2513			kmem_free(zc, sizeof (zfs_cmd_t));
2514		}
2515		break;
2516	}
2517	default:
2518		err = -1;
2519	}
2520
2521	return (err);
2522}
2523
2524static boolean_t
2525zfs_is_namespace_prop(zfs_prop_t prop)
2526{
2527	switch (prop) {
2528
2529	case ZFS_PROP_ATIME:
2530	case ZFS_PROP_RELATIME:
2531	case ZFS_PROP_DEVICES:
2532	case ZFS_PROP_EXEC:
2533	case ZFS_PROP_SETUID:
2534	case ZFS_PROP_READONLY:
2535	case ZFS_PROP_XATTR:
2536	case ZFS_PROP_NBMAND:
2537		return (B_TRUE);
2538
2539	default:
2540		return (B_FALSE);
2541	}
2542}
2543
2544/*
2545 * This function is best effort. If it fails to set any of the given properties,
2546 * it continues to set as many as it can and returns the last error
2547 * encountered. If the caller provides a non-NULL errlist, it will be filled in
2548 * with the list of names of all the properties that failed along with the
2549 * corresponding error numbers.
2550 *
2551 * If every property is set successfully, zero is returned and errlist is not
2552 * modified.
2553 */
2554int
2555zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2556    nvlist_t *errlist)
2557{
2558	nvpair_t *pair;
2559	nvpair_t *propval;
2560	int rv = 0;
2561	uint64_t intval;
2562	const char *strval;
2563	boolean_t should_update_mount_cache = B_FALSE;
2564
2565	nvlist_t *genericnvl = fnvlist_alloc();
2566	nvlist_t *retrynvl = fnvlist_alloc();
2567retry:
2568	pair = NULL;
2569	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2570		const char *propname = nvpair_name(pair);
2571		zfs_prop_t prop = zfs_name_to_prop(propname);
2572		int err = 0;
2573
2574		/* decode the property value */
2575		propval = pair;
2576		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2577			nvlist_t *attrs;
2578			attrs = fnvpair_value_nvlist(pair);
2579			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2580			    &propval) != 0)
2581				err = SET_ERROR(EINVAL);
2582		}
2583
2584		/* Validate value type */
2585		if (err == 0 && source == ZPROP_SRC_INHERITED) {
2586			/* inherited properties are expected to be booleans */
2587			if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
2588				err = SET_ERROR(EINVAL);
2589		} else if (err == 0 && prop == ZPROP_INVAL) {
2590			if (zfs_prop_user(propname)) {
2591				if (nvpair_type(propval) != DATA_TYPE_STRING)
2592					err = SET_ERROR(EINVAL);
2593			} else if (zfs_prop_userquota(propname)) {
2594				if (nvpair_type(propval) !=
2595				    DATA_TYPE_UINT64_ARRAY)
2596					err = SET_ERROR(EINVAL);
2597			} else {
2598				err = SET_ERROR(EINVAL);
2599			}
2600		} else if (err == 0) {
2601			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2602				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2603					err = SET_ERROR(EINVAL);
2604			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2605				const char *unused;
2606
2607				intval = fnvpair_value_uint64(propval);
2608
2609				switch (zfs_prop_get_type(prop)) {
2610				case PROP_TYPE_NUMBER:
2611					break;
2612				case PROP_TYPE_STRING:
2613					err = SET_ERROR(EINVAL);
2614					break;
2615				case PROP_TYPE_INDEX:
2616					if (zfs_prop_index_to_string(prop,
2617					    intval, &unused) != 0)
2618						err =
2619						    SET_ERROR(ZFS_ERR_BADPROP);
2620					break;
2621				default:
2622					cmn_err(CE_PANIC,
2623					    "unknown property type");
2624				}
2625			} else {
2626				err = SET_ERROR(EINVAL);
2627			}
2628		}
2629
2630		/* Validate permissions */
2631		if (err == 0)
2632			err = zfs_check_settable(dsname, pair, CRED());
2633
2634		if (err == 0) {
2635			if (source == ZPROP_SRC_INHERITED)
2636				err = -1; /* does not need special handling */
2637			else
2638				err = zfs_prop_set_special(dsname, source,
2639				    pair);
2640			if (err == -1) {
2641				/*
2642				 * For better performance we build up a list of
2643				 * properties to set in a single transaction.
2644				 */
2645				err = nvlist_add_nvpair(genericnvl, pair);
2646			} else if (err != 0 && nvl != retrynvl) {
2647				/*
2648				 * This may be a spurious error caused by
2649				 * receiving quota and reservation out of order.
2650				 * Try again in a second pass.
2651				 */
2652				err = nvlist_add_nvpair(retrynvl, pair);
2653			}
2654		}
2655
2656		if (err != 0) {
2657			if (errlist != NULL)
2658				fnvlist_add_int32(errlist, propname, err);
2659			rv = err;
2660		}
2661
2662		if (zfs_is_namespace_prop(prop))
2663			should_update_mount_cache = B_TRUE;
2664	}
2665
2666	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2667		nvl = retrynvl;
2668		goto retry;
2669	}
2670
2671	if (!nvlist_empty(genericnvl) &&
2672	    dsl_props_set(dsname, source, genericnvl) != 0) {
2673		/*
2674		 * If this fails, we still want to set as many properties as we
2675		 * can, so try setting them individually.
2676		 */
2677		pair = NULL;
2678		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2679			const char *propname = nvpair_name(pair);
2680			int err = 0;
2681
2682			propval = pair;
2683			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2684				nvlist_t *attrs;
2685				attrs = fnvpair_value_nvlist(pair);
2686				propval = fnvlist_lookup_nvpair(attrs,
2687				    ZPROP_VALUE);
2688			}
2689
2690			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2691				strval = fnvpair_value_string(propval);
2692				err = dsl_prop_set_string(dsname, propname,
2693				    source, strval);
2694			} else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) {
2695				err = dsl_prop_inherit(dsname, propname,
2696				    source);
2697			} else {
2698				intval = fnvpair_value_uint64(propval);
2699				err = dsl_prop_set_int(dsname, propname, source,
2700				    intval);
2701			}
2702
2703			if (err != 0) {
2704				if (errlist != NULL) {
2705					fnvlist_add_int32(errlist, propname,
2706					    err);
2707				}
2708				rv = err;
2709			}
2710		}
2711	}
2712	if (should_update_mount_cache)
2713		zfs_ioctl_update_mount_cache(dsname);
2714
2715	nvlist_free(genericnvl);
2716	nvlist_free(retrynvl);
2717
2718	return (rv);
2719}
2720
2721/*
2722 * Check that all the properties are valid user properties.
2723 */
2724static int
2725zfs_check_userprops(nvlist_t *nvl)
2726{
2727	nvpair_t *pair = NULL;
2728
2729	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2730		const char *propname = nvpair_name(pair);
2731
2732		if (!zfs_prop_user(propname) ||
2733		    nvpair_type(pair) != DATA_TYPE_STRING)
2734			return (SET_ERROR(EINVAL));
2735
2736		if (strlen(propname) >= ZAP_MAXNAMELEN)
2737			return (SET_ERROR(ENAMETOOLONG));
2738
2739		if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2740			return (SET_ERROR(E2BIG));
2741	}
2742	return (0);
2743}
2744
2745static void
2746props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2747{
2748	nvpair_t *pair;
2749
2750	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2751
2752	pair = NULL;
2753	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2754		if (nvlist_exists(skipped, nvpair_name(pair)))
2755			continue;
2756
2757		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2758	}
2759}
2760
2761static int
2762clear_received_props(const char *dsname, nvlist_t *props,
2763    nvlist_t *skipped)
2764{
2765	int err = 0;
2766	nvlist_t *cleared_props = NULL;
2767	props_skip(props, skipped, &cleared_props);
2768	if (!nvlist_empty(cleared_props)) {
2769		/*
2770		 * Acts on local properties until the dataset has received
2771		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2772		 */
2773		zprop_source_t flags = (ZPROP_SRC_NONE |
2774		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2775		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2776	}
2777	nvlist_free(cleared_props);
2778	return (err);
2779}
2780
2781/*
2782 * inputs:
2783 * zc_name		name of filesystem
2784 * zc_value		name of property to set
2785 * zc_nvlist_src{_size}	nvlist of properties to apply
2786 * zc_cookie		received properties flag
2787 *
2788 * outputs:
2789 * zc_nvlist_dst{_size} error for each unapplied received property
2790 */
2791static int
2792zfs_ioc_set_prop(zfs_cmd_t *zc)
2793{
2794	nvlist_t *nvl;
2795	boolean_t received = zc->zc_cookie;
2796	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2797	    ZPROP_SRC_LOCAL);
2798	nvlist_t *errors;
2799	int error;
2800
2801	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2802	    zc->zc_iflags, &nvl)) != 0)
2803		return (error);
2804
2805	if (received) {
2806		nvlist_t *origprops;
2807
2808		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2809			(void) clear_received_props(zc->zc_name,
2810			    origprops, nvl);
2811			nvlist_free(origprops);
2812		}
2813
2814		error = dsl_prop_set_hasrecvd(zc->zc_name);
2815	}
2816
2817	errors = fnvlist_alloc();
2818	if (error == 0)
2819		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2820
2821	if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2822		(void) put_nvlist(zc, errors);
2823	}
2824
2825	nvlist_free(errors);
2826	nvlist_free(nvl);
2827	return (error);
2828}
2829
2830/*
2831 * inputs:
2832 * zc_name		name of filesystem
2833 * zc_value		name of property to inherit
2834 * zc_cookie		revert to received value if TRUE
2835 *
2836 * outputs:		none
2837 */
2838static int
2839zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2840{
2841	const char *propname = zc->zc_value;
2842	zfs_prop_t prop = zfs_name_to_prop(propname);
2843	boolean_t received = zc->zc_cookie;
2844	zprop_source_t source = (received
2845	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2846	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2847	nvlist_t *dummy;
2848	nvpair_t *pair;
2849	zprop_type_t type;
2850	int err;
2851
2852	if (!received) {
2853		/*
2854		 * Only check this in the non-received case. We want to allow
2855		 * 'inherit -S' to revert non-inheritable properties like quota
2856		 * and reservation to the received or default values even though
2857		 * they are not considered inheritable.
2858		 */
2859		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2860			return (SET_ERROR(EINVAL));
2861	}
2862
2863	if (prop == ZPROP_INVAL) {
2864		if (!zfs_prop_user(propname))
2865			return (SET_ERROR(EINVAL));
2866
2867		type = PROP_TYPE_STRING;
2868	} else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) {
2869		return (SET_ERROR(EINVAL));
2870	} else {
2871		type = zfs_prop_get_type(prop);
2872	}
2873
2874	/*
2875	 * zfs_prop_set_special() expects properties in the form of an
2876	 * nvpair with type info.
2877	 */
2878	dummy = fnvlist_alloc();
2879
2880	switch (type) {
2881	case PROP_TYPE_STRING:
2882		VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2883		break;
2884	case PROP_TYPE_NUMBER:
2885	case PROP_TYPE_INDEX:
2886		VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2887		break;
2888	default:
2889		err = SET_ERROR(EINVAL);
2890		goto errout;
2891	}
2892
2893	pair = nvlist_next_nvpair(dummy, NULL);
2894	if (pair == NULL) {
2895		err = SET_ERROR(EINVAL);
2896	} else {
2897		err = zfs_prop_set_special(zc->zc_name, source, pair);
2898		if (err == -1) /* property is not "special", needs handling */
2899			err = dsl_prop_inherit(zc->zc_name, zc->zc_value,
2900			    source);
2901	}
2902
2903errout:
2904	nvlist_free(dummy);
2905	return (err);
2906}
2907
2908static int
2909zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2910{
2911	nvlist_t *props;
2912	spa_t *spa;
2913	int error;
2914	nvpair_t *pair;
2915
2916	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2917	    zc->zc_iflags, &props)))
2918		return (error);
2919
2920	/*
2921	 * If the only property is the configfile, then just do a spa_lookup()
2922	 * to handle the faulted case.
2923	 */
2924	pair = nvlist_next_nvpair(props, NULL);
2925	if (pair != NULL && strcmp(nvpair_name(pair),
2926	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2927	    nvlist_next_nvpair(props, pair) == NULL) {
2928		mutex_enter(&spa_namespace_lock);
2929		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2930			spa_configfile_set(spa, props, B_FALSE);
2931			spa_write_cachefile(spa, B_FALSE, B_TRUE);
2932		}
2933		mutex_exit(&spa_namespace_lock);
2934		if (spa != NULL) {
2935			nvlist_free(props);
2936			return (0);
2937		}
2938	}
2939
2940	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2941		nvlist_free(props);
2942		return (error);
2943	}
2944
2945	error = spa_prop_set(spa, props);
2946
2947	nvlist_free(props);
2948	spa_close(spa, FTAG);
2949
2950	return (error);
2951}
2952
2953static int
2954zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2955{
2956	spa_t *spa;
2957	int error;
2958	nvlist_t *nvp = NULL;
2959
2960	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2961		/*
2962		 * If the pool is faulted, there may be properties we can still
2963		 * get (such as altroot and cachefile), so attempt to get them
2964		 * anyway.
2965		 */
2966		mutex_enter(&spa_namespace_lock);
2967		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2968			error = spa_prop_get(spa, &nvp);
2969		mutex_exit(&spa_namespace_lock);
2970	} else {
2971		error = spa_prop_get(spa, &nvp);
2972		spa_close(spa, FTAG);
2973	}
2974
2975	if (error == 0 && zc->zc_nvlist_dst != 0)
2976		error = put_nvlist(zc, nvp);
2977	else
2978		error = SET_ERROR(EFAULT);
2979
2980	nvlist_free(nvp);
2981	return (error);
2982}
2983
2984/*
2985 * inputs:
2986 * zc_name		name of filesystem
2987 * zc_nvlist_src{_size}	nvlist of delegated permissions
2988 * zc_perm_action	allow/unallow flag
2989 *
2990 * outputs:		none
2991 */
2992static int
2993zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2994{
2995	int error;
2996	nvlist_t *fsaclnv = NULL;
2997
2998	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2999	    zc->zc_iflags, &fsaclnv)) != 0)
3000		return (error);
3001
3002	/*
3003	 * Verify nvlist is constructed correctly
3004	 */
3005	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
3006		nvlist_free(fsaclnv);
3007		return (SET_ERROR(EINVAL));
3008	}
3009
3010	/*
3011	 * If we don't have PRIV_SYS_MOUNT, then validate
3012	 * that user is allowed to hand out each permission in
3013	 * the nvlist(s)
3014	 */
3015
3016	error = secpolicy_zfs(CRED());
3017	if (error != 0) {
3018		if (zc->zc_perm_action == B_FALSE) {
3019			error = dsl_deleg_can_allow(zc->zc_name,
3020			    fsaclnv, CRED());
3021		} else {
3022			error = dsl_deleg_can_unallow(zc->zc_name,
3023			    fsaclnv, CRED());
3024		}
3025	}
3026
3027	if (error == 0)
3028		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3029
3030	nvlist_free(fsaclnv);
3031	return (error);
3032}
3033
3034/*
3035 * inputs:
3036 * zc_name		name of filesystem
3037 *
3038 * outputs:
3039 * zc_nvlist_src{_size}	nvlist of delegated permissions
3040 */
3041static int
3042zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3043{
3044	nvlist_t *nvp;
3045	int error;
3046
3047	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3048		error = put_nvlist(zc, nvp);
3049		nvlist_free(nvp);
3050	}
3051
3052	return (error);
3053}
3054
3055/* ARGSUSED */
3056static void
3057zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3058{
3059	zfs_creat_t *zct = arg;
3060
3061	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3062}
3063
/*
 * Sentinel (all bits set) marking a zpl property for which the creator
 * supplied no value; see zfs_fill_zplprops_impl().
 */
#define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
3065
3066/*
3067 * inputs:
3068 * os			parent objset pointer (NULL if root fs)
3069 * fuids_ok		fuids allowed in this version of the spa?
3070 * sa_ok		SAs allowed in this version of the spa?
3071 * createprops		list of properties requested by creator
3072 *
3073 * outputs:
3074 * zplprops	values for the zplprops we attach to the master node object
3075 * is_ci	true if requested file system will be purely case-insensitive
3076 *
3077 * Determine the settings for utf8only, normalization and
3078 * casesensitivity.  Specific values may have been requested by the
3079 * creator and/or we can inherit values from the parent dataset.  If
3080 * the file system is of too early a vintage, a creator can not
3081 * request settings for these properties, even if the requested
3082 * setting is the default value.  We don't actually want to create dsl
3083 * properties for these, so remove them from the source nvlist after
3084 * processing.
3085 */
3086static int
3087zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3088    boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3089    nvlist_t *zplprops, boolean_t *is_ci)
3090{
3091	uint64_t sense = ZFS_PROP_UNDEFINED;
3092	uint64_t norm = ZFS_PROP_UNDEFINED;
3093	uint64_t u8 = ZFS_PROP_UNDEFINED;
3094	int error;
3095
3096	ASSERT(zplprops != NULL);
3097
3098	/* parent dataset must be a filesystem */
3099	if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3100		return (SET_ERROR(ZFS_ERR_WRONG_PARENT));
3101
3102	/*
3103	 * Pull out creator prop choices, if any.
3104	 */
3105	if (createprops) {
3106		(void) nvlist_lookup_uint64(createprops,
3107		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3108		(void) nvlist_lookup_uint64(createprops,
3109		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3110		(void) nvlist_remove_all(createprops,
3111		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3112		(void) nvlist_lookup_uint64(createprops,
3113		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3114		(void) nvlist_remove_all(createprops,
3115		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3116		(void) nvlist_lookup_uint64(createprops,
3117		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3118		(void) nvlist_remove_all(createprops,
3119		    zfs_prop_to_name(ZFS_PROP_CASE));
3120	}
3121
3122	/*
3123	 * If the zpl version requested is whacky or the file system
3124	 * or pool is version is too "young" to support normalization
3125	 * and the creator tried to set a value for one of the props,
3126	 * error out.
3127	 */
3128	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3129	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3130	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3131	    (zplver < ZPL_VERSION_NORMALIZATION &&
3132	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3133	    sense != ZFS_PROP_UNDEFINED)))
3134		return (SET_ERROR(ENOTSUP));
3135
3136	/*
3137	 * Put the version in the zplprops
3138	 */
3139	VERIFY(nvlist_add_uint64(zplprops,
3140	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3141
3142	if (norm == ZFS_PROP_UNDEFINED &&
3143	    (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0)
3144		return (error);
3145	VERIFY(nvlist_add_uint64(zplprops,
3146	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3147
3148	/*
3149	 * If we're normalizing, names must always be valid UTF-8 strings.
3150	 */
3151	if (norm)
3152		u8 = 1;
3153	if (u8 == ZFS_PROP_UNDEFINED &&
3154	    (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0)
3155		return (error);
3156	VERIFY(nvlist_add_uint64(zplprops,
3157	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3158
3159	if (sense == ZFS_PROP_UNDEFINED &&
3160	    (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0)
3161		return (error);
3162	VERIFY(nvlist_add_uint64(zplprops,
3163	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3164
3165	if (is_ci)
3166		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3167
3168	return (0);
3169}
3170
3171static int
3172zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3173    nvlist_t *zplprops, boolean_t *is_ci)
3174{
3175	boolean_t fuids_ok, sa_ok;
3176	uint64_t zplver = ZPL_VERSION;
3177	objset_t *os = NULL;
3178	char parentname[ZFS_MAX_DATASET_NAME_LEN];
3179	spa_t *spa;
3180	uint64_t spa_vers;
3181	int error;
3182
3183	zfs_get_parent(dataset, parentname, sizeof (parentname));
3184
3185	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3186		return (error);
3187
3188	spa_vers = spa_version(spa);
3189	spa_close(spa, FTAG);
3190
3191	zplver = zfs_zpl_version_map(spa_vers);
3192	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3193	sa_ok = (zplver >= ZPL_VERSION_SA);
3194
3195	/*
3196	 * Open parent object set so we can inherit zplprop values.
3197	 */
3198	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3199		return (error);
3200
3201	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3202	    zplprops, is_ci);
3203	dmu_objset_rele(os, FTAG);
3204	return (error);
3205}
3206
3207static int
3208zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3209    nvlist_t *zplprops, boolean_t *is_ci)
3210{
3211	boolean_t fuids_ok;
3212	boolean_t sa_ok;
3213	uint64_t zplver = ZPL_VERSION;
3214	int error;
3215
3216	zplver = zfs_zpl_version_map(spa_vers);
3217	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3218	sa_ok = (zplver >= ZPL_VERSION_SA);
3219
3220	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3221	    createprops, zplprops, is_ci);
3222	return (error);
3223}
3224
3225/*
3226 * innvl: {
3227 *     "type" -> dmu_objset_type_t (int32)
3228 *     (optional) "props" -> { prop -> value }
3229 *     (optional) "hidden_args" -> { "wkeydata" -> value }
3230 *         raw uint8_t array of encryption wrapping key data (32 bytes)
3231 * }
3232 *
3233 * outnvl: propname -> error code (int32)
3234 */
3235
3236static const zfs_ioc_key_t zfs_keys_create[] = {
3237	{"type",	DATA_TYPE_INT32,	0},
3238	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
3239	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
3240};
3241
3242static int
3243zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3244{
3245	int error = 0;
3246	zfs_creat_t zct = { 0 };
3247	nvlist_t *nvprops = NULL;
3248	nvlist_t *hidden_args = NULL;
3249	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3250	dmu_objset_type_t type;
3251	boolean_t is_insensitive = B_FALSE;
3252	dsl_crypto_params_t *dcp = NULL;
3253
3254	type = (dmu_objset_type_t)fnvlist_lookup_int32(innvl, "type");
3255	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3256	(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
3257
3258	switch (type) {
3259	case DMU_OST_ZFS:
3260		cbfunc = zfs_create_cb;
3261		break;
3262
3263	case DMU_OST_ZVOL:
3264		cbfunc = zvol_create_cb;
3265		break;
3266
3267	default:
3268		cbfunc = NULL;
3269		break;
3270	}
3271	if (strchr(fsname, '@') ||
3272	    strchr(fsname, '%'))
3273		return (SET_ERROR(EINVAL));
3274
3275	zct.zct_props = nvprops;
3276
3277	if (cbfunc == NULL)
3278		return (SET_ERROR(EINVAL));
3279
3280	if (type == DMU_OST_ZVOL) {
3281		uint64_t volsize, volblocksize;
3282
3283		if (nvprops == NULL)
3284			return (SET_ERROR(EINVAL));
3285		if (nvlist_lookup_uint64(nvprops,
3286		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3287			return (SET_ERROR(EINVAL));
3288
3289		if ((error = nvlist_lookup_uint64(nvprops,
3290		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3291		    &volblocksize)) != 0 && error != ENOENT)
3292			return (SET_ERROR(EINVAL));
3293
3294		if (error != 0)
3295			volblocksize = zfs_prop_default_numeric(
3296			    ZFS_PROP_VOLBLOCKSIZE);
3297
3298		if ((error = zvol_check_volblocksize(fsname,
3299		    volblocksize)) != 0 ||
3300		    (error = zvol_check_volsize(volsize,
3301		    volblocksize)) != 0)
3302			return (error);
3303	} else if (type == DMU_OST_ZFS) {
3304		int error;
3305
3306		/*
3307		 * We have to have normalization and
3308		 * case-folding flags correct when we do the
3309		 * file system creation, so go figure them out
3310		 * now.
3311		 */
3312		VERIFY(nvlist_alloc(&zct.zct_zplprops,
3313		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3314		error = zfs_fill_zplprops(fsname, nvprops,
3315		    zct.zct_zplprops, &is_insensitive);
3316		if (error != 0) {
3317			nvlist_free(zct.zct_zplprops);
3318			return (error);
3319		}
3320	}
3321
3322	error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
3323	    hidden_args, &dcp);
3324	if (error != 0) {
3325		nvlist_free(zct.zct_zplprops);
3326		return (error);
3327	}
3328
3329	error = dmu_objset_create(fsname, type,
3330	    is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
3331
3332	nvlist_free(zct.zct_zplprops);
3333	dsl_crypto_params_free(dcp, !!error);
3334
3335	/*
3336	 * It would be nice to do this atomically.
3337	 */
3338	if (error == 0) {
3339		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3340		    nvprops, outnvl);
3341		if (error != 0) {
3342			spa_t *spa;
3343			int error2;
3344
3345			/*
3346			 * Volumes will return EBUSY and cannot be destroyed
3347			 * until all asynchronous minor handling (e.g. from
3348			 * setting the volmode property) has completed. Wait for
3349			 * the spa_zvol_taskq to drain then retry.
3350			 */
3351			error2 = dsl_destroy_head(fsname);
3352			while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
3353				error2 = spa_open(fsname, &spa, FTAG);
3354				if (error2 == 0) {
3355					taskq_wait(spa->spa_zvol_taskq);
3356					spa_close(spa, FTAG);
3357				}
3358				error2 = dsl_destroy_head(fsname);
3359			}
3360		}
3361	}
3362	return (error);
3363}
3364
3365/*
3366 * innvl: {
3367 *     "origin" -> name of origin snapshot
3368 *     (optional) "props" -> { prop -> value }
3369 *     (optional) "hidden_args" -> { "wkeydata" -> value }
3370 *         raw uint8_t array of encryption wrapping key data (32 bytes)
3371 * }
3372 *
3373 * outputs:
3374 * outnvl: propname -> error code (int32)
3375 */
3376static const zfs_ioc_key_t zfs_keys_clone[] = {
3377	{"origin",	DATA_TYPE_STRING,	0},
3378	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
3379	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
3380};
3381
3382static int
3383zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3384{
3385	int error = 0;
3386	nvlist_t *nvprops = NULL;
3387	const char *origin_name;
3388
3389	origin_name = fnvlist_lookup_string(innvl, "origin");
3390	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3391
3392	if (strchr(fsname, '@') ||
3393	    strchr(fsname, '%'))
3394		return (SET_ERROR(EINVAL));
3395
3396	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3397		return (SET_ERROR(EINVAL));
3398
3399	error = dmu_objset_clone(fsname, origin_name);
3400
3401	/*
3402	 * It would be nice to do this atomically.
3403	 */
3404	if (error == 0) {
3405		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3406		    nvprops, outnvl);
3407		if (error != 0)
3408			(void) dsl_destroy_head(fsname);
3409	}
3410	return (error);
3411}
3412
static const zfs_ioc_key_t zfs_keys_remap[] = {
	/* no nvl keys */
};

/*
 * Handler kept for the retired remap ioctl.  None of the arguments are
 * examined and the call always reports success (0).
 */
/* ARGSUSED */
static int
zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
{
	/* This IOCTL is no longer supported. */
	return (0);
}
3424
3425/*
3426 * innvl: {
3427 *     "snaps" -> { snapshot1, snapshot2 }
3428 *     (optional) "props" -> { prop -> value (string) }
3429 * }
3430 *
3431 * outnvl: snapshot -> error code (int32)
3432 */
3433static const zfs_ioc_key_t zfs_keys_snapshot[] = {
3434	{"snaps",	DATA_TYPE_NVLIST,	0},
3435	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
3436};
3437
3438static int
3439zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3440{
3441	nvlist_t *snaps;
3442	nvlist_t *props = NULL;
3443	int error, poollen;
3444	nvpair_t *pair;
3445
3446	(void) nvlist_lookup_nvlist(innvl, "props", &props);
3447	if (!nvlist_empty(props) &&
3448	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3449		return (SET_ERROR(ENOTSUP));
3450	if ((error = zfs_check_userprops(props)) != 0)
3451		return (error);
3452
3453	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
3454	poollen = strlen(poolname);
3455	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3456	    pair = nvlist_next_nvpair(snaps, pair)) {
3457		const char *name = nvpair_name(pair);
3458		char *cp = strchr(name, '@');
3459
3460		/*
3461		 * The snap name must contain an @, and the part after it must
3462		 * contain only valid characters.
3463		 */
3464		if (cp == NULL ||
3465		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3466			return (SET_ERROR(EINVAL));
3467
3468		/*
3469		 * The snap must be in the specified pool.
3470		 */
3471		if (strncmp(name, poolname, poollen) != 0 ||
3472		    (name[poollen] != '/' && name[poollen] != '@'))
3473			return (SET_ERROR(EXDEV));
3474
3475		/*
3476		 * Check for permission to set the properties on the fs.
3477		 */
3478		if (!nvlist_empty(props)) {
3479			*cp = '\0';
3480			error = zfs_secpolicy_write_perms(name,
3481			    ZFS_DELEG_PERM_USERPROP, CRED());
3482			*cp = '@';
3483			if (error != 0)
3484				return (error);
3485		}
3486
3487		/* This must be the only snap of this fs. */
3488		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3489		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3490			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3491			    == 0) {
3492				return (SET_ERROR(EXDEV));
3493			}
3494		}
3495	}
3496
3497	error = dsl_dataset_snapshot(snaps, props, outnvl);
3498
3499	return (error);
3500}
3501
3502/*
3503 * innvl: "message" -> string
3504 */
3505static const zfs_ioc_key_t zfs_keys_log_history[] = {
3506	{"message",	DATA_TYPE_STRING,	0},
3507};
3508
3509/* ARGSUSED */
3510static int
3511zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3512{
3513	const char *message;
3514	char *poolname;
3515	spa_t *spa;
3516	int error;
3517
3518	/*
3519	 * The poolname in the ioctl is not set, we get it from the TSD,
3520	 * which was set at the end of the last successful ioctl that allows
3521	 * logging.  The secpolicy func already checked that it is set.
3522	 * Only one log ioctl is allowed after each successful ioctl, so
3523	 * we clear the TSD here.
3524	 */
3525	poolname = tsd_get(zfs_allow_log_key);
3526	if (poolname == NULL)
3527		return (SET_ERROR(EINVAL));
3528	(void) tsd_set(zfs_allow_log_key, NULL);
3529	error = spa_open(poolname, &spa, FTAG);
3530	kmem_strfree(poolname);
3531	if (error != 0)
3532		return (error);
3533
3534	message = fnvlist_lookup_string(innvl, "message");
3535
3536	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3537		spa_close(spa, FTAG);
3538		return (SET_ERROR(ENOTSUP));
3539	}
3540
3541	error = spa_history_log(spa, message);
3542	spa_close(spa, FTAG);
3543	return (error);
3544}
3545
3546/*
3547 * This ioctl is used to set the bootenv configuration on the current
3548 * pool. This configuration is stored in the second padding area of the label,
3549 * and it is used by the bootloader(s) to store the bootloader and/or system
3550 * specific data.
3551 * The data is stored as nvlist data stream, and is protected by
3552 * an embedded checksum.
3553 * The version can have two possible values:
3554 * VB_RAW: nvlist should have key GRUB_ENVMAP, value DATA_TYPE_STRING.
3555 * VB_NVLIST: nvlist with arbitrary <key, value> pairs.
3556 */
3557static const zfs_ioc_key_t zfs_keys_set_bootenv[] = {
3558	{"version",	DATA_TYPE_UINT64,	0},
3559	{"<keys>",	DATA_TYPE_ANY, ZK_OPTIONAL | ZK_WILDCARDLIST},
3560};
3561
3562static int
3563zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
3564{
3565	int error;
3566	spa_t *spa;
3567
3568	if ((error = spa_open(name, &spa, FTAG)) != 0)
3569		return (error);
3570	spa_vdev_state_enter(spa, SCL_ALL);
3571	error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl);
3572	(void) spa_vdev_state_exit(spa, NULL, 0);
3573	spa_close(spa, FTAG);
3574	return (error);
3575}
3576
3577static const zfs_ioc_key_t zfs_keys_get_bootenv[] = {
3578	/* no nvl keys */
3579};
3580
3581static int
3582zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
3583{
3584	spa_t *spa;
3585	int error;
3586
3587	if ((error = spa_open(name, &spa, FTAG)) != 0)
3588		return (error);
3589	spa_vdev_state_enter(spa, SCL_ALL);
3590	error = vdev_label_read_bootenv(spa->spa_root_vdev, outnvl);
3591	(void) spa_vdev_state_exit(spa, NULL, 0);
3592	spa_close(spa, FTAG);
3593	return (error);
3594}
3595
3596/*
3597 * The dp_config_rwlock must not be held when calling this, because the
3598 * unmount may need to write out data.
3599 *
3600 * This function is best-effort.  Callers must deal gracefully if it
3601 * remains mounted (or is remounted after this call).
3602 *
 * Does nothing if the argument is not a snapshot name (it contains no '@').
 * Otherwise a forced unmount is attempted.  No status is returned to the
 * caller; any error from the unmount is ignored.
3605 */
3606void
3607zfs_unmount_snap(const char *snapname)
3608{
3609	if (strchr(snapname, '@') == NULL)
3610		return;
3611
3612	(void) zfsctl_snapshot_unmount(snapname, MNT_FORCE);
3613}
3614
/*
 * Callback-shaped wrapper around zfs_unmount_snap().  The arg parameter is
 * not used, and 0 is always returned regardless of whether anything was
 * unmounted.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	zfs_unmount_snap(snapname);
	return (0);
}
3622
3623/*
3624 * When a clone is destroyed, its origin may also need to be destroyed,
3625 * in which case it must be unmounted.  This routine will do that unmount
3626 * if necessary.
3627 */
3628void
3629zfs_destroy_unmount_origin(const char *fsname)
3630{
3631	int error;
3632	objset_t *os;
3633	dsl_dataset_t *ds;
3634
3635	error = dmu_objset_hold(fsname, FTAG, &os);
3636	if (error != 0)
3637		return;
3638	ds = dmu_objset_ds(os);
3639	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3640		char originname[ZFS_MAX_DATASET_NAME_LEN];
3641		dsl_dataset_name(ds->ds_prev, originname);
3642		dmu_objset_rele(os, FTAG);
3643		zfs_unmount_snap(originname);
3644	} else {
3645		dmu_objset_rele(os, FTAG);
3646	}
3647}
3648
3649/*
3650 * innvl: {
3651 *     "snaps" -> { snapshot1, snapshot2 }
3652 *     (optional boolean) "defer"
3653 * }
3654 *
3655 * outnvl: snapshot -> error code (int32)
3656 */
3657static const zfs_ioc_key_t zfs_keys_destroy_snaps[] = {
3658	{"snaps",	DATA_TYPE_NVLIST,	0},
3659	{"defer",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
3660};
3661
3662/* ARGSUSED */
3663static int
3664zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3665{
3666	int poollen;
3667	nvlist_t *snaps;
3668	nvpair_t *pair;
3669	boolean_t defer;
3670	spa_t *spa;
3671
3672	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
3673	defer = nvlist_exists(innvl, "defer");
3674
3675	poollen = strlen(poolname);
3676	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3677	    pair = nvlist_next_nvpair(snaps, pair)) {
3678		const char *name = nvpair_name(pair);
3679
3680		/*
3681		 * The snap must be in the specified pool to prevent the
3682		 * invalid removal of zvol minors below.
3683		 */
3684		if (strncmp(name, poolname, poollen) != 0 ||
3685		    (name[poollen] != '/' && name[poollen] != '@'))
3686			return (SET_ERROR(EXDEV));
3687
3688		zfs_unmount_snap(nvpair_name(pair));
3689		if (spa_open(name, &spa, FTAG) == 0) {
3690			zvol_remove_minors(spa, name, B_TRUE);
3691			spa_close(spa, FTAG);
3692		}
3693	}
3694
3695	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3696}
3697
3698/*
3699 * Create bookmarks. The bookmark names are of the form <fs>#<bmark>.
3700 * All bookmarks and snapshots must be in the same pool.
3701 * dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail.
3702 *
3703 * innvl: {
3704 *     new_bookmark1 -> existing_snapshot,
3705 *     new_bookmark2 -> existing_bookmark,
3706 * }
3707 *
3708 * outnvl: bookmark -> error code (int32)
3709 *
3710 */
static const zfs_ioc_key_t zfs_keys_bookmark[] = {
	{"<bookmark>...",	DATA_TYPE_STRING,	ZK_WILDCARDLIST},
};

/*
 * Thin wrapper: passes innvl and outnvl straight to dsl_bookmark_create()
 * and returns its result.  poolname is not used here.
 */
/* ARGSUSED */
static int
zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
{
	return (dsl_bookmark_create(innvl, outnvl));
}
3721
3722/*
3723 * innvl: {
3724 *     property 1, property 2, ...
3725 * }
3726 *
3727 * outnvl: {
3728 *     bookmark name 1 -> { property 1, property 2, ... },
3729 *     bookmark name 2 -> { property 1, property 2, ... }
3730 * }
3731 *
3732 */
static const zfs_ioc_key_t zfs_keys_get_bookmarks[] = {
	{"<property>...", DATA_TYPE_BOOLEAN, ZK_WILDCARDLIST | ZK_OPTIONAL},
};

/*
 * Thin wrapper: forwards fsname, the requested property list (innvl) and
 * outnvl to dsl_get_bookmarks() and returns its result.
 */
static int
zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
{
	return (dsl_get_bookmarks(fsname, innvl, outnvl));
}
3742
3743/*
3744 * innvl is not used.
3745 *
3746 * outnvl: {
3747 *     property 1, property 2, ...
3748 * }
3749 *
3750 */
3751static const zfs_ioc_key_t zfs_keys_get_bookmark_props[] = {
3752	/* no nvl keys */
3753};
3754
3755/* ARGSUSED */
3756static int
3757zfs_ioc_get_bookmark_props(const char *bookmark, nvlist_t *innvl,
3758    nvlist_t *outnvl)
3759{
3760	char fsname[ZFS_MAX_DATASET_NAME_LEN];
3761	char *bmname;
3762
3763	bmname = strchr(bookmark, '#');
3764	if (bmname == NULL)
3765		return (SET_ERROR(EINVAL));
3766	bmname++;
3767
3768	(void) strlcpy(fsname, bookmark, sizeof (fsname));
3769	*(strchr(fsname, '#')) = '\0';
3770
3771	return (dsl_get_bookmark_props(fsname, bmname, outnvl));
3772}
3773
3774/*
3775 * innvl: {
3776 *     bookmark name 1, bookmark name 2
3777 * }
3778 *
3779 * outnvl: bookmark -> error code (int32)
3780 *
3781 */
3782static const zfs_ioc_key_t zfs_keys_destroy_bookmarks[] = {
3783	{"<bookmark>...",	DATA_TYPE_BOOLEAN,	ZK_WILDCARDLIST},
3784};
3785
3786static int
3787zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3788    nvlist_t *outnvl)
3789{
3790	int error, poollen;
3791
3792	poollen = strlen(poolname);
3793	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3794	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3795		const char *name = nvpair_name(pair);
3796		const char *cp = strchr(name, '#');
3797
3798		/*
3799		 * The bookmark name must contain an #, and the part after it
3800		 * must contain only valid characters.
3801		 */
3802		if (cp == NULL ||
3803		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3804			return (SET_ERROR(EINVAL));
3805
3806		/*
3807		 * The bookmark must be in the specified pool.
3808		 */
3809		if (strncmp(name, poolname, poollen) != 0 ||
3810		    (name[poollen] != '/' && name[poollen] != '#'))
3811			return (SET_ERROR(EXDEV));
3812	}
3813
3814	error = dsl_bookmark_destroy(innvl, outnvl);
3815	return (error);
3816}
3817
3818static const zfs_ioc_key_t zfs_keys_channel_program[] = {
3819	{"program",	DATA_TYPE_STRING,		0},
3820	{"arg",		DATA_TYPE_ANY,			0},
3821	{"sync",	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
3822	{"instrlimit",	DATA_TYPE_UINT64,		ZK_OPTIONAL},
3823	{"memlimit",	DATA_TYPE_UINT64,		ZK_OPTIONAL},
3824};
3825
3826static int
3827zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
3828    nvlist_t *outnvl)
3829{
3830	char *program;
3831	uint64_t instrlimit, memlimit;
3832	boolean_t sync_flag;
3833	nvpair_t *nvarg = NULL;
3834
3835	program = fnvlist_lookup_string(innvl, ZCP_ARG_PROGRAM);
3836	if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
3837		sync_flag = B_TRUE;
3838	}
3839	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
3840		instrlimit = ZCP_DEFAULT_INSTRLIMIT;
3841	}
3842	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
3843		memlimit = ZCP_DEFAULT_MEMLIMIT;
3844	}
3845	nvarg = fnvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST);
3846
3847	if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
3848		return (SET_ERROR(EINVAL));
3849	if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
3850		return (SET_ERROR(EINVAL));
3851
3852	return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
3853	    nvarg, outnvl));
3854}
3855
3856/*
3857 * innvl: unused
3858 * outnvl: empty
3859 */
static const zfs_ioc_key_t zfs_keys_pool_checkpoint[] = {
	/* no nvl keys */
};

/*
 * Thin wrapper: returns the result of spa_checkpoint(poolname).  Neither
 * innvl nor outnvl is touched.
 */
/* ARGSUSED */
static int
zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
{
	return (spa_checkpoint(poolname));
}
3870
3871/*
3872 * innvl: unused
3873 * outnvl: empty
3874 */
static const zfs_ioc_key_t zfs_keys_pool_discard_checkpoint[] = {
	/* no nvl keys */
};

/*
 * Thin wrapper: returns the result of spa_checkpoint_discard(poolname).
 * Neither innvl nor outnvl is touched.
 */
/* ARGSUSED */
static int
zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
    nvlist_t *outnvl)
{
	return (spa_checkpoint_discard(poolname));
}
3886
3887/*
3888 * inputs:
3889 * zc_name		name of dataset to destroy
3890 * zc_defer_destroy	mark for deferred destroy
3891 *
3892 * outputs:		none
3893 */
3894static int
3895zfs_ioc_destroy(zfs_cmd_t *zc)
3896{
3897	objset_t *os;
3898	dmu_objset_type_t ost;
3899	int err;
3900
3901	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
3902	if (err != 0)
3903		return (err);
3904	ost = dmu_objset_type(os);
3905	dmu_objset_rele(os, FTAG);
3906
3907	if (ost == DMU_OST_ZFS)
3908		zfs_unmount_snap(zc->zc_name);
3909
3910	if (strchr(zc->zc_name, '@')) {
3911		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3912	} else {
3913		err = dsl_destroy_head(zc->zc_name);
3914		if (err == EEXIST) {
3915			/*
3916			 * It is possible that the given DS may have
3917			 * hidden child (%recv) datasets - "leftovers"
3918			 * resulting from the previously interrupted
3919			 * 'zfs receive'.
3920			 *
3921			 * 6 extra bytes for /%recv
3922			 */
3923			char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
3924
3925			if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
3926			    zc->zc_name, recv_clone_name) >=
3927			    sizeof (namebuf))
3928				return (SET_ERROR(EINVAL));
3929
3930			/*
3931			 * Try to remove the hidden child (%recv) and after
3932			 * that try to remove the target dataset.
3933			 * If the hidden child (%recv) does not exist
3934			 * the original error (EEXIST) will be returned
3935			 */
3936			err = dsl_destroy_head(namebuf);
3937			if (err == 0)
3938				err = dsl_destroy_head(zc->zc_name);
3939			else if (err == ENOENT)
3940				err = SET_ERROR(EEXIST);
3941		}
3942	}
3943
3944	return (err);
3945}
3946
3947/*
3948 * innvl: {
3949 *     "initialize_command" -> POOL_INITIALIZE_{CANCEL|START|SUSPEND} (uint64)
3950 *     "initialize_vdevs": { -> guids to initialize (nvlist)
3951 *         "vdev_path_1": vdev_guid_1, (uint64),
3952 *         "vdev_path_2": vdev_guid_2, (uint64),
3953 *         ...
3954 *     },
3955 * }
3956 *
3957 * outnvl: {
3958 *     "initialize_vdevs": { -> initialization errors (nvlist)
3959 *         "vdev_path_1": errno, see function body for possible errnos (uint64)
3960 *         "vdev_path_2": errno, ... (uint64)
3961 *         ...
3962 *     }
3963 * }
3964 *
 * EINVAL is returned for an unknown command or if any of the provided vdev
 * guids have been specified with a type other than uint64.
3967 */
3968static const zfs_ioc_key_t zfs_keys_pool_initialize[] = {
3969	{ZPOOL_INITIALIZE_COMMAND,	DATA_TYPE_UINT64,	0},
3970	{ZPOOL_INITIALIZE_VDEVS,	DATA_TYPE_NVLIST,	0}
3971};
3972
3973static int
3974zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3975{
3976	uint64_t cmd_type;
3977	if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
3978	    &cmd_type) != 0) {
3979		return (SET_ERROR(EINVAL));
3980	}
3981
3982	if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
3983	    cmd_type == POOL_INITIALIZE_START ||
3984	    cmd_type == POOL_INITIALIZE_SUSPEND)) {
3985		return (SET_ERROR(EINVAL));
3986	}
3987
3988	nvlist_t *vdev_guids;
3989	if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
3990	    &vdev_guids) != 0) {
3991		return (SET_ERROR(EINVAL));
3992	}
3993
3994	for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
3995	    pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
3996		uint64_t vdev_guid;
3997		if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
3998			return (SET_ERROR(EINVAL));
3999		}
4000	}
4001
4002	spa_t *spa;
4003	int error = spa_open(poolname, &spa, FTAG);
4004	if (error != 0)
4005		return (error);
4006
4007	nvlist_t *vdev_errlist = fnvlist_alloc();
4008	int total_errors = spa_vdev_initialize(spa, vdev_guids, cmd_type,
4009	    vdev_errlist);
4010
4011	if (fnvlist_size(vdev_errlist) > 0) {
4012		fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
4013		    vdev_errlist);
4014	}
4015	fnvlist_free(vdev_errlist);
4016
4017	spa_close(spa, FTAG);
4018	return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
4019}
4020
4021/*
4022 * innvl: {
4023 *     "trim_command" -> POOL_TRIM_{CANCEL|START|SUSPEND} (uint64)
4024 *     "trim_vdevs": { -> guids to TRIM (nvlist)
4025 *         "vdev_path_1": vdev_guid_1, (uint64),
4026 *         "vdev_path_2": vdev_guid_2, (uint64),
4027 *         ...
4028 *     },
4029 *     "trim_rate" -> Target TRIM rate in bytes/sec.
4030 *     "trim_secure" -> Set to request a secure TRIM.
4031 * }
4032 *
4033 * outnvl: {
4034 *     "trim_vdevs": { -> TRIM errors (nvlist)
4035 *         "vdev_path_1": errno, see function body for possible errnos (uint64)
4036 *         "vdev_path_2": errno, ... (uint64)
4037 *         ...
4038 *     }
4039 * }
4040 *
 * EINVAL is returned for an unknown command or if any of the provided vdev
 * guids have been specified with a type other than uint64.
4043 */
4044static const zfs_ioc_key_t zfs_keys_pool_trim[] = {
4045	{ZPOOL_TRIM_COMMAND,	DATA_TYPE_UINT64,		0},
4046	{ZPOOL_TRIM_VDEVS,	DATA_TYPE_NVLIST,		0},
4047	{ZPOOL_TRIM_RATE,	DATA_TYPE_UINT64,		ZK_OPTIONAL},
4048	{ZPOOL_TRIM_SECURE,	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
4049};
4050
4051static int
4052zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4053{
4054	uint64_t cmd_type;
4055	if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_COMMAND, &cmd_type) != 0)
4056		return (SET_ERROR(EINVAL));
4057
4058	if (!(cmd_type == POOL_TRIM_CANCEL ||
4059	    cmd_type == POOL_TRIM_START ||
4060	    cmd_type == POOL_TRIM_SUSPEND)) {
4061		return (SET_ERROR(EINVAL));
4062	}
4063
4064	nvlist_t *vdev_guids;
4065	if (nvlist_lookup_nvlist(innvl, ZPOOL_TRIM_VDEVS, &vdev_guids) != 0)
4066		return (SET_ERROR(EINVAL));
4067
4068	for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
4069	    pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
4070		uint64_t vdev_guid;
4071		if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
4072			return (SET_ERROR(EINVAL));
4073		}
4074	}
4075
4076	/* Optional, defaults to maximum rate when not provided */
4077	uint64_t rate;
4078	if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_RATE, &rate) != 0)
4079		rate = 0;
4080
4081	/* Optional, defaults to standard TRIM when not provided */
4082	boolean_t secure;
4083	if (nvlist_lookup_boolean_value(innvl, ZPOOL_TRIM_SECURE,
4084	    &secure) != 0) {
4085		secure = B_FALSE;
4086	}
4087
4088	spa_t *spa;
4089	int error = spa_open(poolname, &spa, FTAG);
4090	if (error != 0)
4091		return (error);
4092
4093	nvlist_t *vdev_errlist = fnvlist_alloc();
4094	int total_errors = spa_vdev_trim(spa, vdev_guids, cmd_type,
4095	    rate, !!zfs_trim_metaslab_skip, secure, vdev_errlist);
4096
4097	if (fnvlist_size(vdev_errlist) > 0)
4098		fnvlist_add_nvlist(outnvl, ZPOOL_TRIM_VDEVS, vdev_errlist);
4099
4100	fnvlist_free(vdev_errlist);
4101
4102	spa_close(spa, FTAG);
4103	return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
4104}
4105
4106/*
4107 * This ioctl waits for activity of a particular type to complete. If there is
4108 * no activity of that type in progress, it returns immediately, and the
4109 * returned value "waited" is false. If there is activity in progress, and no
4110 * tag is passed in, the ioctl blocks until all activity of that type is
4111 * complete, and then returns with "waited" set to true.
4112 *
4113 * If a tag is provided, it identifies a particular instance of an activity to
4114 * wait for. Currently, this is only valid for use with 'initialize', because
4115 * that is the only activity for which there can be multiple instances running
4116 * concurrently. In the case of 'initialize', the tag corresponds to the guid of
4117 * the vdev on which to wait.
4118 *
4119 * If a thread waiting in the ioctl receives a signal, the call will return
4120 * immediately, and the return value will be EINTR.
4121 *
4122 * innvl: {
4123 *     "wait_activity" -> int32_t
4124 *     (optional) "wait_tag" -> uint64_t
4125 * }
4126 *
4127 * outnvl: "waited" -> boolean_t
4128 */
4129static const zfs_ioc_key_t zfs_keys_pool_wait[] = {
4130	{ZPOOL_WAIT_ACTIVITY,	DATA_TYPE_INT32,		0},
4131	{ZPOOL_WAIT_TAG,	DATA_TYPE_UINT64,		ZK_OPTIONAL},
4132};
4133
4134static int
4135zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4136{
4137	int32_t activity;
4138	uint64_t tag;
4139	boolean_t waited;
4140	int error;
4141
4142	if (nvlist_lookup_int32(innvl, ZPOOL_WAIT_ACTIVITY, &activity) != 0)
4143		return (EINVAL);
4144
4145	if (nvlist_lookup_uint64(innvl, ZPOOL_WAIT_TAG, &tag) == 0)
4146		error = spa_wait_tag(name, activity, tag, &waited);
4147	else
4148		error = spa_wait(name, activity, &waited);
4149
4150	if (error == 0)
4151		fnvlist_add_boolean_value(outnvl, ZPOOL_WAIT_WAITED, waited);
4152
4153	return (error);
4154}
4155
4156/*
4157 * This ioctl waits for activity of a particular type to complete. If there is
4158 * no activity of that type in progress, it returns immediately, and the
 * returned value "waited" is false. If there is activity in progress, the
 * ioctl blocks until all activity of that type is complete, and then returns
 * with "waited" set to true.
4162 *
4163 * If a thread waiting in the ioctl receives a signal, the call will return
4164 * immediately, and the return value will be EINTR.
4165 *
4166 * innvl: {
4167 *     "wait_activity" -> int32_t
4168 * }
4169 *
4170 * outnvl: "waited" -> boolean_t
4171 */
static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
	{ZFS_WAIT_ACTIVITY,	DATA_TYPE_INT32,		0},
};

/*
 * Wait for activity on the dataset called name by way of dsl_dir_wait().
 * On success the resulting "waited" flag is added to outnvl.
 *
 * Returns EINVAL if the activity is missing from innvl or lies outside
 * [0, ZFS_WAIT_NUM_ACTIVITIES); the error from dsl_pool_hold() or
 * dsl_dataset_hold() if either hold fails; otherwise the result of
 * dsl_dir_wait().
 */
static int
zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
{
	int32_t activity;
	boolean_t waited = B_FALSE;
	int error;
	dsl_pool_t *dp;
	dsl_dir_t *dd;
	dsl_dataset_t *ds;

	if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0)
		return (SET_ERROR(EINVAL));

	/* Reject activity values outside the known range. */
	if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0)
		return (SET_ERROR(EINVAL));

	if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
		return (error);

	/* The pool hold must be dropped again if the dataset hold fails. */
	if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
		dsl_pool_rele(dp, FTAG);
		return (error);
	}

	/*
	 * Register as a waiter under dd_activity_lock.  The lock is entered
	 * here and only exited after dsl_dir_wait() has returned and the
	 * waiter count has been decremented again.
	 */
	dd = ds->ds_dir;
	mutex_enter(&dd->dd_activity_lock);
	dd->dd_activity_waiters++;

	/*
	 * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t
	 * aren't evicted while we're waiting. Normally this is prevented by
	 * holding the pool, but we can't do that while we're waiting since
	 * that would prevent TXGs from syncing out. Some of the functionality
	 * of long-holds (e.g. preventing deletion) is unnecessary for this
	 * case, since we would cancel the waiters before proceeding with a
	 * deletion. An alternative mechanism for keeping the dataset around
	 * could be developed but this is simpler.
	 */
	dsl_dataset_long_hold(ds, FTAG);
	dsl_pool_rele(dp, FTAG);

	error = dsl_dir_wait(dd, ds, activity, &waited);

	/*
	 * Undo the registration; the last waiter to leave signals
	 * dd_activity_cv (presumably for a thread waiting for the waiters
	 * to drain -- the consumer is not visible here).
	 */
	dsl_dataset_long_rele(ds, FTAG);
	dd->dd_activity_waiters--;
	if (dd->dd_activity_waiters == 0)
		cv_signal(&dd->dd_activity_cv);
	mutex_exit(&dd->dd_activity_lock);

	dsl_dataset_rele(ds, FTAG);

	/* "waited" is only reported when the wait itself succeeded. */
	if (error == 0)
		fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);

	return (error);
}
4232
4233/*
4234 * fsname is name of dataset to rollback (to most recent snapshot)
4235 *
4236 * innvl may contain name of expected target snapshot
4237 *
 * outnvl: {
 *     "target" -> name of most recent snapshot
 * }
4240 */
4241static const zfs_ioc_key_t zfs_keys_rollback[] = {
4242	{"target",	DATA_TYPE_STRING,	ZK_OPTIONAL},
4243};
4244
4245/* ARGSUSED */
4246static int
4247zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4248{
4249	zfsvfs_t *zfsvfs;
4250	zvol_state_handle_t *zv;
4251	char *target = NULL;
4252	int error;
4253
4254	(void) nvlist_lookup_string(innvl, "target", &target);
4255	if (target != NULL) {
4256		const char *cp = strchr(target, '@');
4257
4258		/*
4259		 * The snap name must contain an @, and the part after it must
4260		 * contain only valid characters.
4261		 */
4262		if (cp == NULL ||
4263		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4264			return (SET_ERROR(EINVAL));
4265	}
4266
4267	if (getzfsvfs(fsname, &zfsvfs) == 0) {
4268		dsl_dataset_t *ds;
4269
4270		ds = dmu_objset_ds(zfsvfs->z_os);
4271		error = zfs_suspend_fs(zfsvfs);
4272		if (error == 0) {
4273			int resume_err;
4274
4275			error = dsl_dataset_rollback(fsname, target, zfsvfs,
4276			    outnvl);
4277			resume_err = zfs_resume_fs(zfsvfs, ds);
4278			error = error ? error : resume_err;
4279		}
4280		zfs_vfs_rele(zfsvfs);
4281	} else if ((zv = zvol_suspend(fsname)) != NULL) {
4282		error = dsl_dataset_rollback(fsname, target, zvol_tag(zv),
4283		    outnvl);
4284		zvol_resume(zv);
4285	} else {
4286		error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
4287	}
4288	return (error);
4289}
4290
/*
 * Callback that unmounts the snapshot "<fsname>@<arg>", where arg is the
 * short snapshot name as a C string.  Always returns 0.
 */
static int
recursive_unmount(const char *fsname, void *arg)
{
	const char *shortname = arg;
	char *snap = kmem_asprintf("%s@%s", fsname, shortname);

	zfs_unmount_snap(snap);
	kmem_strfree(snap);

	return (0);
}
4303
4304/*
4305 *
4306 * snapname is the snapshot to redact.
4307 * innvl: {
4308 *     "bookname" -> (string)
4309 *         shortname of the redaction bookmark to generate
4310 *     "snapnv" -> (nvlist, values ignored)
4311 *         snapshots to redact snapname with respect to
4312 * }
4313 *
4314 * outnvl is unused
4315 */
4316
4317/* ARGSUSED */
4318static const zfs_ioc_key_t zfs_keys_redact[] = {
4319	{"bookname",		DATA_TYPE_STRING,	0},
4320	{"snapnv",		DATA_TYPE_NVLIST,	0},
4321};
4322static int
4323zfs_ioc_redact(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
4324{
4325	nvlist_t *redactnvl = NULL;
4326	char *redactbook = NULL;
4327
4328	if (nvlist_lookup_nvlist(innvl, "snapnv", &redactnvl) != 0)
4329		return (SET_ERROR(EINVAL));
4330	if (fnvlist_num_pairs(redactnvl) == 0)
4331		return (SET_ERROR(ENXIO));
4332	if (nvlist_lookup_string(innvl, "bookname", &redactbook) != 0)
4333		return (SET_ERROR(EINVAL));
4334
4335	return (dmu_redact_snap(snapname, redactnvl, redactbook));
4336}
4337
4338/*
4339 * inputs:
4340 * zc_name	old name of dataset
4341 * zc_value	new name of dataset
 * zc_cookie	bit 0: recursive flag (only valid for snapshots)
 *		bit 1: skip unmounting the snapshots before the rename
4343 *
4344 * outputs:	none
4345 */
4346static int
4347zfs_ioc_rename(zfs_cmd_t *zc)
4348{
4349	objset_t *os;
4350	dmu_objset_type_t ost;
4351	boolean_t recursive = zc->zc_cookie & 1;
4352	boolean_t nounmount = !!(zc->zc_cookie & 2);
4353	char *at;
4354	int err;
4355
4356	/* "zfs rename" from and to ...%recv datasets should both fail */
4357	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4358	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
4359	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
4360	    dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4361	    strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
4362		return (SET_ERROR(EINVAL));
4363
4364	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
4365	if (err != 0)
4366		return (err);
4367	ost = dmu_objset_type(os);
4368	dmu_objset_rele(os, FTAG);
4369
4370	at = strchr(zc->zc_name, '@');
4371	if (at != NULL) {
4372		/* snaps must be in same fs */
4373		int error;
4374
4375		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
4376			return (SET_ERROR(EXDEV));
4377		*at = '\0';
4378		if (ost == DMU_OST_ZFS && !nounmount) {
4379			error = dmu_objset_find(zc->zc_name,
4380			    recursive_unmount, at + 1,
4381			    recursive ? DS_FIND_CHILDREN : 0);
4382			if (error != 0) {
4383				*at = '@';
4384				return (error);
4385			}
4386		}
4387		error = dsl_dataset_rename_snapshot(zc->zc_name,
4388		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
4389		*at = '@';
4390
4391		return (error);
4392	} else {
4393		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
4394	}
4395}
4396
4397static int
4398zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
4399{
4400	const char *propname = nvpair_name(pair);
4401	boolean_t issnap = (strchr(dsname, '@') != NULL);
4402	zfs_prop_t prop = zfs_name_to_prop(propname);
4403	uint64_t intval, compval;
4404	int err;
4405
4406	if (prop == ZPROP_INVAL) {
4407		if (zfs_prop_user(propname)) {
4408			if ((err = zfs_secpolicy_write_perms(dsname,
4409			    ZFS_DELEG_PERM_USERPROP, cr)))
4410				return (err);
4411			return (0);
4412		}
4413
4414		if (!issnap && zfs_prop_userquota(propname)) {
4415			const char *perm = NULL;
4416			const char *uq_prefix =
4417			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
4418			const char *gq_prefix =
4419			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
4420			const char *uiq_prefix =
4421			    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
4422			const char *giq_prefix =
4423			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
4424			const char *pq_prefix =
4425			    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA];
4426			const char *piq_prefix = zfs_userquota_prop_prefixes[\
4427			    ZFS_PROP_PROJECTOBJQUOTA];
4428
4429			if (strncmp(propname, uq_prefix,
4430			    strlen(uq_prefix)) == 0) {
4431				perm = ZFS_DELEG_PERM_USERQUOTA;
4432			} else if (strncmp(propname, uiq_prefix,
4433			    strlen(uiq_prefix)) == 0) {
4434				perm = ZFS_DELEG_PERM_USEROBJQUOTA;
4435			} else if (strncmp(propname, gq_prefix,
4436			    strlen(gq_prefix)) == 0) {
4437				perm = ZFS_DELEG_PERM_GROUPQUOTA;
4438			} else if (strncmp(propname, giq_prefix,
4439			    strlen(giq_prefix)) == 0) {
4440				perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
4441			} else if (strncmp(propname, pq_prefix,
4442			    strlen(pq_prefix)) == 0) {
4443				perm = ZFS_DELEG_PERM_PROJECTQUOTA;
4444			} else if (strncmp(propname, piq_prefix,
4445			    strlen(piq_prefix)) == 0) {
4446				perm = ZFS_DELEG_PERM_PROJECTOBJQUOTA;
4447			} else {
4448				/* {USER|GROUP|PROJECT}USED are read-only */
4449				return (SET_ERROR(EINVAL));
4450			}
4451
4452			if ((err = zfs_secpolicy_write_perms(dsname, perm, cr)))
4453				return (err);
4454			return (0);
4455		}
4456
4457		return (SET_ERROR(EINVAL));
4458	}
4459
4460	if (issnap)
4461		return (SET_ERROR(EINVAL));
4462
4463	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
4464		/*
4465		 * dsl_prop_get_all_impl() returns properties in this
4466		 * format.
4467		 */
4468		nvlist_t *attrs;
4469		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
4470		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4471		    &pair) == 0);
4472	}
4473
4474	/*
4475	 * Check that this value is valid for this pool version
4476	 */
4477	switch (prop) {
4478	case ZFS_PROP_COMPRESSION:
4479		/*
4480		 * If the user specified gzip compression, make sure
4481		 * the SPA supports it. We ignore any errors here since
4482		 * we'll catch them later.
4483		 */
4484		if (nvpair_value_uint64(pair, &intval) == 0) {
4485			compval = ZIO_COMPRESS_ALGO(intval);
4486			if (compval >= ZIO_COMPRESS_GZIP_1 &&
4487			    compval <= ZIO_COMPRESS_GZIP_9 &&
4488			    zfs_earlier_version(dsname,
4489			    SPA_VERSION_GZIP_COMPRESSION)) {
4490				return (SET_ERROR(ENOTSUP));
4491			}
4492
4493			if (compval == ZIO_COMPRESS_ZLE &&
4494			    zfs_earlier_version(dsname,
4495			    SPA_VERSION_ZLE_COMPRESSION))
4496				return (SET_ERROR(ENOTSUP));
4497
4498			if (compval == ZIO_COMPRESS_LZ4) {
4499				spa_t *spa;
4500
4501				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4502					return (err);
4503
4504				if (!spa_feature_is_enabled(spa,
4505				    SPA_FEATURE_LZ4_COMPRESS)) {
4506					spa_close(spa, FTAG);
4507					return (SET_ERROR(ENOTSUP));
4508				}
4509				spa_close(spa, FTAG);
4510			}
4511
4512			if (compval == ZIO_COMPRESS_ZSTD) {
4513				spa_t *spa;
4514
4515				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4516					return (err);
4517
4518				if (!spa_feature_is_enabled(spa,
4519				    SPA_FEATURE_ZSTD_COMPRESS)) {
4520					spa_close(spa, FTAG);
4521					return (SET_ERROR(ENOTSUP));
4522				}
4523				spa_close(spa, FTAG);
4524			}
4525		}
4526		break;
4527
4528	case ZFS_PROP_COPIES:
4529		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4530			return (SET_ERROR(ENOTSUP));
4531		break;
4532
4533	case ZFS_PROP_VOLBLOCKSIZE:
4534	case ZFS_PROP_RECORDSIZE:
4535		/* Record sizes above 128k need the feature to be enabled */
4536		if (nvpair_value_uint64(pair, &intval) == 0 &&
4537		    intval > SPA_OLD_MAXBLOCKSIZE) {
4538			spa_t *spa;
4539
4540			/*
4541			 * We don't allow setting the property above 1MB,
4542			 * unless the tunable has been changed.
4543			 */
4544			if (intval > zfs_max_recordsize ||
4545			    intval > SPA_MAXBLOCKSIZE)
4546				return (SET_ERROR(ERANGE));
4547
4548			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4549				return (err);
4550
4551			if (!spa_feature_is_enabled(spa,
4552			    SPA_FEATURE_LARGE_BLOCKS)) {
4553				spa_close(spa, FTAG);
4554				return (SET_ERROR(ENOTSUP));
4555			}
4556			spa_close(spa, FTAG);
4557		}
4558		break;
4559
4560	case ZFS_PROP_DNODESIZE:
4561		/* Dnode sizes above 512 need the feature to be enabled */
4562		if (nvpair_value_uint64(pair, &intval) == 0 &&
4563		    intval != ZFS_DNSIZE_LEGACY) {
4564			spa_t *spa;
4565
4566			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4567				return (err);
4568
4569			if (!spa_feature_is_enabled(spa,
4570			    SPA_FEATURE_LARGE_DNODE)) {
4571				spa_close(spa, FTAG);
4572				return (SET_ERROR(ENOTSUP));
4573			}
4574			spa_close(spa, FTAG);
4575		}
4576		break;
4577
4578	case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
4579		/*
4580		 * This property could require the allocation classes
4581		 * feature to be active for setting, however we allow
4582		 * it so that tests of settable properties succeed.
4583		 * The CLI will issue a warning in this case.
4584		 */
4585		break;
4586
4587	case ZFS_PROP_SHARESMB:
4588		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4589			return (SET_ERROR(ENOTSUP));
4590		break;
4591
4592	case ZFS_PROP_ACLINHERIT:
4593		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4594		    nvpair_value_uint64(pair, &intval) == 0) {
4595			if (intval == ZFS_ACL_PASSTHROUGH_X &&
4596			    zfs_earlier_version(dsname,
4597			    SPA_VERSION_PASSTHROUGH_X))
4598				return (SET_ERROR(ENOTSUP));
4599		}
4600		break;
4601	case ZFS_PROP_CHECKSUM:
4602	case ZFS_PROP_DEDUP:
4603	{
4604		spa_feature_t feature;
4605		spa_t *spa;
4606		int err;
4607
4608		/* dedup feature version checks */
4609		if (prop == ZFS_PROP_DEDUP &&
4610		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4611			return (SET_ERROR(ENOTSUP));
4612
4613		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4614		    nvpair_value_uint64(pair, &intval) == 0) {
4615			/* check prop value is enabled in features */
4616			feature = zio_checksum_to_feature(
4617			    intval & ZIO_CHECKSUM_MASK);
4618			if (feature == SPA_FEATURE_NONE)
4619				break;
4620
4621			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4622				return (err);
4623
4624			if (!spa_feature_is_enabled(spa, feature)) {
4625				spa_close(spa, FTAG);
4626				return (SET_ERROR(ENOTSUP));
4627			}
4628			spa_close(spa, FTAG);
4629		}
4630		break;
4631	}
4632
4633	default:
4634		break;
4635	}
4636
4637	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4638}
4639
4640/*
4641 * Removes properties from the given props list that fail permission checks
4642 * needed to clear them and to restore them in case of a receive error. For each
4643 * property, make sure we have both set and inherit permissions.
4644 *
4645 * Returns the first error encountered if any permission checks fail. If the
4646 * caller provides a non-NULL errlist, it also gives the complete list of names
4647 * of all the properties that failed a permission check along with the
4648 * corresponding error numbers. The caller is responsible for freeing the
4649 * returned errlist.
4650 *
4651 * If every property checks out successfully, zero is returned and the list
4652 * pointed at by errlist is NULL.
4653 */
4654static int
4655zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist)
4656{
4657	zfs_cmd_t *zc;
4658	nvpair_t *pair, *next_pair;
4659	nvlist_t *errors;
4660	int err, rv = 0;
4661
4662	if (props == NULL)
4663		return (0);
4664
4665	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4666
4667	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4668	(void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
4669	pair = nvlist_next_nvpair(props, NULL);
4670	while (pair != NULL) {
4671		next_pair = nvlist_next_nvpair(props, pair);
4672
4673		(void) strlcpy(zc->zc_value, nvpair_name(pair),
4674		    sizeof (zc->zc_value));
4675		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4676		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4677			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4678			VERIFY(nvlist_add_int32(errors,
4679			    zc->zc_value, err) == 0);
4680		}
4681		pair = next_pair;
4682	}
4683	kmem_free(zc, sizeof (zfs_cmd_t));
4684
4685	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4686		nvlist_free(errors);
4687		errors = NULL;
4688	} else {
4689		VERIFY(nvpair_value_int32(pair, &rv) == 0);
4690	}
4691
4692	if (errlist == NULL)
4693		nvlist_free(errors);
4694	else
4695		*errlist = errors;
4696
4697	return (rv);
4698}
4699
4700static boolean_t
4701propval_equals(nvpair_t *p1, nvpair_t *p2)
4702{
4703	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4704		/* dsl_prop_get_all_impl() format */
4705		nvlist_t *attrs;
4706		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4707		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4708		    &p1) == 0);
4709	}
4710
4711	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4712		nvlist_t *attrs;
4713		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4714		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4715		    &p2) == 0);
4716	}
4717
4718	if (nvpair_type(p1) != nvpair_type(p2))
4719		return (B_FALSE);
4720
4721	if (nvpair_type(p1) == DATA_TYPE_STRING) {
4722		char *valstr1, *valstr2;
4723
4724		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4725		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4726		return (strcmp(valstr1, valstr2) == 0);
4727	} else {
4728		uint64_t intval1, intval2;
4729
4730		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4731		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4732		return (intval1 == intval2);
4733	}
4734}
4735
4736/*
4737 * Remove properties from props if they are not going to change (as determined
4738 * by comparison with origprops). Remove them from origprops as well, since we
4739 * do not need to clear or restore properties that won't change.
4740 */
4741static void
4742props_reduce(nvlist_t *props, nvlist_t *origprops)
4743{
4744	nvpair_t *pair, *next_pair;
4745
4746	if (origprops == NULL)
4747		return; /* all props need to be received */
4748
4749	pair = nvlist_next_nvpair(props, NULL);
4750	while (pair != NULL) {
4751		const char *propname = nvpair_name(pair);
4752		nvpair_t *match;
4753
4754		next_pair = nvlist_next_nvpair(props, pair);
4755
4756		if ((nvlist_lookup_nvpair(origprops, propname,
4757		    &match) != 0) || !propval_equals(pair, match))
4758			goto next; /* need to set received value */
4759
4760		/* don't clear the existing received value */
4761		(void) nvlist_remove_nvpair(origprops, match);
4762		/* don't bother receiving the property */
4763		(void) nvlist_remove_nvpair(props, pair);
4764next:
4765		pair = next_pair;
4766	}
4767}
4768
4769/*
4770 * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4771 * For example, refquota cannot be set until after the receipt of a dataset,
4772 * because in replication streams, an older/earlier snapshot may exceed the
4773 * refquota.  We want to receive the older/earlier snapshot, but setting
4774 * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4775 * the older/earlier snapshot from being received (with EDQUOT).
4776 *
4777 * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4778 *
4779 * libzfs will need to be judicious handling errors encountered by props
4780 * extracted by this function.
4781 */
4782static nvlist_t *
4783extract_delay_props(nvlist_t *props)
4784{
4785	nvlist_t *delayprops;
4786	nvpair_t *nvp, *tmp;
4787	static const zfs_prop_t delayable[] = {
4788		ZFS_PROP_REFQUOTA,
4789		ZFS_PROP_KEYLOCATION,
4790		0
4791	};
4792	int i;
4793
4794	VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4795
4796	for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4797	    nvp = nvlist_next_nvpair(props, nvp)) {
4798		/*
4799		 * strcmp() is safe because zfs_prop_to_name() always returns
4800		 * a bounded string.
4801		 */
4802		for (i = 0; delayable[i] != 0; i++) {
4803			if (strcmp(zfs_prop_to_name(delayable[i]),
4804			    nvpair_name(nvp)) == 0) {
4805				break;
4806			}
4807		}
4808		if (delayable[i] != 0) {
4809			tmp = nvlist_prev_nvpair(props, nvp);
4810			VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4811			VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4812			nvp = tmp;
4813		}
4814	}
4815
4816	if (nvlist_empty(delayprops)) {
4817		nvlist_free(delayprops);
4818		delayprops = NULL;
4819	}
4820	return (delayprops);
4821}
4822
4823static void
4824zfs_allow_log_destroy(void *arg)
4825{
4826	char *poolname = arg;
4827
4828	if (poolname != NULL)
4829		kmem_strfree(poolname);
4830}
4831
/*
 * Debug-only fault injection switch. When set, zfs_ioc_recv_impl() forces
 * its result to an error once (after the receive has run) and clears the
 * flag again, which exercises the property restore paths.
 */
#ifdef	ZFS_DEBUG
static boolean_t zfs_ioc_recv_inject_err;
#endif

/*
 * Common implementation of the receive ioctls.
 *
 * Receives the stream read from 'input_fd' into 'tofs'@'tosnap'. Before the
 * stream is consumed, 'recvprops' are applied with source
 * ZPROP_SRC_RECEIVED and 'localprops' are applied as local settings
 * (entries of any other type, i.e. "-o property=value") or as explicit
 * inherits (DATA_TYPE_BOOLEAN entries, i.e. "-x property"). Properties
 * picked out by extract_delay_props() are only applied after the receive
 * has completed successfully. If the receive fails on an existing dataset,
 * the previously stashed properties are restored on a best-effort basis.
 *
 * outputs:
 * read_bytes	number of bytes consumed from the input file
 * errflags	ZPROP_ERR_NOCLEAR / ZPROP_ERR_NORESTORE bits describing
 *		property clear/restore failures
 * errors	nvlist of per-property errors
 *
 * nvlist 'errors' is always allocated (even when this function fails
 * early). It will contain descriptions of encountered errors, if any. It's
 * the caller's responsibility to free.
 *
 * Returns the receive error if there is one, otherwise the error (if any)
 * from marking the dataset as having received properties.
 */
static int
zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
    nvlist_t *localprops, nvlist_t *hidden_args, boolean_t force,
    boolean_t resumable, int input_fd,
    dmu_replay_record_t *begin_record, uint64_t *read_bytes,
    uint64_t *errflags, nvlist_t **errors)
{
	dmu_recv_cookie_t drc;
	int error = 0;
	int props_error = 0;
	offset_t off, noff;
	nvlist_t *local_delayprops = NULL;
	nvlist_t *recv_delayprops = NULL;
	nvlist_t *origprops = NULL; /* existing properties */
	nvlist_t *origrecvd = NULL; /* existing received properties */
	boolean_t first_recvd_props = B_FALSE;
	boolean_t tofs_was_redacted;
	zfs_file_t *input_fp;

	/* Initialize all outputs up front so they are valid on every return. */
	*read_bytes = 0;
	*errflags = 0;
	*errors = fnvlist_alloc();
	off = 0;

	if ((error = zfs_file_get(input_fd, &input_fp)))
		return (error);

	/* Remember the starting offset; read_bytes is computed relative to it. */
	noff = off = zfs_file_off(input_fp);
	error = dmu_recv_begin(tofs, tosnap, begin_record, force,
	    resumable, localprops, hidden_args, origin, &drc, input_fp,
	    &off);
	if (error != 0)
		goto out;
	tofs_was_redacted = dsl_get_redacted(drc.drc_ds);

	/*
	 * Set properties before we receive the stream so that they are applied
	 * to the new data. Note that we must call dmu_recv_stream() if
	 * dmu_recv_begin() succeeds.
	 */
	if (recvprops != NULL && !drc.drc_newfs) {
		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
		    SPA_VERSION_RECVD_PROPS &&
		    !dsl_prop_get_hasrecvd(tofs))
			first_recvd_props = B_TRUE;

		/*
		 * If new received properties are supplied, they are to
		 * completely replace the existing received properties,
		 * so stash away the existing ones.
		 */
		if (dsl_prop_get_received(tofs, &origrecvd) == 0) {
			nvlist_t *errlist = NULL;
			/*
			 * Don't bother writing a property if its value won't
			 * change (and avoid the unnecessary security checks).
			 *
			 * The first receive after SPA_VERSION_RECVD_PROPS is a
			 * special case where we blow away all local properties
			 * regardless.
			 */
			if (!first_recvd_props)
				props_reduce(recvprops, origrecvd);
			/* Record the properties we are not allowed to clear. */
			if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0)
				(void) nvlist_merge(*errors, errlist, 0);
			nvlist_free(errlist);

			if (clear_received_props(tofs, origrecvd,
			    first_recvd_props ? NULL : recvprops) != 0)
				*errflags |= ZPROP_ERR_NOCLEAR;
		} else {
			*errflags |= ZPROP_ERR_NOCLEAR;
		}
	}

	/*
	 * Stash away existing properties so we can restore them on error unless
	 * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which
	 * case "origrecvd" will take care of that.
	 */
	if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) {
		objset_t *os;
		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
			if (dsl_prop_get_all(os, &origprops) != 0) {
				*errflags |= ZPROP_ERR_NOCLEAR;
			}
			dmu_objset_rele(os, FTAG);
		} else {
			*errflags |= ZPROP_ERR_NOCLEAR;
		}
	}

	if (recvprops != NULL) {
		props_error = dsl_prop_set_hasrecvd(tofs);

		if (props_error == 0) {
			/* Hold back the delayed props; set the rest now. */
			recv_delayprops = extract_delay_props(recvprops);
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
			    recvprops, *errors);
		}
	}

	if (localprops != NULL) {
		/* Split localprops into overrides (-o) and excludes (-x). */
		nvlist_t *oprops = fnvlist_alloc();
		nvlist_t *xprops = fnvlist_alloc();
		nvpair_t *nvp = NULL;

		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
			if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) {
				/* -x property */
				const char *name = nvpair_name(nvp);
				zfs_prop_t prop = zfs_name_to_prop(name);
				/*
				 * Only inheritable native properties and user
				 * properties are added to the exclude list;
				 * anything else is silently skipped.
				 */
				if (prop != ZPROP_INVAL) {
					if (!zfs_prop_inheritable(prop))
						continue;
				} else if (!zfs_prop_user(name))
					continue;
				fnvlist_add_boolean(xprops, name);
			} else {
				/* -o property=value */
				fnvlist_add_nvpair(oprops, nvp);
			}
		}

		local_delayprops = extract_delay_props(oprops);
		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
		    oprops, *errors);
		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
		    xprops, *errors);

		nvlist_free(oprops);
		nvlist_free(xprops);
	}

	error = dmu_recv_stream(&drc, &off);

	if (error == 0) {
		zfsvfs_t *zfsvfs = NULL;
		zvol_state_handle_t *zv = NULL;

		if (getzfsvfs(tofs, &zfsvfs) == 0) {
			/* online recv */
			dsl_dataset_t *ds;
			int end_err;
			boolean_t stream_is_redacted = DMU_GET_FEATUREFLAGS(
			    begin_record->drr_u.drr_begin.
			    drr_versioninfo) & DMU_BACKUP_FEATURE_REDACTED;

			ds = dmu_objset_ds(zfsvfs->z_os);
			error = zfs_suspend_fs(zfsvfs);
			/*
			 * If the suspend fails, then the recv_end will
			 * likely also fail, and clean up after itself.
			 */
			end_err = dmu_recv_end(&drc, zfsvfs);
			/*
			 * If the dataset was not redacted, but we received a
			 * redacted stream onto it, we need to unmount the
			 * dataset.  Otherwise, resume the filesystem.
			 */
			if (error == 0 && !drc.drc_newfs &&
			    stream_is_redacted && !tofs_was_redacted) {
				error = zfs_end_fs(zfsvfs, ds);
			} else if (error == 0) {
				error = zfs_resume_fs(zfsvfs, ds);
			}
			/*
			 * A suspend/end/resume failure takes precedence over
			 * the dmu_recv_end() result.
			 */
			error = error ? error : end_err;
			zfs_vfs_rele(zfsvfs);
		} else if ((zv = zvol_suspend(tofs)) != NULL) {
			error = dmu_recv_end(&drc, zvol_tag(zv));
			zvol_resume(zv);
		} else {
			error = dmu_recv_end(&drc, NULL);
		}

		/* Set delayed properties now, after we're done receiving. */
		if (recv_delayprops != NULL && error == 0) {
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
			    recv_delayprops, *errors);
		}
		if (local_delayprops != NULL && error == 0) {
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
			    local_delayprops, *errors);
		}
	}

	/*
	 * Merge delayed props back in with initial props, in case
	 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
	 * we have to make sure clear_received_props() includes
	 * the delayed properties).
	 *
	 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
	 * using ASSERT() will be just like a VERIFY.
	 */
	if (recv_delayprops != NULL) {
		ASSERT(nvlist_merge(recvprops, recv_delayprops, 0) == 0);
		nvlist_free(recv_delayprops);
	}
	if (local_delayprops != NULL) {
		ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
		nvlist_free(local_delayprops);
	}
	*read_bytes = off - noff;

#ifdef	ZFS_DEBUG
	/* One-shot fault injection: fail this receive, then disarm. */
	if (zfs_ioc_recv_inject_err) {
		zfs_ioc_recv_inject_err = B_FALSE;
		error = 1;
	}
#endif

	/*
	 * On error, restore the original props.
	 */
	if (error != 0 && recvprops != NULL && !drc.drc_newfs) {
		if (clear_received_props(tofs, recvprops, NULL) != 0) {
			/*
			 * We failed to clear the received properties.
			 * Since we may have left a $recvd value on the
			 * system, we can't clear the $hasrecvd flag.
			 */
			*errflags |= ZPROP_ERR_NORESTORE;
		} else if (first_recvd_props) {
			dsl_prop_unset_hasrecvd(tofs);
		}

		if (origrecvd == NULL && !drc.drc_newfs) {
			/* We failed to stash the original properties. */
			*errflags |= ZPROP_ERR_NORESTORE;
		}

		/*
		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in the
		 * first new-style receive.
		 */
		if (origrecvd != NULL &&
		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
		    origrecvd, NULL) != 0) {
			/*
			 * We stashed the original properties but failed to
			 * restore them.
			 */
			*errflags |= ZPROP_ERR_NORESTORE;
		}
	}
	if (error != 0 && localprops != NULL && !drc.drc_newfs &&
	    !first_recvd_props) {
		nvlist_t *setprops;
		nvlist_t *inheritprops;
		nvpair_t *nvp;

		if (origprops == NULL) {
			/* We failed to stash the original properties. */
			*errflags |= ZPROP_ERR_NORESTORE;
			goto out;
		}

		/* Restore original props */
		setprops = fnvlist_alloc();
		inheritprops = fnvlist_alloc();
		nvp = NULL;
		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
			const char *name = nvpair_name(nvp);
			const char *source;
			nvlist_t *attrs;

			if (!nvlist_exists(origprops, name)) {
				/*
				 * Property was not present or was explicitly
				 * inherited before the receive, restore this.
				 */
				fnvlist_add_boolean(inheritprops, name);
				continue;
			}
			attrs = fnvlist_lookup_nvlist(origprops, name);
			source = fnvlist_lookup_string(attrs, ZPROP_SOURCE);

			/* Skip received properties */
			if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0)
				continue;

			if (strcmp(source, tofs) == 0) {
				/* Property was locally set */
				fnvlist_add_nvlist(setprops, name, attrs);
			} else {
				/* Property was implicitly inherited */
				fnvlist_add_boolean(inheritprops, name);
			}
		}

		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops,
		    NULL) != 0)
			*errflags |= ZPROP_ERR_NORESTORE;
		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops,
		    NULL) != 0)
			*errflags |= ZPROP_ERR_NORESTORE;

		nvlist_free(setprops);
		nvlist_free(inheritprops);
	}
out:
	zfs_file_put(input_fd);
	nvlist_free(origrecvd);
	nvlist_free(origprops);

	/* A receive error wins; otherwise report the hasrecvd failure. */
	if (error == 0)
		error = props_error;

	return (error);
}
5154
5155/*
5156 * inputs:
5157 * zc_name		name of containing filesystem (unused)
5158 * zc_nvlist_src{_size}	nvlist of properties to apply
5159 * zc_nvlist_conf{_size}	nvlist of properties to exclude
5160 *			(DATA_TYPE_BOOLEAN) and override (everything else)
5161 * zc_value		name of snapshot to create
5162 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
5163 * zc_cookie		file descriptor to recv from
5164 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
5165 * zc_guid		force flag
5166 *
5167 * outputs:
5168 * zc_cookie		number of bytes read
5169 * zc_obj		zprop_errflags_t
5170 * zc_nvlist_dst{_size} error for each unapplied received property
5171 */
5172static int
5173zfs_ioc_recv(zfs_cmd_t *zc)
5174{
5175	dmu_replay_record_t begin_record;
5176	nvlist_t *errors = NULL;
5177	nvlist_t *recvdprops = NULL;
5178	nvlist_t *localprops = NULL;
5179	char *origin = NULL;
5180	char *tosnap;
5181	char tofs[ZFS_MAX_DATASET_NAME_LEN];
5182	int error = 0;
5183
5184	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
5185	    strchr(zc->zc_value, '@') == NULL ||
5186	    strchr(zc->zc_value, '%'))
5187		return (SET_ERROR(EINVAL));
5188
5189	(void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
5190	tosnap = strchr(tofs, '@');
5191	*tosnap++ = '\0';
5192
5193	if (zc->zc_nvlist_src != 0 &&
5194	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
5195	    zc->zc_iflags, &recvdprops)) != 0)
5196		return (error);
5197
5198	if (zc->zc_nvlist_conf != 0 &&
5199	    (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
5200	    zc->zc_iflags, &localprops)) != 0)
5201		return (error);
5202
5203	if (zc->zc_string[0])
5204		origin = zc->zc_string;
5205
5206	begin_record.drr_type = DRR_BEGIN;
5207	begin_record.drr_payloadlen = 0;
5208	begin_record.drr_u.drr_begin = zc->zc_begin_record;
5209
5210	error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
5211	    NULL, zc->zc_guid, B_FALSE, zc->zc_cookie, &begin_record,
5212	    &zc->zc_cookie, &zc->zc_obj, &errors);
5213	nvlist_free(recvdprops);
5214	nvlist_free(localprops);
5215
5216	/*
5217	 * Now that all props, initial and delayed, are set, report the prop
5218	 * errors to the caller.
5219	 */
5220	if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
5221	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
5222	    put_nvlist(zc, errors) != 0)) {
5223		/*
5224		 * Caller made zc->zc_nvlist_dst less than the minimum expected
5225		 * size or supplied an invalid address.
5226		 */
5227		error = SET_ERROR(EINVAL);
5228	}
5229
5230	nvlist_free(errors);
5231
5232	return (error);
5233}
5234
5235/*
5236 * innvl: {
5237 *     "snapname" -> full name of the snapshot to create
5238 *     (optional) "props" -> received properties to set (nvlist)
5239 *     (optional) "localprops" -> override and exclude properties (nvlist)
5240 *     (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE)
5241 *     "begin_record" -> non-byteswapped dmu_replay_record_t
5242 *     "input_fd" -> file descriptor to read stream from (int32)
5243 *     (optional) "force" -> force flag (value ignored)
5244 *     (optional) "resumable" -> resumable flag (value ignored)
5245 *     (optional) "cleanup_fd" -> unused
5246 *     (optional) "action_handle" -> unused
5247 *     (optional) "hidden_args" -> { "wkeydata" -> value }
5248 * }
5249 *
5250 * outnvl: {
5251 *     "read_bytes" -> number of bytes read
5252 *     "error_flags" -> zprop_errflags_t
5253 *     "errors" -> error for each unapplied received property (nvlist)
5254 * }
5255 */
5256static const zfs_ioc_key_t zfs_keys_recv_new[] = {
5257	{"snapname",		DATA_TYPE_STRING,	0},
5258	{"props",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
5259	{"localprops",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
5260	{"origin",		DATA_TYPE_STRING,	ZK_OPTIONAL},
5261	{"begin_record",	DATA_TYPE_BYTE_ARRAY,	0},
5262	{"input_fd",		DATA_TYPE_INT32,	0},
5263	{"force",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
5264	{"resumable",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
5265	{"cleanup_fd",		DATA_TYPE_INT32,	ZK_OPTIONAL},
5266	{"action_handle",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
5267	{"hidden_args",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
5268};
5269
5270static int
5271zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
5272{
5273	dmu_replay_record_t *begin_record;
5274	uint_t begin_record_size;
5275	nvlist_t *errors = NULL;
5276	nvlist_t *recvprops = NULL;
5277	nvlist_t *localprops = NULL;
5278	nvlist_t *hidden_args = NULL;
5279	char *snapname;
5280	char *origin = NULL;
5281	char *tosnap;
5282	char tofs[ZFS_MAX_DATASET_NAME_LEN];
5283	boolean_t force;
5284	boolean_t resumable;
5285	uint64_t read_bytes = 0;
5286	uint64_t errflags = 0;
5287	int input_fd = -1;
5288	int error;
5289
5290	snapname = fnvlist_lookup_string(innvl, "snapname");
5291
5292	if (dataset_namecheck(snapname, NULL, NULL) != 0 ||
5293	    strchr(snapname, '@') == NULL ||
5294	    strchr(snapname, '%'))
5295		return (SET_ERROR(EINVAL));
5296
5297	(void) strlcpy(tofs, snapname, sizeof (tofs));
5298	tosnap = strchr(tofs, '@');
5299	*tosnap++ = '\0';
5300
5301	error = nvlist_lookup_string(innvl, "origin", &origin);
5302	if (error && error != ENOENT)
5303		return (error);
5304
5305	error = nvlist_lookup_byte_array(innvl, "begin_record",
5306	    (uchar_t **)&begin_record, &begin_record_size);
5307	if (error != 0 || begin_record_size != sizeof (*begin_record))
5308		return (SET_ERROR(EINVAL));
5309
5310	input_fd = fnvlist_lookup_int32(innvl, "input_fd");
5311
5312	force = nvlist_exists(innvl, "force");
5313	resumable = nvlist_exists(innvl, "resumable");
5314
5315	/* we still use "props" here for backwards compatibility */
5316	error = nvlist_lookup_nvlist(innvl, "props", &recvprops);
5317	if (error && error != ENOENT)
5318		return (error);
5319
5320	error = nvlist_lookup_nvlist(innvl, "localprops", &localprops);
5321	if (error && error != ENOENT)
5322		return (error);
5323
5324	error = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
5325	if (error && error != ENOENT)
5326		return (error);
5327
5328	error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops,
5329	    hidden_args, force, resumable, input_fd, begin_record,
5330	    &read_bytes, &errflags, &errors);
5331
5332	fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
5333	fnvlist_add_uint64(outnvl, "error_flags", errflags);
5334	fnvlist_add_nvlist(outnvl, "errors", errors);
5335
5336	nvlist_free(errors);
5337	nvlist_free(recvprops);
5338	nvlist_free(localprops);
5339
5340	return (error);
5341}
5342
5343typedef struct dump_bytes_io {
5344	zfs_file_t	*dbi_fp;
5345	caddr_t		dbi_buf;
5346	int		dbi_len;
5347	int		dbi_err;
5348} dump_bytes_io_t;
5349
5350static void
5351dump_bytes_cb(void *arg)
5352{
5353	dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
5354	zfs_file_t *fp;
5355	caddr_t buf;
5356
5357	fp = dbi->dbi_fp;
5358	buf = dbi->dbi_buf;
5359
5360	dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL);
5361}
5362
5363static int
5364dump_bytes(objset_t *os, void *buf, int len, void *arg)
5365{
5366	dump_bytes_io_t dbi;
5367
5368	dbi.dbi_fp = arg;
5369	dbi.dbi_buf = buf;
5370	dbi.dbi_len = len;
5371
5372#if defined(HAVE_LARGE_STACKS)
5373	dump_bytes_cb(&dbi);
5374#else
5375	/*
5376	 * The vn_rdwr() call is performed in a taskq to ensure that there is
5377	 * always enough stack space to write safely to the target filesystem.
5378	 * The ZIO_TYPE_FREE threads are used because there can be a lot of
5379	 * them and they are used in vdev_file.c for a similar purpose.
5380	 */
5381	spa_taskq_dispatch_sync(dmu_objset_spa(os), ZIO_TYPE_FREE,
5382	    ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP);
5383#endif /* HAVE_LARGE_STACKS */
5384
5385	return (dbi.dbi_err);
5386}
5387
5388/*
5389 * inputs:
5390 * zc_name	name of snapshot to send
5391 * zc_cookie	file descriptor to send stream to
5392 * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
5393 * zc_sendobj	objsetid of snapshot to send
5394 * zc_fromobj	objsetid of incremental fromsnap (may be zero)
5395 * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
5396 *		output size in zc_objset_type.
5397 * zc_flags	lzc_send_flags
5398 *
5399 * outputs:
5400 * zc_objset_type	estimated size, if zc_guid is set
5401 *
5402 * NOTE: This is no longer the preferred interface, any new functionality
5403 *	  should be added to zfs_ioc_send_new() instead.
5404 */
5405static int
5406zfs_ioc_send(zfs_cmd_t *zc)
5407{
5408	int error;
5409	offset_t off;
5410	boolean_t estimate = (zc->zc_guid != 0);
5411	boolean_t embedok = (zc->zc_flags & 0x1);
5412	boolean_t large_block_ok = (zc->zc_flags & 0x2);
5413	boolean_t compressok = (zc->zc_flags & 0x4);
5414	boolean_t rawok = (zc->zc_flags & 0x8);
5415	boolean_t savedok = (zc->zc_flags & 0x10);
5416
5417	if (zc->zc_obj != 0) {
5418		dsl_pool_t *dp;
5419		dsl_dataset_t *tosnap;
5420
5421		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5422		if (error != 0)
5423			return (error);
5424
5425		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
5426		if (error != 0) {
5427			dsl_pool_rele(dp, FTAG);
5428			return (error);
5429		}
5430
5431		if (dsl_dir_is_clone(tosnap->ds_dir))
5432			zc->zc_fromobj =
5433			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
5434		dsl_dataset_rele(tosnap, FTAG);
5435		dsl_pool_rele(dp, FTAG);
5436	}
5437
5438	if (estimate) {
5439		dsl_pool_t *dp;
5440		dsl_dataset_t *tosnap;
5441		dsl_dataset_t *fromsnap = NULL;
5442
5443		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5444		if (error != 0)
5445			return (error);
5446
5447		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
5448		    FTAG, &tosnap);
5449		if (error != 0) {
5450			dsl_pool_rele(dp, FTAG);
5451			return (error);
5452		}
5453
5454		if (zc->zc_fromobj != 0) {
5455			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
5456			    FTAG, &fromsnap);
5457			if (error != 0) {
5458				dsl_dataset_rele(tosnap, FTAG);
5459				dsl_pool_rele(dp, FTAG);
5460				return (error);
5461			}
5462		}
5463
5464		error = dmu_send_estimate_fast(tosnap, fromsnap, NULL,
5465		    compressok || rawok, savedok, &zc->zc_objset_type);
5466
5467		if (fromsnap != NULL)
5468			dsl_dataset_rele(fromsnap, FTAG);
5469		dsl_dataset_rele(tosnap, FTAG);
5470		dsl_pool_rele(dp, FTAG);
5471	} else {
5472		zfs_file_t *fp;
5473		dmu_send_outparams_t out = {0};
5474
5475		if ((error = zfs_file_get(zc->zc_cookie, &fp)))
5476			return (error);
5477
5478		off = zfs_file_off(fp);
5479		out.dso_outfunc = dump_bytes;
5480		out.dso_arg = fp;
5481		out.dso_dryrun = B_FALSE;
5482		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
5483		    zc->zc_fromobj, embedok, large_block_ok, compressok,
5484		    rawok, savedok, zc->zc_cookie, &off, &out);
5485
5486		zfs_file_put(zc->zc_cookie);
5487	}
5488	return (error);
5489}
5490
5491/*
5492 * inputs:
5493 * zc_name		name of snapshot on which to report progress
5494 * zc_cookie		file descriptor of send stream
5495 *
5496 * outputs:
5497 * zc_cookie		number of bytes written in send stream thus far
5498 * zc_objset_type	logical size of data traversed by send thus far
5499 */
5500static int
5501zfs_ioc_send_progress(zfs_cmd_t *zc)
5502{
5503	dsl_pool_t *dp;
5504	dsl_dataset_t *ds;
5505	dmu_sendstatus_t *dsp = NULL;
5506	int error;
5507
5508	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5509	if (error != 0)
5510		return (error);
5511
5512	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5513	if (error != 0) {
5514		dsl_pool_rele(dp, FTAG);
5515		return (error);
5516	}
5517
5518	mutex_enter(&ds->ds_sendstream_lock);
5519
5520	/*
5521	 * Iterate over all the send streams currently active on this dataset.
5522	 * If there's one which matches the specified file descriptor _and_ the
5523	 * stream was started by the current process, return the progress of
5524	 * that stream.
5525	 */
5526
5527	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
5528	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
5529		if (dsp->dss_outfd == zc->zc_cookie &&
5530		    zfs_proc_is_caller(dsp->dss_proc))
5531			break;
5532	}
5533
5534	if (dsp != NULL) {
5535		zc->zc_cookie = atomic_cas_64((volatile uint64_t *)dsp->dss_off,
5536		    0, 0);
5537		/* This is the closest thing we have to atomic_read_64. */
5538		zc->zc_objset_type = atomic_cas_64(&dsp->dss_blocks, 0, 0);
5539	} else {
5540		error = SET_ERROR(ENOENT);
5541	}
5542
5543	mutex_exit(&ds->ds_sendstream_lock);
5544	dsl_dataset_rele(ds, FTAG);
5545	dsl_pool_rele(dp, FTAG);
5546	return (error);
5547}
5548
5549static int
5550zfs_ioc_inject_fault(zfs_cmd_t *zc)
5551{
5552	int id, error;
5553
5554	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
5555	    &zc->zc_inject_record);
5556
5557	if (error == 0)
5558		zc->zc_guid = (uint64_t)id;
5559
5560	return (error);
5561}
5562
5563static int
5564zfs_ioc_clear_fault(zfs_cmd_t *zc)
5565{
5566	return (zio_clear_fault((int)zc->zc_guid));
5567}
5568
5569static int
5570zfs_ioc_inject_list_next(zfs_cmd_t *zc)
5571{
5572	int id = (int)zc->zc_guid;
5573	int error;
5574
5575	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
5576	    &zc->zc_inject_record);
5577
5578	zc->zc_guid = id;
5579
5580	return (error);
5581}
5582
5583static int
5584zfs_ioc_error_log(zfs_cmd_t *zc)
5585{
5586	spa_t *spa;
5587	int error;
5588	size_t count = (size_t)zc->zc_nvlist_dst_size;
5589
5590	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
5591		return (error);
5592
5593	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
5594	    &count);
5595	if (error == 0)
5596		zc->zc_nvlist_dst_size = count;
5597	else
5598		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
5599
5600	spa_close(spa, FTAG);
5601
5602	return (error);
5603}
5604
5605static int
5606zfs_ioc_clear(zfs_cmd_t *zc)
5607{
5608	spa_t *spa;
5609	vdev_t *vd;
5610	int error;
5611
5612	/*
5613	 * On zpool clear we also fix up missing slogs
5614	 */
5615	mutex_enter(&spa_namespace_lock);
5616	spa = spa_lookup(zc->zc_name);
5617	if (spa == NULL) {
5618		mutex_exit(&spa_namespace_lock);
5619		return (SET_ERROR(EIO));
5620	}
5621	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
5622		/* we need to let spa_open/spa_load clear the chains */
5623		spa_set_log_state(spa, SPA_LOG_CLEAR);
5624	}
5625	spa->spa_last_open_failed = 0;
5626	mutex_exit(&spa_namespace_lock);
5627
5628	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
5629		error = spa_open(zc->zc_name, &spa, FTAG);
5630	} else {
5631		nvlist_t *policy;
5632		nvlist_t *config = NULL;
5633
5634		if (zc->zc_nvlist_src == 0)
5635			return (SET_ERROR(EINVAL));
5636
5637		if ((error = get_nvlist(zc->zc_nvlist_src,
5638		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
5639			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
5640			    policy, &config);
5641			if (config != NULL) {
5642				int err;
5643
5644				if ((err = put_nvlist(zc, config)) != 0)
5645					error = err;
5646				nvlist_free(config);
5647			}
5648			nvlist_free(policy);
5649		}
5650	}
5651
5652	if (error != 0)
5653		return (error);
5654
5655	/*
5656	 * If multihost is enabled, resuming I/O is unsafe as another
5657	 * host may have imported the pool.
5658	 */
5659	if (spa_multihost(spa) && spa_suspended(spa))
5660		return (SET_ERROR(EINVAL));
5661
5662	spa_vdev_state_enter(spa, SCL_NONE);
5663
5664	if (zc->zc_guid == 0) {
5665		vd = NULL;
5666	} else {
5667		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
5668		if (vd == NULL) {
5669			error = SET_ERROR(ENODEV);
5670			(void) spa_vdev_state_exit(spa, NULL, error);
5671			spa_close(spa, FTAG);
5672			return (error);
5673		}
5674	}
5675
5676	vdev_clear(spa, vd);
5677
5678	(void) spa_vdev_state_exit(spa, spa_suspended(spa) ?
5679	    NULL : spa->spa_root_vdev, 0);
5680
5681	/*
5682	 * Resume any suspended I/Os.
5683	 */
5684	if (zio_resume(spa) != 0)
5685		error = SET_ERROR(EIO);
5686
5687	spa_close(spa, FTAG);
5688
5689	return (error);
5690}
5691
/*
 * Reopen all the vdevs associated with the pool.
 *
 * innvl: {
 *  "scrub_restart" -> when true and a scrub is running, allow the scrub
 *              to be restarted as a side effect of the reopen (boolean).
 *              Optional; the handler treats a missing key as true.
 * }
 *
 * outnvl is unused
 */
static const zfs_ioc_key_t zfs_keys_pool_reopen[] = {
	{"scrub_restart",	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
};
5705
5706/* ARGSUSED */
5707static int
5708zfs_ioc_pool_reopen(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
5709{
5710	spa_t *spa;
5711	int error;
5712	boolean_t rc, scrub_restart = B_TRUE;
5713
5714	if (innvl) {
5715		error = nvlist_lookup_boolean_value(innvl,
5716		    "scrub_restart", &rc);
5717		if (error == 0)
5718			scrub_restart = rc;
5719	}
5720
5721	error = spa_open(pool, &spa, FTAG);
5722	if (error != 0)
5723		return (error);
5724
5725	spa_vdev_state_enter(spa, SCL_NONE);
5726
5727	/*
5728	 * If the scrub_restart flag is B_FALSE and a scrub is already
5729	 * in progress then set spa_scrub_reopen flag to B_TRUE so that
5730	 * we don't restart the scrub as a side effect of the reopen.
5731	 * Otherwise, let vdev_open() decided if a resilver is required.
5732	 */
5733
5734	spa->spa_scrub_reopen = (!scrub_restart &&
5735	    dsl_scan_scrubbing(spa->spa_dsl_pool));
5736	vdev_reopen(spa->spa_root_vdev);
5737	spa->spa_scrub_reopen = B_FALSE;
5738
5739	(void) spa_vdev_state_exit(spa, NULL, 0);
5740	spa_close(spa, FTAG);
5741	return (0);
5742}
5743
5744/*
5745 * inputs:
5746 * zc_name	name of filesystem
5747 *
5748 * outputs:
5749 * zc_string	name of conflicting snapshot, if there is one
5750 */
5751static int
5752zfs_ioc_promote(zfs_cmd_t *zc)
5753{
5754	dsl_pool_t *dp;
5755	dsl_dataset_t *ds, *ods;
5756	char origin[ZFS_MAX_DATASET_NAME_LEN];
5757	char *cp;
5758	int error;
5759
5760	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5761	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
5762	    strchr(zc->zc_name, '%'))
5763		return (SET_ERROR(EINVAL));
5764
5765	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5766	if (error != 0)
5767		return (error);
5768
5769	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5770	if (error != 0) {
5771		dsl_pool_rele(dp, FTAG);
5772		return (error);
5773	}
5774
5775	if (!dsl_dir_is_clone(ds->ds_dir)) {
5776		dsl_dataset_rele(ds, FTAG);
5777		dsl_pool_rele(dp, FTAG);
5778		return (SET_ERROR(EINVAL));
5779	}
5780
5781	error = dsl_dataset_hold_obj(dp,
5782	    dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
5783	if (error != 0) {
5784		dsl_dataset_rele(ds, FTAG);
5785		dsl_pool_rele(dp, FTAG);
5786		return (error);
5787	}
5788
5789	dsl_dataset_name(ods, origin);
5790	dsl_dataset_rele(ods, FTAG);
5791	dsl_dataset_rele(ds, FTAG);
5792	dsl_pool_rele(dp, FTAG);
5793
5794	/*
5795	 * We don't need to unmount *all* the origin fs's snapshots, but
5796	 * it's easier.
5797	 */
5798	cp = strchr(origin, '@');
5799	if (cp)
5800		*cp = '\0';
5801	(void) dmu_objset_find(origin,
5802	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
5803	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
5804}
5805
5806/*
5807 * Retrieve a single {user|group|project}{used|quota}@... property.
5808 *
5809 * inputs:
5810 * zc_name	name of filesystem
5811 * zc_objset_type zfs_userquota_prop_t
5812 * zc_value	domain name (eg. "S-1-234-567-89")
5813 * zc_guid	RID/UID/GID
5814 *
5815 * outputs:
5816 * zc_cookie	property value
5817 */
5818static int
5819zfs_ioc_userspace_one(zfs_cmd_t *zc)
5820{
5821	zfsvfs_t *zfsvfs;
5822	int error;
5823
5824	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
5825		return (SET_ERROR(EINVAL));
5826
5827	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5828	if (error != 0)
5829		return (error);
5830
5831	error = zfs_userspace_one(zfsvfs,
5832	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
5833	zfsvfs_rele(zfsvfs, FTAG);
5834
5835	return (error);
5836}
5837
5838/*
5839 * inputs:
5840 * zc_name		name of filesystem
5841 * zc_cookie		zap cursor
5842 * zc_objset_type	zfs_userquota_prop_t
5843 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
5844 *
5845 * outputs:
5846 * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
5847 * zc_cookie	zap cursor
5848 */
5849static int
5850zfs_ioc_userspace_many(zfs_cmd_t *zc)
5851{
5852	zfsvfs_t *zfsvfs;
5853	int bufsize = zc->zc_nvlist_dst_size;
5854
5855	if (bufsize <= 0)
5856		return (SET_ERROR(ENOMEM));
5857
5858	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5859	if (error != 0)
5860		return (error);
5861
5862	void *buf = vmem_alloc(bufsize, KM_SLEEP);
5863
5864	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
5865	    buf, &zc->zc_nvlist_dst_size);
5866
5867	if (error == 0) {
5868		error = xcopyout(buf,
5869		    (void *)(uintptr_t)zc->zc_nvlist_dst,
5870		    zc->zc_nvlist_dst_size);
5871	}
5872	vmem_free(buf, bufsize);
5873	zfsvfs_rele(zfsvfs, FTAG);
5874
5875	return (error);
5876}
5877
/*
 * Start (or join) the userspace accounting upgrade of an objset and wait
 * for it to finish.
 *
 * inputs:
 * zc_name		name of filesystem
 *
 * outputs:
 * none
 *
 * Returns the objset's os_upgrade_status once the upgrade task has been
 * waited for, or an earlier error from suspend/resume or from taking the
 * objset hold.
 */
static int
zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
{
	int error = 0;
	zfsvfs_t *zfsvfs;

	/* First branch: getzfsvfs() found a zfsvfs for this name. */
	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
			/*
			 * If userused is not enabled, it may be because the
			 * objset needs to be closed & reopened (to grow the
			 * objset_phys_t).  Suspend/resume the fs will do that.
			 */
			dsl_dataset_t *ds, *newds;

			ds = dmu_objset_ds(zfsvfs->z_os);
			error = zfs_suspend_fs(zfsvfs);
			if (error == 0) {
				dmu_objset_refresh_ownership(ds, &newds,
				    B_TRUE, zfsvfs);
				error = zfs_resume_fs(zfsvfs, newds);
			}
		}
		if (error == 0) {
			/*
			 * os_upgrade_id == 0 is taken to mean that no upgrade
			 * task is recorded; only then is a new one kicked
			 * off.  The lock is dropped before doing so.
			 */
			mutex_enter(&zfsvfs->z_os->os_upgrade_lock);
			if (zfsvfs->z_os->os_upgrade_id == 0) {
				/* clear potential error code and retry */
				zfsvfs->z_os->os_upgrade_status = 0;
				mutex_exit(&zfsvfs->z_os->os_upgrade_lock);

				dsl_pool_config_enter(
				    dmu_objset_pool(zfsvfs->z_os), FTAG);
				dmu_objset_userspace_upgrade(zfsvfs->z_os);
				dsl_pool_config_exit(
				    dmu_objset_pool(zfsvfs->z_os), FTAG);
			} else {
				mutex_exit(&zfsvfs->z_os->os_upgrade_lock);
			}

			/* Wait for the task, then report its status. */
			taskq_wait_id(zfsvfs->z_os->os_spa->spa_upgrade_taskq,
			    zfsvfs->z_os->os_upgrade_id);
			error = zfsvfs->z_os->os_upgrade_status;
		}
		zfs_vfs_rele(zfsvfs);
	} else {
		/* Second branch: no zfsvfs; work on a held objset instead. */
		objset_t *os;

		/* XXX kind of reading contents without owning */
		error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
		if (error != 0)
			return (error);

		mutex_enter(&os->os_upgrade_lock);
		if (os->os_upgrade_id == 0) {
			/* clear potential error code and retry */
			os->os_upgrade_status = 0;
			mutex_exit(&os->os_upgrade_lock);

			dmu_objset_userspace_upgrade(os);
		} else {
			mutex_exit(&os->os_upgrade_lock);
		}

		/*
		 * dsl_pool_rele() is called before waiting on the taskq; the
		 * dataset is only released after the status has been read.
		 */
		dsl_pool_rele(dmu_objset_pool(os), FTAG);

		taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
		error = os->os_upgrade_status;

		dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT,
		    FTAG);
	}
	return (error);
}
5958
/*
 * Start (or join) the id-quota upgrade of an objset, if it is upgradable,
 * and wait for it to finish.
 *
 * inputs:
 * zc_name		name of filesystem
 *
 * outputs:
 * none
 *
 * Returns the hold error, or os_upgrade_status after the wait.  When
 * neither upgradable check is true, nothing is started and the result of
 * the (successful) hold, 0, is returned.
 */
static int
zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc)
{
	objset_t *os;
	int error;

	error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
	if (error != 0)
		return (error);

	if (dmu_objset_userobjspace_upgradable(os) ||
	    dmu_objset_projectquota_upgradable(os)) {
		/*
		 * Only kick off a new upgrade when os_upgrade_id is 0; the
		 * lock is dropped before dmu_objset_id_quota_upgrade().
		 */
		mutex_enter(&os->os_upgrade_lock);
		if (os->os_upgrade_id == 0) {
			/* clear potential error code and retry */
			os->os_upgrade_status = 0;
			mutex_exit(&os->os_upgrade_lock);

			dmu_objset_id_quota_upgrade(os);
		} else {
			mutex_exit(&os->os_upgrade_lock);
		}

		/* dsl_pool_rele() comes before the taskq wait. */
		dsl_pool_rele(dmu_objset_pool(os), FTAG);

		taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
		error = os->os_upgrade_status;
	} else {
		dsl_pool_rele(dmu_objset_pool(os), FTAG);
	}

	/* Both branches end by releasing the dataset taken by the hold. */
	dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT, FTAG);

	return (error);
}
6001
/*
 * Stub handler: ignores zc and always fails with ENOSYS.
 */
static int
zfs_ioc_share(zfs_cmd_t *zc)
{
	return (SET_ERROR(ENOSYS));
}
6007
/*
 * One-entry ace_t table built from (uid_t)-1, ACE_ALL_PERMS and
 * ACE_EVERYONE.
 * NOTE(review): has external linkage and is not referenced in the visible
 * part of this file -- confirm it is still used elsewhere.
 */
ace_t full_access[] = {
	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
6011
6012/*
6013 * inputs:
6014 * zc_name		name of containing filesystem
6015 * zc_obj		object # beyond which we want next in-use object #
6016 *
6017 * outputs:
6018 * zc_obj		next in-use object #
6019 */
6020static int
6021zfs_ioc_next_obj(zfs_cmd_t *zc)
6022{
6023	objset_t *os = NULL;
6024	int error;
6025
6026	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
6027	if (error != 0)
6028		return (error);
6029
6030	error = dmu_object_next(os, &zc->zc_obj, B_FALSE, 0);
6031
6032	dmu_objset_rele(os, FTAG);
6033	return (error);
6034}
6035
6036/*
6037 * inputs:
6038 * zc_name		name of filesystem
6039 * zc_value		prefix name for snapshot
6040 * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
6041 *
6042 * outputs:
6043 * zc_value		short name of new snapshot
6044 */
6045static int
6046zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
6047{
6048	char *snap_name;
6049	char *hold_name;
6050	int error;
6051	minor_t minor;
6052
6053	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
6054	if (error != 0)
6055		return (error);
6056
6057	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
6058	    (u_longlong_t)ddi_get_lbolt64());
6059	hold_name = kmem_asprintf("%%%s", zc->zc_value);
6060
6061	error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
6062	    hold_name);
6063	if (error == 0)
6064		(void) strlcpy(zc->zc_value, snap_name,
6065		    sizeof (zc->zc_value));
6066	kmem_strfree(snap_name);
6067	kmem_strfree(hold_name);
6068	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
6069	return (error);
6070}
6071
6072/*
6073 * inputs:
6074 * zc_name		name of "to" snapshot
6075 * zc_value		name of "from" snapshot
6076 * zc_cookie		file descriptor to write diff data on
6077 *
6078 * outputs:
6079 * dmu_diff_record_t's to the file descriptor
6080 */
6081static int
6082zfs_ioc_diff(zfs_cmd_t *zc)
6083{
6084	zfs_file_t *fp;
6085	offset_t off;
6086	int error;
6087
6088	if ((error = zfs_file_get(zc->zc_cookie, &fp)))
6089		return (error);
6090
6091	off = zfs_file_off(fp);
6092	error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
6093
6094	zfs_file_put(zc->zc_cookie);
6095
6096	return (error);
6097}
6098
/*
 * Stub handler: ignores zc and always fails with ENOTSUP.
 */
static int
zfs_ioc_smb_acl(zfs_cmd_t *zc)
{
	return (SET_ERROR(ENOTSUP));
}
6104
/*
 * Place user holds on snapshots.
 *
 * innvl: {
 *     "holds" -> { snapname -> holdname (string), ... }
 *         every holdname must be a non-empty string
 *     (optional) "cleanup_fd" -> fd (int32)
 * }
 *
 * outnvl: {
 *     snapname -> error value (int32)
 *     ...
 * }
 */
static const zfs_ioc_key_t zfs_keys_hold[] = {
	{"holds",		DATA_TYPE_NVLIST,	0},
	{"cleanup_fd",		DATA_TYPE_INT32,	ZK_OPTIONAL},
};
6120
6121/* ARGSUSED */
6122static int
6123zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
6124{
6125	nvpair_t *pair;
6126	nvlist_t *holds;
6127	int cleanup_fd = -1;
6128	int error;
6129	minor_t minor = 0;
6130
6131	holds = fnvlist_lookup_nvlist(args, "holds");
6132
6133	/* make sure the user didn't pass us any invalid (empty) tags */
6134	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
6135	    pair = nvlist_next_nvpair(holds, pair)) {
6136		char *htag;
6137
6138		error = nvpair_value_string(pair, &htag);
6139		if (error != 0)
6140			return (SET_ERROR(error));
6141
6142		if (strlen(htag) == 0)
6143			return (SET_ERROR(EINVAL));
6144	}
6145
6146	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
6147		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
6148		if (error != 0)
6149			return (SET_ERROR(error));
6150	}
6151
6152	error = dsl_dataset_user_hold(holds, minor, errlist);
6153	if (minor != 0)
6154		zfs_onexit_fd_rele(cleanup_fd);
6155	return (SET_ERROR(error));
6156}
6157
/*
 * List the user holds on a snapshot.
 *
 * innvl is not used.
 *
 * outnvl: {
 *    holdname -> time added (uint64 seconds since epoch)
 *    ...
 * }
 */
static const zfs_ioc_key_t zfs_keys_get_holds[] = {
	/* no nvl keys */
};
6169
/*
 * Thin wrapper: passes snapname and outnvl to dsl_dataset_get_holds() and
 * returns its result.  args is not read.
 */
/* ARGSUSED */
static int
zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
{
	return (dsl_dataset_get_holds(snapname, outnvl));
}
6176
/*
 * Release user holds on snapshots.
 *
 * innvl: {
 *     snapname -> { holdname, ... }
 *     ...
 * }
 *
 * outnvl: {
 *     snapname -> error value (int32)
 *     ...
 * }
 */
static const zfs_ioc_key_t zfs_keys_release[] = {
	/* keys are snapshot names, hence a wildcard entry */
	{"<snapname>...",	DATA_TYPE_NVLIST,	ZK_WILDCARDLIST},
};
6191
/*
 * Thin wrapper: passes the input nvlist and the error list to
 * dsl_dataset_user_release() and returns its result.  pool is not read.
 */
/* ARGSUSED */
static int
zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
{
	return (dsl_dataset_user_release(holds, errlist));
}
6198
6199/*
6200 * inputs:
6201 * zc_guid		flags (ZEVENT_NONBLOCK)
6202 * zc_cleanup_fd	zevent file descriptor
6203 *
6204 * outputs:
6205 * zc_nvlist_dst	next nvlist event
6206 * zc_cookie		dropped events since last get
6207 */
6208static int
6209zfs_ioc_events_next(zfs_cmd_t *zc)
6210{
6211	zfs_zevent_t *ze;
6212	nvlist_t *event = NULL;
6213	minor_t minor;
6214	uint64_t dropped = 0;
6215	int error;
6216
6217	error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
6218	if (error != 0)
6219		return (error);
6220
6221	do {
6222		error = zfs_zevent_next(ze, &event,
6223		    &zc->zc_nvlist_dst_size, &dropped);
6224		if (event != NULL) {
6225			zc->zc_cookie = dropped;
6226			error = put_nvlist(zc, event);
6227			nvlist_free(event);
6228		}
6229
6230		if (zc->zc_guid & ZEVENT_NONBLOCK)
6231			break;
6232
6233		if ((error == 0) || (error != ENOENT))
6234			break;
6235
6236		error = zfs_zevent_wait(ze);
6237		if (error != 0)
6238			break;
6239	} while (1);
6240
6241	zfs_zevent_fd_rele(zc->zc_cleanup_fd);
6242
6243	return (error);
6244}
6245
6246/*
6247 * outputs:
6248 * zc_cookie		cleared events count
6249 */
6250static int
6251zfs_ioc_events_clear(zfs_cmd_t *zc)
6252{
6253	int count;
6254
6255	zfs_zevent_drain_all(&count);
6256	zc->zc_cookie = count;
6257
6258	return (0);
6259}
6260
6261/*
6262 * inputs:
6263 * zc_guid		eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END
6264 * zc_cleanup		zevent file descriptor
6265 */
6266static int
6267zfs_ioc_events_seek(zfs_cmd_t *zc)
6268{
6269	zfs_zevent_t *ze;
6270	minor_t minor;
6271	int error;
6272
6273	error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
6274	if (error != 0)
6275		return (error);
6276
6277	error = zfs_zevent_seek(ze, zc->zc_guid);
6278	zfs_zevent_fd_rele(zc->zc_cleanup_fd);
6279
6280	return (error);
6281}
6282
6283/*
6284 * inputs:
6285 * zc_name		name of later filesystem or snapshot
6286 * zc_value		full name of old snapshot or bookmark
6287 *
6288 * outputs:
6289 * zc_cookie		space in bytes
6290 * zc_objset_type	compressed space in bytes
6291 * zc_perm_action	uncompressed space in bytes
6292 */
6293static int
6294zfs_ioc_space_written(zfs_cmd_t *zc)
6295{
6296	int error;
6297	dsl_pool_t *dp;
6298	dsl_dataset_t *new;
6299
6300	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6301	if (error != 0)
6302		return (error);
6303	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
6304	if (error != 0) {
6305		dsl_pool_rele(dp, FTAG);
6306		return (error);
6307	}
6308	if (strchr(zc->zc_value, '#') != NULL) {
6309		zfs_bookmark_phys_t bmp;
6310		error = dsl_bookmark_lookup(dp, zc->zc_value,
6311		    new, &bmp);
6312		if (error == 0) {
6313			error = dsl_dataset_space_written_bookmark(&bmp, new,
6314			    &zc->zc_cookie,
6315			    &zc->zc_objset_type, &zc->zc_perm_action);
6316		}
6317	} else {
6318		dsl_dataset_t *old;
6319		error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
6320
6321		if (error == 0) {
6322			error = dsl_dataset_space_written(old, new,
6323			    &zc->zc_cookie,
6324			    &zc->zc_objset_type, &zc->zc_perm_action);
6325			dsl_dataset_rele(old, FTAG);
6326		}
6327	}
6328	dsl_dataset_rele(new, FTAG);
6329	dsl_pool_rele(dp, FTAG);
6330	return (error);
6331}
6332
/*
 * Report the space figures computed by dsl_dataset_space_wouldfree() for
 * the snapshots from "firstsnap" through the snapshot named by the ioctl.
 * Both names must refer to snapshots.
 *
 * innvl: {
 *     "firstsnap" -> snapshot name
 * }
 *
 * outnvl: {
 *     "used" -> space in bytes
 *     "compressed" -> compressed space in bytes
 *     "uncompressed" -> uncompressed space in bytes
 * }
 */
static const zfs_ioc_key_t zfs_keys_space_snaps[] = {
	{"firstsnap",	DATA_TYPE_STRING,	0},
};
6347
6348static int
6349zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
6350{
6351	int error;
6352	dsl_pool_t *dp;
6353	dsl_dataset_t *new, *old;
6354	char *firstsnap;
6355	uint64_t used, comp, uncomp;
6356
6357	firstsnap = fnvlist_lookup_string(innvl, "firstsnap");
6358
6359	error = dsl_pool_hold(lastsnap, FTAG, &dp);
6360	if (error != 0)
6361		return (error);
6362
6363	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
6364	if (error == 0 && !new->ds_is_snapshot) {
6365		dsl_dataset_rele(new, FTAG);
6366		error = SET_ERROR(EINVAL);
6367	}
6368	if (error != 0) {
6369		dsl_pool_rele(dp, FTAG);
6370		return (error);
6371	}
6372	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
6373	if (error == 0 && !old->ds_is_snapshot) {
6374		dsl_dataset_rele(old, FTAG);
6375		error = SET_ERROR(EINVAL);
6376	}
6377	if (error != 0) {
6378		dsl_dataset_rele(new, FTAG);
6379		dsl_pool_rele(dp, FTAG);
6380		return (error);
6381	}
6382
6383	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
6384	dsl_dataset_rele(old, FTAG);
6385	dsl_dataset_rele(new, FTAG);
6386	dsl_pool_rele(dp, FTAG);
6387	fnvlist_add_uint64(outnvl, "used", used);
6388	fnvlist_add_uint64(outnvl, "compressed", comp);
6389	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
6390	return (error);
6391}
6392
/*
 * Generate a send stream for the named snapshot and write it to a file
 * descriptor.
 *
 * innvl: {
 *     "fd" -> file descriptor to write stream to (int32)
 *     (optional) "fromsnap" -> full snap name to send an incremental from
 *     (optional) "largeblockok" -> (value ignored)
 *         indicates that blocks > 128KB are permitted
 *     (optional) "embedok" -> (value ignored)
 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
 *     (optional) "compressok" -> (value ignored)
 *         presence indicates compressed DRR_WRITE records are permitted
 *     (optional) "rawok" -> (value ignored)
 *         presence indicates raw encrypted records should be used.
 *     (optional) "savedok" -> (value ignored)
 *         presence indicates we should send a partially received snapshot
 *     (optional) "resume_object" and "resume_offset" -> (uint64)
 *         if present, resume send stream from specified object and offset.
 *     (optional) "redactbook" -> (string)
 *         if present, use this bookmark's redaction list to generate a redacted
 *         send stream
 * }
 *
 * outnvl is unused
 */
static const zfs_ioc_key_t zfs_keys_send_new[] = {
	/* "fd" is the only key not flagged ZK_OPTIONAL */
	{"fd",			DATA_TYPE_INT32,	0},
	{"fromsnap",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"largeblockok",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"savedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"resume_object",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"resume_offset",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"redactbook",		DATA_TYPE_STRING,	ZK_OPTIONAL},
};
6428
6429/* ARGSUSED */
6430static int
6431zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6432{
6433	int error;
6434	offset_t off;
6435	char *fromname = NULL;
6436	int fd;
6437	zfs_file_t *fp;
6438	boolean_t largeblockok;
6439	boolean_t embedok;
6440	boolean_t compressok;
6441	boolean_t rawok;
6442	boolean_t savedok;
6443	uint64_t resumeobj = 0;
6444	uint64_t resumeoff = 0;
6445	char *redactbook = NULL;
6446
6447	fd = fnvlist_lookup_int32(innvl, "fd");
6448
6449	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
6450
6451	largeblockok = nvlist_exists(innvl, "largeblockok");
6452	embedok = nvlist_exists(innvl, "embedok");
6453	compressok = nvlist_exists(innvl, "compressok");
6454	rawok = nvlist_exists(innvl, "rawok");
6455	savedok = nvlist_exists(innvl, "savedok");
6456
6457	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
6458	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
6459
6460	(void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
6461
6462	if ((error = zfs_file_get(fd, &fp)))
6463		return (error);
6464
6465	off = zfs_file_off(fp);
6466
6467	dmu_send_outparams_t out = {0};
6468	out.dso_outfunc = dump_bytes;
6469	out.dso_arg = fp;
6470	out.dso_dryrun = B_FALSE;
6471	error = dmu_send(snapname, fromname, embedok, largeblockok,
6472	    compressok, rawok, savedok, resumeobj, resumeoff,
6473	    redactbook, fd, &off, &out);
6474
6475	zfs_file_put(fd);
6476	return (error);
6477}
6478
6479/* ARGSUSED */
6480static int
6481send_space_sum(objset_t *os, void *buf, int len, void *arg)
6482{
6483	uint64_t *size = arg;
6484	*size += len;
6485	return (0);
6486}
6487
/*
 * Determine approximately how large a zfs send stream will be -- the number
 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
 *
 * innvl: {
 *     (optional) "from" -> full snap or bookmark name to send an incremental
 *                          from
 *     (optional) "largeblockok" -> (value ignored)
 *         indicates that blocks > 128KB are permitted
 *     (optional) "embedok" -> (value ignored)
 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
 *     (optional) "compressok" -> (value ignored)
 *         presence indicates compressed DRR_WRITE records are permitted
 *     (optional) "rawok" -> (value ignored)
 *         presence indicates raw encrypted records should be used.
 *     (optional) "resume_object" and "resume_offset" -> (uint64)
 *         if present, resume send stream from specified object and offset.
 *     (optional) "fd" -> file descriptor to use as a cookie for progress
 *         tracking (int32)
 *     (optional) "redactbook" -> (string)
 *         if present, the handler always performs a full (dry-run) estimate
 *         and passes this name on to dmu_send()
 *     (optional) "bytes" -> (uint64)
 *         subtracted from the result when the fast estimate is used
 * }
 *
 * outnvl: {
 *     "space" -> bytes of space (uint64)
 * }
 *
 * NOTE(review): "fromsnap" is accepted by the table below but
 * zfs_ioc_send_space() never looks it up.  Conversely the handler tests
 * for "savedok", which this table does not list -- confirm whether key
 * validation rejects it and whether an entry should be added.
 */
static const zfs_ioc_key_t zfs_keys_send_space[] = {
	{"from",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"fromsnap",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"largeblockok",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"fd",			DATA_TYPE_INT32,	ZK_OPTIONAL},
	{"redactbook",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"resume_object",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"resume_offset",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"bytes",		DATA_TYPE_UINT64,	ZK_OPTIONAL},
};
6526
/*
 * Handler for the send-space ioctl: estimate the size of the stream that
 * sending snapname would produce and return it as "space" in outnvl.
 *
 * Two strategies are used:
 *  - a full estimate, which runs dmu_send() as a dry run with
 *    send_space_sum() adding up the bytes, and
 *  - a fast estimate via dmu_send_estimate_fast(), from which the "bytes"
 *    value of innvl is subtracted.
 *
 * "space" is added to outnvl on every path that reaches the end of the
 * function, including when the estimate itself returned an error.
 */
static int
zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
{
	dsl_pool_t *dp;
	dsl_dataset_t *tosnap;
	dsl_dataset_t *fromsnap = NULL;
	int error;
	char *fromname = NULL;
	char *redactlist_book = NULL;
	boolean_t largeblockok;
	boolean_t embedok;
	boolean_t compressok;
	boolean_t rawok;
	boolean_t savedok;
	uint64_t space = 0;
	boolean_t full_estimate = B_FALSE;
	uint64_t resumeobj = 0;
	uint64_t resumeoff = 0;
	uint64_t resume_bytes = 0;
	int32_t fd = -1;
	zfs_bookmark_phys_t zbm = {0};

	error = dsl_pool_hold(snapname, FTAG, &dp);
	if (error != 0)
		return (error);

	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
	if (error != 0) {
		dsl_pool_rele(dp, FTAG);
		return (error);
	}
	/* Optional; fd stays -1 when the key is absent. */
	(void) nvlist_lookup_int32(innvl, "fd", &fd);

	/* Flag keys: only presence is tested. */
	largeblockok = nvlist_exists(innvl, "largeblockok");
	embedok = nvlist_exists(innvl, "embedok");
	compressok = nvlist_exists(innvl, "compressok");
	rawok = nvlist_exists(innvl, "rawok");
	savedok = nvlist_exists(innvl, "savedok");
	boolean_t from = (nvlist_lookup_string(innvl, "from", &fromname) == 0);
	boolean_t altbook = (nvlist_lookup_string(innvl, "redactbook",
	    &redactlist_book) == 0);

	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
	(void) nvlist_lookup_uint64(innvl, "bytes", &resume_bytes);

	/*
	 * Decide between the full and the fast estimate.  A redaction
	 * bookmark always forces the full one; otherwise it depends on what
	 * "from" names.
	 */
	if (altbook) {
		full_estimate = B_TRUE;
	} else if (from) {
		if (strchr(fromname, '#')) {
			error = dsl_bookmark_lookup(dp, fromname, tosnap, &zbm);

			/*
			 * dsl_bookmark_lookup() will fail with EXDEV if
			 * the from-bookmark and tosnap are at the same txg.
			 * However, it's valid to do a send (and therefore,
			 * a send estimate) from and to the same time point,
			 * if the bookmark is redacted (the incremental send
			 * can change what's redacted on the target).  In
			 * this case, dsl_bookmark_lookup() fills in zbm
			 * but returns EXDEV.  Ignore this error.
			 */
			if (error == EXDEV && zbm.zbm_redaction_obj != 0 &&
			    zbm.zbm_guid ==
			    dsl_dataset_phys(tosnap)->ds_guid)
				error = 0;

			if (error != 0) {
				dsl_dataset_rele(tosnap, FTAG);
				dsl_pool_rele(dp, FTAG);
				return (error);
			}
			/*
			 * Redacted bookmarks, and bookmarks without the
			 * ZBM_FLAG_HAS_FBN flag, need the full estimate.
			 */
			if (zbm.zbm_redaction_obj != 0 || !(zbm.zbm_flags &
			    ZBM_FLAG_HAS_FBN)) {
				full_estimate = B_TRUE;
			}
		} else if (strchr(fromname, '@')) {
			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
			if (error != 0) {
				dsl_dataset_rele(tosnap, FTAG);
				dsl_pool_rele(dp, FTAG);
				return (error);
			}

			/*
			 * When dsl_dataset_is_before() is false, fall back to
			 * the full estimate.  The hold on fromsnap is dropped
			 * right here; the pointer is left non-NULL but the
			 * full-estimate path below never touches it.
			 */
			if (!dsl_dataset_is_before(tosnap, fromsnap, 0)) {
				full_estimate = B_TRUE;
				dsl_dataset_rele(fromsnap, FTAG);
			}
		} else {
			/*
			 * from is not properly formatted as a snapshot or
			 * bookmark
			 */
			dsl_dataset_rele(tosnap, FTAG);
			dsl_pool_rele(dp, FTAG);
			return (SET_ERROR(EINVAL));
		}
	}

	if (full_estimate) {
		dmu_send_outparams_t out = {0};
		offset_t off = 0;
		out.dso_outfunc = send_space_sum;
		out.dso_arg = &space;
		out.dso_dryrun = B_TRUE;
		/*
		 * We have to release these holds so dmu_send can take them.  It
		 * will do all the error checking we need.
		 */
		dsl_dataset_rele(tosnap, FTAG);
		dsl_pool_rele(dp, FTAG);
		error = dmu_send(snapname, fromname, embedok, largeblockok,
		    compressok, rawok, savedok, resumeobj, resumeoff,
		    redactlist_book, fd, &off, &out);
	} else {
		/*
		 * Fast path.  largeblockok, embedok and the resume
		 * object/offset are not consulted here; the bookmark is only
		 * passed along when "from" contained a '#'.
		 */
		error = dmu_send_estimate_fast(tosnap, fromsnap,
		    (from && strchr(fromname, '#') != NULL ? &zbm : NULL),
		    compressok || rawok, savedok, &space);
		/* Unsigned subtraction: no clamp if "bytes" exceeds space. */
		space -= resume_bytes;
		if (fromsnap != NULL)
			dsl_dataset_rele(fromsnap, FTAG);
		dsl_dataset_rele(tosnap, FTAG);
		dsl_pool_rele(dp, FTAG);
	}

	fnvlist_add_uint64(outnvl, "space", space);

	return (error);
}
6656
6657/*
6658 * Sync the currently open TXG to disk for the specified pool.
6659 * This is somewhat similar to 'zfs_sync()'.
6660 * For cases that do not result in error this ioctl will wait for
6661 * the currently open TXG to commit before returning back to the caller.
6662 *
6663 * innvl: {
6664 *  "force" -> when true, force uberblock update even if there is no dirty data.
6665 *             In addition this will cause the vdev configuration to be written
6666 *             out including updating the zpool cache file. (boolean_t)
6667 * }
6668 *
6669 * onvl is unused
6670 */
6671static const zfs_ioc_key_t zfs_keys_pool_sync[] = {
6672	{"force",	DATA_TYPE_BOOLEAN_VALUE,	0},
6673};
6674
6675/* ARGSUSED */
6676static int
6677zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
6678{
6679	int err;
6680	boolean_t rc, force = B_FALSE;
6681	spa_t *spa;
6682
6683	if ((err = spa_open(pool, &spa, FTAG)) != 0)
6684		return (err);
6685
6686	if (innvl) {
6687		err = nvlist_lookup_boolean_value(innvl, "force", &rc);
6688		if (err == 0)
6689			force = rc;
6690	}
6691
6692	if (force) {
6693		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
6694		vdev_config_dirty(spa->spa_root_vdev);
6695		spa_config_exit(spa, SCL_CONFIG, FTAG);
6696	}
6697	txg_wait_synced(spa_get_dsl(spa), 0);
6698
6699	spa_close(spa, FTAG);
6700
6701	return (0);
6702}
6703
6704/*
6705 * Load a user's wrapping key into the kernel.
6706 * innvl: {
6707 *     "hidden_args" -> { "wkeydata" -> value }
6708 *         raw uint8_t array of encryption wrapping key data (32 bytes)
6709 *     (optional) "noop" -> (value ignored)
 *         presence indicates the key should only be verified, not loaded
6711 * }
6712 */
6713static const zfs_ioc_key_t zfs_keys_load_key[] = {
6714	{"hidden_args",	DATA_TYPE_NVLIST,	0},
6715	{"noop",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
6716};
6717
6718/* ARGSUSED */
6719static int
6720zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6721{
6722	int ret;
6723	dsl_crypto_params_t *dcp = NULL;
6724	nvlist_t *hidden_args;
6725	boolean_t noop = nvlist_exists(innvl, "noop");
6726
6727	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6728		ret = SET_ERROR(EINVAL);
6729		goto error;
6730	}
6731
6732	hidden_args = fnvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS);
6733
6734	ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
6735	    hidden_args, &dcp);
6736	if (ret != 0)
6737		goto error;
6738
6739	ret = spa_keystore_load_wkey(dsname, dcp, noop);
6740	if (ret != 0)
6741		goto error;
6742
6743	dsl_crypto_params_free(dcp, noop);
6744
6745	return (0);
6746
6747error:
6748	dsl_crypto_params_free(dcp, B_TRUE);
6749	return (ret);
6750}
6751
6752/*
6753 * Unload a user's wrapping key from the kernel.
6754 * Both innvl and outnvl are unused.
6755 */
6756static const zfs_ioc_key_t zfs_keys_unload_key[] = {
6757	/* no nvl keys */
6758};
6759
6760/* ARGSUSED */
6761static int
6762zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6763{
6764	int ret = 0;
6765
6766	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6767		ret = (SET_ERROR(EINVAL));
6768		goto out;
6769	}
6770
6771	ret = spa_keystore_unload_wkey(dsname);
6772	if (ret != 0)
6773		goto out;
6774
6775out:
6776	return (ret);
6777}
6778
6779/*
 * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
 * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified
 * here to change how the key is derived in userspace.
 *
 * innvl: {
 *    "crypt_cmd" (optional) -> uint64 command value; DCP_CMD_NONE is used
 *         when the pair is absent
 *    "hidden_args" (optional) -> { "wkeydata" -> value }
 *         raw uint8_t array of new encryption wrapping key data (32 bytes)
 *    "props" (optional) -> { prop -> value }
 * }
6789 *
6790 * outnvl is unused
6791 */
6792static const zfs_ioc_key_t zfs_keys_change_key[] = {
6793	{"crypt_cmd",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
6794	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
6795	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
6796};
6797
6798/* ARGSUSED */
6799static int
6800zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6801{
6802	int ret;
6803	uint64_t cmd = DCP_CMD_NONE;
6804	dsl_crypto_params_t *dcp = NULL;
6805	nvlist_t *args = NULL, *hidden_args = NULL;
6806
6807	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6808		ret = (SET_ERROR(EINVAL));
6809		goto error;
6810	}
6811
6812	(void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
6813	(void) nvlist_lookup_nvlist(innvl, "props", &args);
6814	(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
6815
6816	ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp);
6817	if (ret != 0)
6818		goto error;
6819
6820	ret = spa_keystore_change_key(dsname, dcp);
6821	if (ret != 0)
6822		goto error;
6823
6824	dsl_crypto_params_free(dcp, B_FALSE);
6825
6826	return (0);
6827
6828error:
6829	dsl_crypto_params_free(dcp, B_TRUE);
6830	return (ret);
6831}
6832
6833static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
6834
6835static void
6836zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6837    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6838    boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
6839{
6840	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6841
6842	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6843	ASSERT3U(ioc, <, ZFS_IOC_LAST);
6844	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6845	ASSERT3P(vec->zvec_func, ==, NULL);
6846
6847	vec->zvec_legacy_func = func;
6848	vec->zvec_secpolicy = secpolicy;
6849	vec->zvec_namecheck = namecheck;
6850	vec->zvec_allow_log = log_history;
6851	vec->zvec_pool_check = pool_check;
6852}
6853
6854/*
6855 * See the block comment at the beginning of this file for details on
6856 * each argument to this function.
6857 */
6858void
6859zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
6860    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6861    zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
6862    boolean_t allow_log, const zfs_ioc_key_t *nvl_keys, size_t num_keys)
6863{
6864	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6865
6866	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6867	ASSERT3U(ioc, <, ZFS_IOC_LAST);
6868	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6869	ASSERT3P(vec->zvec_func, ==, NULL);
6870
6871	/* if we are logging, the name must be valid */
6872	ASSERT(!allow_log || namecheck != NO_NAME);
6873
6874	vec->zvec_name = name;
6875	vec->zvec_func = func;
6876	vec->zvec_secpolicy = secpolicy;
6877	vec->zvec_namecheck = namecheck;
6878	vec->zvec_pool_check = pool_check;
6879	vec->zvec_smush_outnvlist = smush_outnvlist;
6880	vec->zvec_allow_log = allow_log;
6881	vec->zvec_nvl_keys = nvl_keys;
6882	vec->zvec_nvl_key_count = num_keys;
6883}
6884
6885static void
6886zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6887    zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
6888    zfs_ioc_poolcheck_t pool_check)
6889{
6890	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6891	    POOL_NAME, log_history, pool_check);
6892}
6893
6894void
6895zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6896    zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
6897{
6898	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6899	    DATASET_NAME, B_FALSE, pool_check);
6900}
6901
6902static void
6903zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6904{
6905	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
6906	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6907}
6908
6909static void
6910zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6911    zfs_secpolicy_func_t *secpolicy)
6912{
6913	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6914	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
6915}
6916
6917static void
6918zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
6919    zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
6920{
6921	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6922	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
6923}
6924
6925static void
6926zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6927{
6928	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
6929	    zfs_secpolicy_read);
6930}
6931
6932static void
6933zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6934    zfs_secpolicy_func_t *secpolicy)
6935{
6936	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6937	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6938}
6939
/*
 * Fill the zfs_ioc_vec dispatch table.
 *
 * The first group of calls registers the nvlist-based handlers through
 * zfs_ioctl_register(); its arguments are, in order: name, ioctl number,
 * handler, security policy, name check, pool check, smush_outnvlist,
 * allow_log, expected-key table and key count.  The second group
 * registers handlers that use the legacy function signature, mostly via
 * the zfs_ioctl_register_*() convenience wrappers.  Finally
 * zfs_ioctl_init_os() is called (presumably for OS-specific
 * registrations -- confirm against its definition).
 */
static void
zfs_ioctl_init(void)
{
	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_snapshot, ARRAY_SIZE(zfs_keys_snapshot));

	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_log_history, ARRAY_SIZE(zfs_keys_log_history));

	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_space_snaps, ARRAY_SIZE(zfs_keys_space_snaps));

	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_send_new, ARRAY_SIZE(zfs_keys_send_new));

	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_send_space, ARRAY_SIZE(zfs_keys_send_space));

	zfs_ioctl_register("create", ZFS_IOC_CREATE,
	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_create, ARRAY_SIZE(zfs_keys_create));

	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_clone, ARRAY_SIZE(zfs_keys_clone));

	zfs_ioctl_register("remap", ZFS_IOC_REMAP,
	    zfs_ioc_remap, zfs_secpolicy_none, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
	    zfs_keys_remap, ARRAY_SIZE(zfs_keys_remap));

	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_destroy_snaps, ARRAY_SIZE(zfs_keys_destroy_snaps));

	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_hold, ARRAY_SIZE(zfs_keys_hold));
	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_release, ARRAY_SIZE(zfs_keys_release));

	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_get_holds, ARRAY_SIZE(zfs_keys_get_holds));

	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
	    zfs_keys_rollback, ARRAY_SIZE(zfs_keys_rollback));

	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_bookmark, ARRAY_SIZE(zfs_keys_bookmark));

	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_get_bookmarks, ARRAY_SIZE(zfs_keys_get_bookmarks));

	/* the only registration in this function that uses ENTITY_NAME */
	zfs_ioctl_register("get_bookmark_props", ZFS_IOC_GET_BOOKMARK_PROPS,
	    zfs_ioc_get_bookmark_props, zfs_secpolicy_read, ENTITY_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_get_bookmark_props,
	    ARRAY_SIZE(zfs_keys_get_bookmark_props));

	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
	    POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_destroy_bookmarks,
	    ARRAY_SIZE(zfs_keys_destroy_bookmarks));

	zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
	    zfs_ioc_recv_new, zfs_secpolicy_recv_new, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_recv_new, ARRAY_SIZE(zfs_keys_recv_new));
	/* wrapping-key management: load, unload and change */
	zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
	    zfs_ioc_load_key, zfs_secpolicy_load_key,
	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
	    zfs_keys_load_key, ARRAY_SIZE(zfs_keys_load_key));
	zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
	    zfs_ioc_unload_key, zfs_secpolicy_load_key,
	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
	    zfs_keys_unload_key, ARRAY_SIZE(zfs_keys_unload_key));
	zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
	    zfs_ioc_change_key, zfs_secpolicy_change_key,
	    DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
	    B_TRUE, B_TRUE, zfs_keys_change_key,
	    ARRAY_SIZE(zfs_keys_change_key));

	zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
	    zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_pool_sync, ARRAY_SIZE(zfs_keys_pool_sync));
	zfs_ioctl_register("reopen", ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
	    zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED, B_TRUE,
	    B_TRUE, zfs_keys_pool_reopen, ARRAY_SIZE(zfs_keys_pool_reopen));

	zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
	    zfs_ioc_channel_program, zfs_secpolicy_config,
	    POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
	    B_TRUE, zfs_keys_channel_program,
	    ARRAY_SIZE(zfs_keys_channel_program));

	zfs_ioctl_register("redact", ZFS_IOC_REDACT,
	    zfs_ioc_redact, zfs_secpolicy_config, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_redact, ARRAY_SIZE(zfs_keys_redact));

	zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
	    zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_checkpoint, ARRAY_SIZE(zfs_keys_pool_checkpoint));

	zfs_ioctl_register("zpool_discard_checkpoint",
	    ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
	    zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_discard_checkpoint,
	    ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));

	zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
	    zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_initialize, ARRAY_SIZE(zfs_keys_pool_initialize));

	zfs_ioctl_register("trim", ZFS_IOC_POOL_TRIM,
	    zfs_ioc_pool_trim, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_trim, ARRAY_SIZE(zfs_keys_pool_trim));

	zfs_ioctl_register("wait", ZFS_IOC_WAIT,
	    zfs_ioc_wait, zfs_secpolicy_none, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));

	zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
	    zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));

	zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV,
	    zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
	    zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv));

	zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV,
	    zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE,
	    zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv));

	/* IOCTLS that use the legacy function signature */

	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	/*
	 * zfs_ioctl_register_pool_modify() fixes the policy to
	 * zfs_secpolicy_config, enables history logging and applies both
	 * the suspended and read-only pool checks.
	 */
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
	    zfs_ioc_pool_scan);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
	    zfs_ioc_pool_upgrade);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
	    zfs_ioc_vdev_add);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
	    zfs_ioc_vdev_remove);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
	    zfs_ioc_vdev_set_state);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
	    zfs_ioc_vdev_attach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
	    zfs_ioc_vdev_detach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
	    zfs_ioc_vdev_setpath);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
	    zfs_ioc_vdev_setfru);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
	    zfs_ioc_pool_set_props);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
	    zfs_ioc_vdev_split);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
	    zfs_ioc_pool_reguid);

	/*
	 * zfs_ioctl_register_pool_meta() registers with NO_NAME, no
	 * history logging and POOL_CHECK_NONE.
	 */
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
	    zfs_ioc_pool_configs, zfs_secpolicy_none);
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);

	/*
	 * pool destroy, and export don't log the history as part of
	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
	 * does the logging of those commands.
	 */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
	    zfs_ioc_dsobj_to_dsname,
	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
	    zfs_ioc_pool_get_history,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);

	/*
	 * zfs_ioctl_register_dataset_read() uses zfs_secpolicy_read, a
	 * dataset name check, no logging and POOL_CHECK_SUSPENDED.
	 */
	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
	    zfs_ioc_space_written);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
	    zfs_ioc_objset_recvd_props);
	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
	    zfs_ioc_next_obj);
	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
	    zfs_ioc_get_fsacl);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
	    zfs_ioc_objset_stats);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
	    zfs_ioc_objset_zplprops);
	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
	    zfs_ioc_dataset_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
	    zfs_ioc_snapshot_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
	    zfs_ioc_send_progress);

	/* same as above, but with a handler-specific security policy */
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
	    zfs_ioc_diff, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
	    zfs_ioc_send, zfs_secpolicy_send);

	/*
	 * zfs_ioctl_register_dataset_modify() enables history logging and
	 * applies both the suspended and read-only pool checks.
	 */
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
	    zfs_secpolicy_none);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
	    zfs_secpolicy_destroy);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
	    zfs_secpolicy_rename);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
	    zfs_secpolicy_recv);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
	    zfs_secpolicy_promote);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
	    zfs_secpolicy_set_fsacl);

	/* dataset ioctls that are never written to the history log */
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
	    zfs_secpolicy_share, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);

	/* event ioctls: no name check, no logging, no pool check */
	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_init_os();
}
7248
7249/*
7250 * Verify that for non-legacy ioctls the input nvlist
7251 * pairs match against the expected input.
7252 *
7253 * Possible errors are:
7254 * ZFS_ERR_IOC_ARG_UNAVAIL	An unrecognized nvpair was encountered
7255 * ZFS_ERR_IOC_ARG_REQUIRED	A required nvpair is missing
7256 * ZFS_ERR_IOC_ARG_BADTYPE	Invalid type for nvpair
7257 */
7258static int
7259zfs_check_input_nvpairs(nvlist_t *innvl, const zfs_ioc_vec_t *vec)
7260{
7261	const zfs_ioc_key_t *nvl_keys = vec->zvec_nvl_keys;
7262	boolean_t required_keys_found = B_FALSE;
7263
7264	/*
7265	 * examine each input pair
7266	 */
7267	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
7268	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
7269		char *name = nvpair_name(pair);
7270		data_type_t type = nvpair_type(pair);
7271		boolean_t identified = B_FALSE;
7272
7273		/*
7274		 * check pair against the documented names and type
7275		 */
7276		for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
7277			/* if not a wild card name, check for an exact match */
7278			if ((nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) == 0 &&
7279			    strcmp(nvl_keys[k].zkey_name, name) != 0)
7280				continue;
7281
7282			identified = B_TRUE;
7283
7284			if (nvl_keys[k].zkey_type != DATA_TYPE_ANY &&
7285			    nvl_keys[k].zkey_type != type) {
7286				return (SET_ERROR(ZFS_ERR_IOC_ARG_BADTYPE));
7287			}
7288
7289			if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
7290				continue;
7291
7292			required_keys_found = B_TRUE;
7293			break;
7294		}
7295
7296		/* allow an 'optional' key, everything else is invalid */
7297		if (!identified &&
7298		    (strcmp(name, "optional") != 0 ||
7299		    type != DATA_TYPE_NVLIST)) {
7300			return (SET_ERROR(ZFS_ERR_IOC_ARG_UNAVAIL));
7301		}
7302	}
7303
7304	/* verify that all required keys were found */
7305	for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
7306		if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
7307			continue;
7308
7309		if (nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) {
7310			/* at least one non-optional key is expected here */
7311			if (!required_keys_found)
7312				return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
7313			continue;
7314		}
7315
7316		if (!nvlist_exists(innvl, nvl_keys[k].zkey_name))
7317			return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
7318	}
7319
7320	return (0);
7321}
7322
7323static int
7324pool_status_check(const char *name, zfs_ioc_namecheck_t type,
7325    zfs_ioc_poolcheck_t check)
7326{
7327	spa_t *spa;
7328	int error;
7329
7330	ASSERT(type == POOL_NAME || type == DATASET_NAME ||
7331	    type == ENTITY_NAME);
7332
7333	if (check & POOL_CHECK_NONE)
7334		return (0);
7335
7336	error = spa_open(name, &spa, FTAG);
7337	if (error == 0) {
7338		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
7339			error = SET_ERROR(EAGAIN);
7340		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
7341			error = SET_ERROR(EROFS);
7342		spa_close(spa, FTAG);
7343	}
7344	return (error);
7345}
7346
7347int
7348zfsdev_getminor(int fd, minor_t *minorp)
7349{
7350	zfsdev_state_t *zs, *fpd;
7351	zfs_file_t *fp;
7352	int rc;
7353
7354	ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
7355
7356	if ((rc = zfs_file_get(fd, &fp)))
7357		return (rc);
7358
7359	fpd = zfs_file_private(fp);
7360	if (fpd == NULL)
7361		return (SET_ERROR(EBADF));
7362
7363	mutex_enter(&zfsdev_state_lock);
7364
7365	for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
7366
7367		if (zs->zs_minor == -1)
7368			continue;
7369
7370		if (fpd == zs) {
7371			*minorp = fpd->zs_minor;
7372			mutex_exit(&zfsdev_state_lock);
7373			return (0);
7374		}
7375	}
7376
7377	mutex_exit(&zfsdev_state_lock);
7378
7379	return (SET_ERROR(EBADF));
7380}
7381
7382static void *
7383zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which)
7384{
7385	zfsdev_state_t *zs;
7386
7387	for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
7388		if (zs->zs_minor == minor) {
7389			smp_rmb();
7390			switch (which) {
7391			case ZST_ONEXIT:
7392				return (zs->zs_onexit);
7393			case ZST_ZEVENT:
7394				return (zs->zs_zevent);
7395			case ZST_ALL:
7396				return (zs);
7397			}
7398		}
7399	}
7400
7401	return (NULL);
7402}
7403
7404void *
7405zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
7406{
7407	void *ptr;
7408
7409	ptr = zfsdev_get_state_impl(minor, which);
7410
7411	return (ptr);
7412}
7413
7414/*
7415 * Find a free minor number.  The zfsdev_state_list is expected to
7416 * be short since it is only a list of currently open file handles.
7417 */
7418minor_t
7419zfsdev_minor_alloc(void)
7420{
7421	static minor_t last_minor = 0;
7422	minor_t m;
7423
7424	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
7425
7426	for (m = last_minor + 1; m != last_minor; m++) {
7427		if (m > ZFSDEV_MAX_MINOR)
7428			m = 1;
7429		if (zfsdev_get_state_impl(m, ZST_ALL) == NULL) {
7430			last_minor = m;
7431			return (m);
7432		}
7433	}
7434
7435	return (0);
7436}
7437
7438long
7439zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
7440{
7441	int error, cmd;
7442	const zfs_ioc_vec_t *vec;
7443	char *saved_poolname = NULL;
7444	uint64_t max_nvlist_src_size;
7445	size_t saved_poolname_len = 0;
7446	nvlist_t *innvl = NULL;
7447	fstrans_cookie_t cookie;
7448	hrtime_t start_time = gethrtime();
7449
7450	cmd = vecnum;
7451	error = 0;
7452	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
7453		return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
7454
7455	vec = &zfs_ioc_vec[vecnum];
7456
7457	/*
7458	 * The registered ioctl list may be sparse, verify that either
7459	 * a normal or legacy handler are registered.
7460	 */
7461	if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
7462		return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
7463
7464	zc->zc_iflags = flag & FKIOCTL;
7465	max_nvlist_src_size = zfs_max_nvlist_src_size_os();
7466	if (zc->zc_nvlist_src_size > max_nvlist_src_size) {
7467		/*
7468		 * Make sure the user doesn't pass in an insane value for
7469		 * zc_nvlist_src_size.  We have to check, since we will end
7470		 * up allocating that much memory inside of get_nvlist().  This
7471		 * prevents a nefarious user from allocating tons of kernel
7472		 * memory.
7473		 *
7474		 * Also, we return EINVAL instead of ENOMEM here.  The reason
7475		 * being that returning ENOMEM from an ioctl() has a special
7476		 * connotation; that the user's size value is too small and
7477		 * needs to be expanded to hold the nvlist.  See
7478		 * zcmd_expand_dst_nvlist() for details.
7479		 */
7480		error = SET_ERROR(EINVAL);	/* User's size too big */
7481
7482	} else if (zc->zc_nvlist_src_size != 0) {
7483		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
7484		    zc->zc_iflags, &innvl);
7485		if (error != 0)
7486			goto out;
7487	}
7488
7489	/*
7490	 * Ensure that all pool/dataset names are valid before we pass down to
7491	 * the lower layers.
7492	 */
7493	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
7494	switch (vec->zvec_namecheck) {
7495	case POOL_NAME:
7496		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
7497			error = SET_ERROR(EINVAL);
7498		else
7499			error = pool_status_check(zc->zc_name,
7500			    vec->zvec_namecheck, vec->zvec_pool_check);
7501		break;
7502
7503	case DATASET_NAME:
7504		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
7505			error = SET_ERROR(EINVAL);
7506		else
7507			error = pool_status_check(zc->zc_name,
7508			    vec->zvec_namecheck, vec->zvec_pool_check);
7509		break;
7510
7511	case ENTITY_NAME:
7512		if (entity_namecheck(zc->zc_name, NULL, NULL) != 0) {
7513			error = SET_ERROR(EINVAL);
7514		} else {
7515			error = pool_status_check(zc->zc_name,
7516			    vec->zvec_namecheck, vec->zvec_pool_check);
7517		}
7518		break;
7519
7520	case NO_NAME:
7521		break;
7522	}
7523	/*
7524	 * Ensure that all input pairs are valid before we pass them down
7525	 * to the lower layers.
7526	 *
7527	 * The vectored functions can use fnvlist_lookup_{type} for any
7528	 * required pairs since zfs_check_input_nvpairs() confirmed that
7529	 * they exist and are of the correct type.
7530	 */
7531	if (error == 0 && vec->zvec_func != NULL) {
7532		error = zfs_check_input_nvpairs(innvl, vec);
7533		if (error != 0)
7534			goto out;
7535	}
7536
7537	if (error == 0) {
7538		cookie = spl_fstrans_mark();
7539		error = vec->zvec_secpolicy(zc, innvl, CRED());
7540		spl_fstrans_unmark(cookie);
7541	}
7542
7543	if (error != 0)
7544		goto out;
7545
7546	/* legacy ioctls can modify zc_name */
7547	/*
7548	 * Can't use kmem_strdup() as we might truncate the string and
7549	 * kmem_strfree() would then free with incorrect size.
7550	 */
7551	saved_poolname_len = strlen(zc->zc_name) + 1;
7552	saved_poolname = kmem_alloc(saved_poolname_len, KM_SLEEP);
7553
7554	strlcpy(saved_poolname, zc->zc_name, saved_poolname_len);
7555	saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
7556
7557	if (vec->zvec_func != NULL) {
7558		nvlist_t *outnvl;
7559		int puterror = 0;
7560		spa_t *spa;
7561		nvlist_t *lognv = NULL;
7562
7563		ASSERT(vec->zvec_legacy_func == NULL);
7564
7565		/*
7566		 * Add the innvl to the lognv before calling the func,
7567		 * in case the func changes the innvl.
7568		 */
7569		if (vec->zvec_allow_log) {
7570			lognv = fnvlist_alloc();
7571			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
7572			    vec->zvec_name);
7573			if (!nvlist_empty(innvl)) {
7574				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
7575				    innvl);
7576			}
7577		}
7578
7579		outnvl = fnvlist_alloc();
7580		cookie = spl_fstrans_mark();
7581		error = vec->zvec_func(zc->zc_name, innvl, outnvl);
7582		spl_fstrans_unmark(cookie);
7583
7584		/*
7585		 * Some commands can partially execute, modify state, and still
7586		 * return an error.  In these cases, attempt to record what
7587		 * was modified.
7588		 */
7589		if ((error == 0 ||
7590		    (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
7591		    vec->zvec_allow_log &&
7592		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
7593			if (!nvlist_empty(outnvl)) {
7594				size_t out_size = fnvlist_size(outnvl);
7595				if (out_size > zfs_history_output_max) {
7596					fnvlist_add_int64(lognv,
7597					    ZPOOL_HIST_OUTPUT_SIZE, out_size);
7598				} else {
7599					fnvlist_add_nvlist(lognv,
7600					    ZPOOL_HIST_OUTPUT_NVL, outnvl);
7601				}
7602			}
7603			if (error != 0) {
7604				fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
7605				    error);
7606			}
7607			fnvlist_add_int64(lognv, ZPOOL_HIST_ELAPSED_NS,
7608			    gethrtime() - start_time);
7609			(void) spa_history_log_nvl(spa, lognv);
7610			spa_close(spa, FTAG);
7611		}
7612		fnvlist_free(lognv);
7613
7614		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
7615			int smusherror = 0;
7616			if (vec->zvec_smush_outnvlist) {
7617				smusherror = nvlist_smush(outnvl,
7618				    zc->zc_nvlist_dst_size);
7619			}
7620			if (smusherror == 0)
7621				puterror = put_nvlist(zc, outnvl);
7622		}
7623
7624		if (puterror != 0)
7625			error = puterror;
7626
7627		nvlist_free(outnvl);
7628	} else {
7629		cookie = spl_fstrans_mark();
7630		error = vec->zvec_legacy_func(zc);
7631		spl_fstrans_unmark(cookie);
7632	}
7633
7634out:
7635	nvlist_free(innvl);
7636	if (error == 0 && vec->zvec_allow_log) {
7637		char *s = tsd_get(zfs_allow_log_key);
7638		if (s != NULL)
7639			kmem_strfree(s);
7640		(void) tsd_set(zfs_allow_log_key, kmem_strdup(saved_poolname));
7641	}
7642	if (saved_poolname != NULL)
7643		kmem_free(saved_poolname, saved_poolname_len);
7644
7645	return (error);
7646}
7647
7648int
7649zfs_kmod_init(void)
7650{
7651	int error;
7652
7653	if ((error = zvol_init()) != 0)
7654		return (error);
7655
7656	spa_init(SPA_MODE_READ | SPA_MODE_WRITE);
7657	zfs_init();
7658
7659	zfs_ioctl_init();
7660
7661	mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
7662	zfsdev_state_list = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
7663	zfsdev_state_list->zs_minor = -1;
7664
7665	if ((error = zfsdev_attach()) != 0)
7666		goto out;
7667
7668	tsd_create(&zfs_fsyncer_key, NULL);
7669	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
7670	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
7671
7672	return (0);
7673out:
7674	zfs_fini();
7675	spa_fini();
7676	zvol_fini();
7677
7678	return (error);
7679}
7680
7681void
7682zfs_kmod_fini(void)
7683{
7684	zfsdev_state_t *zs, *zsnext = NULL;
7685
7686	zfsdev_detach();
7687
7688	mutex_destroy(&zfsdev_state_lock);
7689
7690	for (zs = zfsdev_state_list; zs != NULL; zs = zsnext) {
7691		zsnext = zs->zs_next;
7692		if (zs->zs_onexit)
7693			zfs_onexit_destroy(zs->zs_onexit);
7694		if (zs->zs_zevent)
7695			zfs_zevent_destroy(zs->zs_zevent);
7696		kmem_free(zs, sizeof (zfsdev_state_t));
7697	}
7698
7699	zfs_ereport_taskq_fini();	/* run before zfs_fini() on Linux */
7700	zfs_fini();
7701	spa_fini();
7702	zvol_fini();
7703
7704	tsd_destroy(&zfs_fsyncer_key);
7705	tsd_destroy(&rrw_tsd_key);
7706	tsd_destroy(&zfs_allow_log_key);
7707}
7708
/*
 * Tunables registered via ZFS_MODULE_PARAM, both declared ULONG / ZMOD_RW.
 *
 * max_nvlist_src_size: per its description, the upper bound on the packed
 *	source nvlist an ioctl may pass in.  The ioctl path compares
 *	zc_nvlist_src_size against zfs_max_nvlist_src_size_os(), which is
 *	presumably derived from this value -- TODO confirm.
 * history_output_max: when an ioctl's output nvlist is larger than
 *	zfs_history_output_max, the pool history records only its size
 *	instead of the nvlist itself.
 */
/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, ULONG, ZMOD_RW,
    "Maximum size in bytes allowed for src nvlist passed with ZFS ioctls");

ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, ULONG, ZMOD_RW,
    "Maximum size in bytes of ZFS ioctl output that will be logged");
/* END CSTYLED */
7716