zfs_ioctl.c revision 332535
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
25 * Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
26 * Copyright 2014 Xin Li <delphij@FreeBSD.org>. All rights reserved.
27 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
28 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
29 * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
30 * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
31 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
32 * Copyright (c) 2013 Steven Hartland. All rights reserved.
33 * Copyright (c) 2014 Integros [integros.com]
34 * Copyright 2016 Toomas Soome <tsoome@me.com>
35 * Copyright 2017 RackTop Systems.
36 * Copyright (c) 2017 Datto Inc.
37 * Copyright 2016 Toomas Soome <tsoome@me.com>
38 */
39
40/*
41 * ZFS ioctls.
42 *
43 * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
44 * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
45 *
46 * There are two ways that we handle ioctls: the legacy way where almost
47 * all of the logic is in the ioctl callback, and the new way where most
48 * of the marshalling is handled in the common entry point, zfsdev_ioctl().
49 *
50 * Non-legacy ioctls should be registered by calling
51 * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
52 * from userland by lzc_ioctl().
53 *
54 * The registration arguments are as follows:
55 *
56 * const char *name
57 *   The name of the ioctl.  This is used for history logging.  If the
58 *   ioctl returns successfully (the callback returns 0), and allow_log
59 *   is true, then a history log entry will be recorded with the input &
60 *   output nvlists.  The log entry can be printed with "zpool history -i".
61 *
62 * zfs_ioc_t ioc
63 *   The ioctl request number, which userland will pass to ioctl(2).
64 *   The ioctl numbers can change from release to release, because
65 *   the caller (libzfs) must be matched to the kernel.
66 *
67 * zfs_secpolicy_func_t *secpolicy
68 *   This function will be called before the zfs_ioc_func_t, to
69 *   determine if this operation is permitted.  It should return EPERM
70 *   on failure, and 0 on success.  Checks include determining if the
71 *   dataset is visible in this zone, and if the user has either all
72 *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
73 *   to do this operation on this dataset with "zfs allow".
74 *
75 * zfs_ioc_namecheck_t namecheck
76 *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
77 *   name, a dataset name, or nothing.  If the name is not well-formed,
78 *   the ioctl will fail and the callback will not be called.
79 *   Therefore, the callback can assume that the name is well-formed
80 *   (e.g. is null-terminated, doesn't have more than one '@' character,
81 *   doesn't have invalid characters).
82 *
83 * zfs_ioc_poolcheck_t pool_check
84 *   This specifies requirements on the pool state.  If the pool does
85 *   not meet them (is suspended or is readonly), the ioctl will fail
86 *   and the callback will not be called.  If any checks are specified
87 *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
88 *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
89 *   POOL_CHECK_READONLY).
90 *
91 * boolean_t smush_outnvlist
92 *   If smush_outnvlist is true, then the output is presumed to be a
93 *   list of errors, and it will be "smushed" down to fit into the
94 *   caller's buffer, by removing some entries and replacing them with a
95 *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
96 *   nvlist_smush() for details.  If smush_outnvlist is false, and the
97 *   outnvlist does not fit into the userland-provided buffer, then the
98 *   ioctl will fail with ENOMEM.
99 *
100 * zfs_ioc_func_t *func
101 *   The callback function that will perform the operation.
102 *
103 *   The callback should return 0 on success, or an error number on
104 *   failure.  If the function fails, the userland ioctl will return -1,
105 *   and errno will be set to the callback's return value.  The callback
106 *   will be called with the following arguments:
107 *
108 *   const char *name
109 *     The name of the pool or dataset to operate on, from
110 *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
111 *     expected type (pool, dataset, or none).
112 *
113 *   nvlist_t *innvl
114 *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
115 *     NULL if no input nvlist was provided.  Changes to this nvlist are
116 *     ignored.  If the input nvlist could not be deserialized, the
117 *     ioctl will fail and the callback will not be called.
118 *
119 *   nvlist_t *outnvl
120 *     The output nvlist, initially empty.  The callback can fill it in,
121 *     and it will be returned to userland by serializing it into
122 *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
123 *     fails (e.g. because the caller didn't supply a large enough
124 *     buffer), then the overall ioctl will fail.  See the
125 *     'smush_outnvlist' argument above for additional behaviors.
126 *
127 *     There are two typical uses of the output nvlist:
128 *       - To return state, e.g. property values.  In this case,
129 *         smush_outnvlist should be false.  If the buffer was not large
130 *         enough, the caller will reallocate a larger buffer and try
131 *         the ioctl again.
132 *
133 *       - To return multiple errors from an ioctl which makes on-disk
134 *         changes.  In this case, smush_outnvlist should be true.
135 *         Ioctls which make on-disk modifications should generally not
136 *         use the outnvl if they succeed, because the caller can not
137 *         distinguish between the operation failing, and
138 *         deserialization failing.
139 */
140#ifdef __FreeBSD__
141#include "opt_kstack_pages.h"
142#endif
143
144#include <sys/types.h>
145#include <sys/param.h>
146#include <sys/systm.h>
147#include <sys/conf.h>
148#include <sys/kernel.h>
149#include <sys/lock.h>
150#include <sys/malloc.h>
151#include <sys/mutex.h>
152#include <sys/proc.h>
153#include <sys/errno.h>
154#include <sys/uio.h>
155#include <sys/buf.h>
156#include <sys/file.h>
157#include <sys/kmem.h>
158#include <sys/conf.h>
159#include <sys/cmn_err.h>
160#include <sys/stat.h>
161#include <sys/zfs_ioctl.h>
162#include <sys/zfs_vfsops.h>
163#include <sys/zfs_znode.h>
164#include <sys/zap.h>
165#include <sys/spa.h>
166#include <sys/spa_impl.h>
167#include <sys/vdev.h>
168#include <sys/dmu.h>
169#include <sys/dsl_dir.h>
170#include <sys/dsl_dataset.h>
171#include <sys/dsl_prop.h>
172#include <sys/dsl_deleg.h>
173#include <sys/dmu_objset.h>
174#include <sys/dmu_impl.h>
175#include <sys/dmu_tx.h>
176#include <sys/sunddi.h>
177#include <sys/policy.h>
178#include <sys/zone.h>
179#include <sys/nvpair.h>
180#include <sys/mount.h>
181#include <sys/taskqueue.h>
182#include <sys/sdt.h>
183#include <sys/varargs.h>
184#include <sys/fs/zfs.h>
185#include <sys/zfs_ctldir.h>
186#include <sys/zfs_dir.h>
187#include <sys/zfs_onexit.h>
188#include <sys/zvol.h>
189#include <sys/dsl_scan.h>
190#include <sys/dmu_objset.h>
191#include <sys/dmu_send.h>
192#include <sys/dsl_destroy.h>
193#include <sys/dsl_bookmark.h>
194#include <sys/dsl_userhold.h>
195#include <sys/zfeature.h>
196#include <sys/zcp.h>
197#include <sys/zio_checksum.h>
198#include <sys/vdev_removal.h>
199
200#include "zfs_namecheck.h"
201#include "zfs_prop.h"
202#include "zfs_deleg.h"
203#include "zfs_comutil.h"
204#include "zfs_ioctl_compat.h"
205
206#include "lua.h"
207#include "lauxlib.h"
208
209static struct cdev *zfsdev;
210
211extern void zfs_init(void);
212extern void zfs_fini(void);
213
214uint_t zfs_fsyncer_key;
215extern uint_t rrw_tsd_key;
216static uint_t zfs_allow_log_key;
217extern uint_t zfs_geom_probe_vdev_key;
218
219typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
220typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
221typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
222
/*
 * What an ioctl expects to find in zfs_cmd_t:zc_name.
 */
typedef enum {
	NO_NAME = 0,		/* nothing */
	POOL_NAME = 1,		/* a pool name */
	DATASET_NAME = 2	/* a dataset name */
} zfs_ioc_namecheck_t;
228
/*
 * Requirements on the pool state; the values are distinct bits so that
 * several checks can be or-ed together.
 */
typedef enum {
	POOL_CHECK_NONE		= 0x01,	/* no pool-state requirement */
	POOL_CHECK_SUSPENDED	= 0x02,	/* fail if the pool is suspended */
	POOL_CHECK_READONLY	= 0x04,	/* fail if the pool is readonly */
} zfs_ioc_poolcheck_t;
234
235typedef struct zfs_ioc_vec {
236	zfs_ioc_legacy_func_t	*zvec_legacy_func;
237	zfs_ioc_func_t		*zvec_func;
238	zfs_secpolicy_func_t	*zvec_secpolicy;
239	zfs_ioc_namecheck_t	zvec_namecheck;
240	boolean_t		zvec_allow_log;
241	zfs_ioc_poolcheck_t	zvec_pool_check;
242	boolean_t		zvec_smush_outnvlist;
243	const char		*zvec_name;
244} zfs_ioc_vec_t;
245
246/* This array is indexed by zfs_userquota_prop_t */
247static const char *userquota_perms[] = {
248	ZFS_DELEG_PERM_USERUSED,
249	ZFS_DELEG_PERM_USERQUOTA,
250	ZFS_DELEG_PERM_GROUPUSED,
251	ZFS_DELEG_PERM_GROUPQUOTA,
252};
253
254static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
255static int zfs_check_settable(const char *name, nvpair_t *property,
256    cred_t *cr);
257static int zfs_check_clearable(char *dataset, nvlist_t *props,
258    nvlist_t **errors);
259static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
260    boolean_t *);
261int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
262static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
263
264static void zfsdev_close(void *data);
265
266static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
267
268/* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
269void
270__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
271{
272	const char *newfile;
273	char buf[512];
274	va_list adx;
275
276	/*
277	 * Get rid of annoying "../common/" prefix to filename.
278	 */
279	newfile = strrchr(file, '/');
280	if (newfile != NULL) {
281		newfile = newfile + 1; /* Get rid of leading / */
282	} else {
283		newfile = file;
284	}
285
286	va_start(adx, fmt);
287	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
288	va_end(adx);
289
290	/*
291	 * To get this data, use the zfs-dprintf probe as so:
292	 * dtrace -q -n 'zfs-dprintf \
293	 *	/stringof(arg0) == "dbuf.c"/ \
294	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
295	 * arg0 = file name
296	 * arg1 = function name
297	 * arg2 = line number
298	 * arg3 = message
299	 */
300	DTRACE_PROBE4(zfs__dprintf,
301	    char *, newfile, char *, func, int, line, char *, buf);
302}
303
304static void
305history_str_free(char *buf)
306{
307	kmem_free(buf, HIS_MAX_RECORD_LEN);
308}
309
310static char *
311history_str_get(zfs_cmd_t *zc)
312{
313	char *buf;
314
315	if (zc->zc_history == 0)
316		return (NULL);
317
318	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
319	if (copyinstr((void *)(uintptr_t)zc->zc_history,
320	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
321		history_str_free(buf);
322		return (NULL);
323	}
324
325	buf[HIS_MAX_RECORD_LEN -1] = '\0';
326
327	return (buf);
328}
329
330/*
331 * Check to see if the named dataset is currently defined as bootable
332 */
333static boolean_t
334zfs_is_bootfs(const char *name)
335{
336	objset_t *os;
337
338	if (dmu_objset_hold(name, FTAG, &os) == 0) {
339		boolean_t ret;
340		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
341		dmu_objset_rele(os, FTAG);
342		return (ret);
343	}
344	return (B_FALSE);
345}
346
347/*
348 * Return non-zero if the spa version is less than requested version.
349 */
350static int
351zfs_earlier_version(const char *name, int version)
352{
353	spa_t *spa;
354
355	if (spa_open(name, &spa, FTAG) == 0) {
356		if (spa_version(spa) < version) {
357			spa_close(spa, FTAG);
358			return (1);
359		}
360		spa_close(spa, FTAG);
361	}
362	return (0);
363}
364
365/*
366 * Return TRUE if the ZPL version is less than requested version.
367 */
368static boolean_t
369zpl_earlier_version(const char *name, int version)
370{
371	objset_t *os;
372	boolean_t rc = B_TRUE;
373
374	if (dmu_objset_hold(name, FTAG, &os) == 0) {
375		uint64_t zplversion;
376
377		if (dmu_objset_type(os) != DMU_OST_ZFS) {
378			dmu_objset_rele(os, FTAG);
379			return (B_TRUE);
380		}
381		/* XXX reading from non-owned objset */
382		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
383			rc = zplversion < version;
384		dmu_objset_rele(os, FTAG);
385	}
386	return (rc);
387}
388
389static void
390zfs_log_history(zfs_cmd_t *zc)
391{
392	spa_t *spa;
393	char *buf;
394
395	if ((buf = history_str_get(zc)) == NULL)
396		return;
397
398	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
399		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
400			(void) spa_history_log(spa, buf);
401		spa_close(spa, FTAG);
402	}
403	history_str_free(buf);
404}
405
406/*
407 * Policy for top-level read operations (list pools).  Requires no privileges,
408 * and can be used in the local zone, as there is no associated dataset.
409 */
410/* ARGSUSED */
411static int
412zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
413{
414	return (0);
415}
416
417/*
418 * Policy for dataset read operations (list children, get statistics).  Requires
419 * no privileges, but must be visible in the local zone.
420 */
421/* ARGSUSED */
422static int
423zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
424{
425	if (INGLOBALZONE(curthread) ||
426	    zone_dataset_visible(zc->zc_name, NULL))
427		return (0);
428
429	return (SET_ERROR(ENOENT));
430}
431
432static int
433zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
434{
435	int writable = 1;
436
437	/*
438	 * The dataset must be visible by this zone -- check this first
439	 * so they don't see EPERM on something they shouldn't know about.
440	 */
441	if (!INGLOBALZONE(curthread) &&
442	    !zone_dataset_visible(dataset, &writable))
443		return (SET_ERROR(ENOENT));
444
445	if (INGLOBALZONE(curthread)) {
446		/*
447		 * If the fs is zoned, only root can access it from the
448		 * global zone.
449		 */
450		if (secpolicy_zfs(cr) && zoned)
451			return (SET_ERROR(EPERM));
452	} else {
453		/*
454		 * If we are in a local zone, the 'zoned' property must be set.
455		 */
456		if (!zoned)
457			return (SET_ERROR(EPERM));
458
459		/* must be writable by this zone */
460		if (!writable)
461			return (SET_ERROR(EPERM));
462	}
463	return (0);
464}
465
466static int
467zfs_dozonecheck(const char *dataset, cred_t *cr)
468{
469	uint64_t zoned;
470
471	if (dsl_prop_get_integer(dataset, "jailed", &zoned, NULL))
472		return (SET_ERROR(ENOENT));
473
474	return (zfs_dozonecheck_impl(dataset, zoned, cr));
475}
476
477static int
478zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
479{
480	uint64_t zoned;
481
482	if (dsl_prop_get_int_ds(ds, "jailed", &zoned))
483		return (SET_ERROR(ENOENT));
484
485	return (zfs_dozonecheck_impl(dataset, zoned, cr));
486}
487
488static int
489zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
490    const char *perm, cred_t *cr)
491{
492	int error;
493
494	error = zfs_dozonecheck_ds(name, ds, cr);
495	if (error == 0) {
496		error = secpolicy_zfs(cr);
497		if (error != 0)
498			error = dsl_deleg_access_impl(ds, perm, cr);
499	}
500	return (error);
501}
502
503static int
504zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
505{
506	int error;
507	dsl_dataset_t *ds;
508	dsl_pool_t *dp;
509
510	/*
511	 * First do a quick check for root in the global zone, which
512	 * is allowed to do all write_perms.  This ensures that zfs_ioc_*
513	 * will get to handle nonexistent datasets.
514	 */
515	if (INGLOBALZONE(curthread) && secpolicy_zfs(cr) == 0)
516		return (0);
517
518	error = dsl_pool_hold(name, FTAG, &dp);
519	if (error != 0)
520		return (error);
521
522	error = dsl_dataset_hold(dp, name, FTAG, &ds);
523	if (error != 0) {
524		dsl_pool_rele(dp, FTAG);
525		return (error);
526	}
527
528	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
529
530	dsl_dataset_rele(ds, FTAG);
531	dsl_pool_rele(dp, FTAG);
532	return (error);
533}
534
#ifdef SECLABEL
/*
 * Policy for setting the security label property.
 *
 * name:   dataset whose label is being changed
 * strval: the new label, either ZFS_MLSLABEL_DEFAULT or a hex label
 *         string
 * cr:     credentials used for the final privilege check
 *
 * Returns 0 for success, non-zero for access and other errors.
 */
static int
zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
{
	char		ds_hexsl[MAXNAMELEN];
	bslabel_t	ds_sl, new_sl;
	boolean_t	new_default = FALSE;
	uint64_t	zoned;
	int		needed_priv = -1;
	int		error;

	/* First get the existing dataset label. */
	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
	if (error != 0)
		return (SET_ERROR(EPERM));

	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
		new_default = TRUE;

	/* The label must be translatable */
	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
		return (SET_ERROR(EINVAL));

	/*
	 * In a non-global zone, disallow attempts to set a label that
	 * doesn't match that of the zone; otherwise no other checks
	 * are needed.
	 */
	if (!INGLOBALZONE(curproc)) {
		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
			return (SET_ERROR(EPERM));
		return (0);
	}

	/*
	 * For global-zone datasets (i.e., those whose zoned property is
	 * "off"), verify that the specified new label is valid for the
	 * global zone.
	 */
	if (dsl_prop_get_integer(name,
	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
		return (SET_ERROR(EPERM));
	if (!zoned) {
		if (zfs_check_global_label(name, strval) != 0)
			return (SET_ERROR(EPERM));
	}

	/*
	 * If the existing dataset label is nondefault, check if the
	 * dataset is mounted (label cannot be changed while mounted).
	 * Get the zfsvfs; if there isn't one, then the dataset isn't
	 * mounted (or isn't a dataset, doesn't exist, ...).
	 */
	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
		objset_t *os;
		static char *setsl_tag = "setsl_tag";

		/*
		 * Try to own the dataset; abort if there is any error,
		 * (e.g., already mounted, in use, or other error).
		 */
		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
		    setsl_tag, &os);
		if (error != 0)
			return (SET_ERROR(EPERM));

		dmu_objset_disown(os, setsl_tag);

		if (new_default) {
			needed_priv = PRIV_FILE_DOWNGRADE_SL;
			goto out_check;
		}

		if (hexstr_to_label(strval, &new_sl) != 0)
			return (SET_ERROR(EPERM));

		/*
		 * Decode the existing label before comparing against it.
		 * Without this step ds_sl would be read uninitialized by
		 * the dominance checks below.
		 */
		if (hexstr_to_label(ds_hexsl, &ds_sl) != 0)
			return (SET_ERROR(EPERM));

		if (blstrictdom(&ds_sl, &new_sl))
			needed_priv = PRIV_FILE_DOWNGRADE_SL;
		else if (blstrictdom(&new_sl, &ds_sl))
			needed_priv = PRIV_FILE_UPGRADE_SL;
	} else {
		/* dataset currently has a default label */
		if (!new_default)
			needed_priv = PRIV_FILE_UPGRADE_SL;
	}

out_check:
	if (needed_priv != -1)
		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
	return (0);
}
#endif	/* SECLABEL */
633
634static int
635zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
636    cred_t *cr)
637{
638	char *strval;
639
640	/*
641	 * Check permissions for special properties.
642	 */
643	switch (prop) {
644	case ZFS_PROP_ZONED:
645		/*
646		 * Disallow setting of 'zoned' from within a local zone.
647		 */
648		if (!INGLOBALZONE(curthread))
649			return (SET_ERROR(EPERM));
650		break;
651
652	case ZFS_PROP_QUOTA:
653	case ZFS_PROP_FILESYSTEM_LIMIT:
654	case ZFS_PROP_SNAPSHOT_LIMIT:
655		if (!INGLOBALZONE(curthread)) {
656			uint64_t zoned;
657			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
658			/*
659			 * Unprivileged users are allowed to modify the
660			 * limit on things *under* (ie. contained by)
661			 * the thing they own.
662			 */
663			if (dsl_prop_get_integer(dsname, "jailed", &zoned,
664			    setpoint))
665				return (SET_ERROR(EPERM));
666			if (!zoned || strlen(dsname) <= strlen(setpoint))
667				return (SET_ERROR(EPERM));
668		}
669		break;
670
671	case ZFS_PROP_MLSLABEL:
672#ifdef SECLABEL
673		if (!is_system_labeled())
674			return (SET_ERROR(EPERM));
675
676		if (nvpair_value_string(propval, &strval) == 0) {
677			int err;
678
679			err = zfs_set_slabel_policy(dsname, strval, CRED());
680			if (err != 0)
681				return (err);
682		}
683#else
684		return (EOPNOTSUPP);
685#endif
686		break;
687	}
688
689	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
690}
691
692/* ARGSUSED */
693static int
694zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
695{
696	int error;
697
698	error = zfs_dozonecheck(zc->zc_name, cr);
699	if (error != 0)
700		return (error);
701
702	/*
703	 * permission to set permissions will be evaluated later in
704	 * dsl_deleg_can_allow()
705	 */
706	return (0);
707}
708
709/* ARGSUSED */
710static int
711zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
712{
713	return (zfs_secpolicy_write_perms(zc->zc_name,
714	    ZFS_DELEG_PERM_ROLLBACK, cr));
715}
716
717/* ARGSUSED */
718static int
719zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
720{
721	dsl_pool_t *dp;
722	dsl_dataset_t *ds;
723	char *cp;
724	int error;
725
726	/*
727	 * Generate the current snapshot name from the given objsetid, then
728	 * use that name for the secpolicy/zone checks.
729	 */
730	cp = strchr(zc->zc_name, '@');
731	if (cp == NULL)
732		return (SET_ERROR(EINVAL));
733	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
734	if (error != 0)
735		return (error);
736
737	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
738	if (error != 0) {
739		dsl_pool_rele(dp, FTAG);
740		return (error);
741	}
742
743	dsl_dataset_name(ds, zc->zc_name);
744
745	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
746	    ZFS_DELEG_PERM_SEND, cr);
747	dsl_dataset_rele(ds, FTAG);
748	dsl_pool_rele(dp, FTAG);
749
750	return (error);
751}
752
753/* ARGSUSED */
754static int
755zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
756{
757	return (zfs_secpolicy_write_perms(zc->zc_name,
758	    ZFS_DELEG_PERM_SEND, cr));
759}
760
761/* ARGSUSED */
762static int
763zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
764{
765	vnode_t *vp;
766	int error;
767
768	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
769	    NO_FOLLOW, NULL, &vp)) != 0)
770		return (error);
771
772	/* Now make sure mntpnt and dataset are ZFS */
773
774	if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
775	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
776	    zc->zc_name) != 0)) {
777		VN_RELE(vp);
778		return (SET_ERROR(EPERM));
779	}
780
781	VN_RELE(vp);
782	return (dsl_deleg_access(zc->zc_name,
783	    ZFS_DELEG_PERM_SHARE, cr));
784}
785
786int
787zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
788{
789	if (!INGLOBALZONE(curthread))
790		return (SET_ERROR(EPERM));
791
792	if (secpolicy_nfs(cr) == 0) {
793		return (0);
794	} else {
795		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
796	}
797}
798
799int
800zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
801{
802	if (!INGLOBALZONE(curthread))
803		return (SET_ERROR(EPERM));
804
805	if (secpolicy_smb(cr) == 0) {
806		return (0);
807	} else {
808		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
809	}
810}
811
/*
 * Compute the parent of 'datasetname' by removing the trailing "@bla"
 * or "/bla" component, and store it in 'parent'.
 *
 * datasetname: name of a snapshot, file system or volume
 * parent:      output buffer; always NUL-terminated on return
 * parentsize:  size of 'parent' in bytes; a name that does not fit is
 *              truncated
 *
 * Returns 0 on success, EINVAL if 'parentsize' is not positive, or
 * ENOENT if the name contains neither '@' nor '/' (i.e. it has no
 * parent).
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *sep;

	if (parentsize <= 0)
		return (SET_ERROR(EINVAL));

	/*
	 * strncpy() does not terminate the destination when the source
	 * fills it, so terminate explicitly before searching the copy.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	parent[parentsize - 1] = '\0';

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	sep = strrchr(parent, '@');
	if (sep == NULL)
		sep = strrchr(parent, '/');
	if (sep == NULL)
		return (SET_ERROR(ENOENT));

	*sep = '\0';
	return (0);
}
833
834int
835zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
836{
837	int error;
838
839	if ((error = zfs_secpolicy_write_perms(name,
840	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
841		return (error);
842
843	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
844}
845
846/* ARGSUSED */
847static int
848zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
849{
850	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
851}
852
853/*
854 * Destroying snapshots with delegated permissions requires
855 * descendant mount and destroy permissions.
856 */
857/* ARGSUSED */
858static int
859zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
860{
861	nvlist_t *snaps;
862	nvpair_t *pair, *nextpair;
863	int error = 0;
864
865	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
866		return (SET_ERROR(EINVAL));
867	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
868	    pair = nextpair) {
869		nextpair = nvlist_next_nvpair(snaps, pair);
870		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
871		if (error == ENOENT) {
872			/*
873			 * Ignore any snapshots that don't exist (we consider
874			 * them "already destroyed").  Remove the name from the
875			 * nvl here in case the snapshot is created between
876			 * now and when we try to destroy it (in which case
877			 * we don't want to destroy it since we haven't
878			 * checked for permission).
879			 */
880			fnvlist_remove_nvpair(snaps, pair);
881			error = 0;
882		}
883		if (error != 0)
884			break;
885	}
886
887	return (error);
888}
889
890int
891zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
892{
893	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
894	int	error;
895
896	if ((error = zfs_secpolicy_write_perms(from,
897	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
898		return (error);
899
900	if ((error = zfs_secpolicy_write_perms(from,
901	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
902		return (error);
903
904	if ((error = zfs_get_parent(to, parentname,
905	    sizeof (parentname))) != 0)
906		return (error);
907
908	if ((error = zfs_secpolicy_write_perms(parentname,
909	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
910		return (error);
911
912	if ((error = zfs_secpolicy_write_perms(parentname,
913	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
914		return (error);
915
916	return (error);
917}
918
919/* ARGSUSED */
920static int
921zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
922{
923	char *at = NULL;
924	int error;
925
926	if ((zc->zc_cookie & 1) != 0) {
927		/*
928		 * This is recursive rename, so the starting snapshot might
929		 * not exist. Check file system or volume permission instead.
930		 */
931		at = strchr(zc->zc_name, '@');
932		if (at == NULL)
933			return (EINVAL);
934		*at = '\0';
935	}
936
937	error = zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr);
938
939	if (at != NULL)
940		*at = '@';
941
942	return (error);
943}
944
945/* ARGSUSED */
946static int
947zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
948{
949	dsl_pool_t *dp;
950	dsl_dataset_t *clone;
951	int error;
952
953	error = zfs_secpolicy_write_perms(zc->zc_name,
954	    ZFS_DELEG_PERM_PROMOTE, cr);
955	if (error != 0)
956		return (error);
957
958	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
959	if (error != 0)
960		return (error);
961
962	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
963
964	if (error == 0) {
965		char parentname[ZFS_MAX_DATASET_NAME_LEN];
966		dsl_dataset_t *origin = NULL;
967		dsl_dir_t *dd;
968		dd = clone->ds_dir;
969
970		error = dsl_dataset_hold_obj(dd->dd_pool,
971		    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
972		if (error != 0) {
973			dsl_dataset_rele(clone, FTAG);
974			dsl_pool_rele(dp, FTAG);
975			return (error);
976		}
977
978		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
979		    ZFS_DELEG_PERM_MOUNT, cr);
980
981		dsl_dataset_name(origin, parentname);
982		if (error == 0) {
983			error = zfs_secpolicy_write_perms_ds(parentname, origin,
984			    ZFS_DELEG_PERM_PROMOTE, cr);
985		}
986		dsl_dataset_rele(clone, FTAG);
987		dsl_dataset_rele(origin, FTAG);
988	}
989	dsl_pool_rele(dp, FTAG);
990	return (error);
991}
992
993/* ARGSUSED */
994static int
995zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
996{
997	int error;
998
999	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1000	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
1001		return (error);
1002
1003	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1004	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
1005		return (error);
1006
1007	return (zfs_secpolicy_write_perms(zc->zc_name,
1008	    ZFS_DELEG_PERM_CREATE, cr));
1009}
1010
1011int
1012zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1013{
1014	return (zfs_secpolicy_write_perms(name,
1015	    ZFS_DELEG_PERM_SNAPSHOT, cr));
1016}
1017
1018/*
1019 * Check for permission to create each snapshot in the nvlist.
1020 */
1021/* ARGSUSED */
1022static int
1023zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1024{
1025	nvlist_t *snaps;
1026	int error;
1027	nvpair_t *pair;
1028
1029	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
1030		return (SET_ERROR(EINVAL));
1031	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1032	    pair = nvlist_next_nvpair(snaps, pair)) {
1033		char *name = nvpair_name(pair);
1034		char *atp = strchr(name, '@');
1035
1036		if (atp == NULL) {
1037			error = SET_ERROR(EINVAL);
1038			break;
1039		}
1040		*atp = '\0';
1041		error = zfs_secpolicy_snapshot_perms(name, cr);
1042		*atp = '@';
1043		if (error != 0)
1044			break;
1045	}
1046	return (error);
1047}
1048
1049/*
1050 * Check for permission to create each snapshot in the nvlist.
1051 */
1052/* ARGSUSED */
1053static int
1054zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1055{
1056	int error = 0;
1057
1058	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1059	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1060		char *name = nvpair_name(pair);
1061		char *hashp = strchr(name, '#');
1062
1063		if (hashp == NULL) {
1064			error = SET_ERROR(EINVAL);
1065			break;
1066		}
1067		*hashp = '\0';
1068		error = zfs_secpolicy_write_perms(name,
1069		    ZFS_DELEG_PERM_BOOKMARK, cr);
1070		*hashp = '#';
1071		if (error != 0)
1072			break;
1073	}
1074	return (error);
1075}
1076
1077/* ARGSUSED */
1078static int
1079zfs_secpolicy_remap(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1080{
1081	return (zfs_secpolicy_write_perms(zc->zc_name,
1082	    ZFS_DELEG_PERM_REMAP, cr));
1083}
1084
1085/* ARGSUSED */
1086static int
1087zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1088{
1089	nvpair_t *pair, *nextpair;
1090	int error = 0;
1091
1092	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1093	    pair = nextpair) {
1094		char *name = nvpair_name(pair);
1095		char *hashp = strchr(name, '#');
1096		nextpair = nvlist_next_nvpair(innvl, pair);
1097
1098		if (hashp == NULL) {
1099			error = SET_ERROR(EINVAL);
1100			break;
1101		}
1102
1103		*hashp = '\0';
1104		error = zfs_secpolicy_write_perms(name,
1105		    ZFS_DELEG_PERM_DESTROY, cr);
1106		*hashp = '#';
1107		if (error == ENOENT) {
1108			/*
1109			 * Ignore any filesystems that don't exist (we consider
1110			 * their bookmarks "already destroyed").  Remove
1111			 * the name from the nvl here in case the filesystem
1112			 * is created between now and when we try to destroy
1113			 * the bookmark (in which case we don't want to
1114			 * destroy it since we haven't checked for permission).
1115			 */
1116			fnvlist_remove_nvpair(innvl, pair);
1117			error = 0;
1118		}
1119		if (error != 0)
1120			break;
1121	}
1122
1123	return (error);
1124}
1125
1126/* ARGSUSED */
1127static int
1128zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1129{
1130	/*
1131	 * Even root must have a proper TSD so that we know what pool
1132	 * to log to.
1133	 */
1134	if (tsd_get(zfs_allow_log_key) == NULL)
1135		return (SET_ERROR(EPERM));
1136	return (0);
1137}
1138
1139static int
1140zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1141{
1142	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
1143	int	error;
1144	char	*origin;
1145
1146	if ((error = zfs_get_parent(zc->zc_name, parentname,
1147	    sizeof (parentname))) != 0)
1148		return (error);
1149
1150	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1151	    (error = zfs_secpolicy_write_perms(origin,
1152	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1153		return (error);
1154
1155	if ((error = zfs_secpolicy_write_perms(parentname,
1156	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1157		return (error);
1158
1159	return (zfs_secpolicy_write_perms(parentname,
1160	    ZFS_DELEG_PERM_MOUNT, cr));
1161}
1162
1163/*
1164 * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1165 * SYS_CONFIG privilege, which is not available in a local zone.
1166 */
1167/* ARGSUSED */
1168static int
1169zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1170{
1171	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1172		return (SET_ERROR(EPERM));
1173
1174	return (0);
1175}
1176
1177/*
1178 * Policy for object to name lookups.
1179 */
1180/* ARGSUSED */
1181static int
1182zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1183{
1184	int error;
1185
1186	if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1187		return (0);
1188
1189	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1190	return (error);
1191}
1192
1193/*
1194 * Policy for fault injection.  Requires all privileges.
1195 */
1196/* ARGSUSED */
1197static int
1198zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1199{
1200	return (secpolicy_zinject(cr));
1201}
1202
1203/* ARGSUSED */
1204static int
1205zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1206{
1207	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1208
1209	if (prop == ZPROP_INVAL) {
1210		if (!zfs_prop_user(zc->zc_value))
1211			return (SET_ERROR(EINVAL));
1212		return (zfs_secpolicy_write_perms(zc->zc_name,
1213		    ZFS_DELEG_PERM_USERPROP, cr));
1214	} else {
1215		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1216		    NULL, cr));
1217	}
1218}
1219
1220static int
1221zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1222{
1223	int err = zfs_secpolicy_read(zc, innvl, cr);
1224	if (err)
1225		return (err);
1226
1227	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1228		return (SET_ERROR(EINVAL));
1229
1230	if (zc->zc_value[0] == 0) {
1231		/*
1232		 * They are asking about a posix uid/gid.  If it's
1233		 * themself, allow it.
1234		 */
1235		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1236		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
1237			if (zc->zc_guid == crgetuid(cr))
1238				return (0);
1239		} else {
1240			if (groupmember(zc->zc_guid, cr))
1241				return (0);
1242		}
1243	}
1244
1245	return (zfs_secpolicy_write_perms(zc->zc_name,
1246	    userquota_perms[zc->zc_objset_type], cr));
1247}
1248
1249static int
1250zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1251{
1252	int err = zfs_secpolicy_read(zc, innvl, cr);
1253	if (err)
1254		return (err);
1255
1256	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1257		return (SET_ERROR(EINVAL));
1258
1259	return (zfs_secpolicy_write_perms(zc->zc_name,
1260	    userquota_perms[zc->zc_objset_type], cr));
1261}
1262
1263/* ARGSUSED */
1264static int
1265zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1266{
1267	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1268	    NULL, cr));
1269}
1270
1271/* ARGSUSED */
1272static int
1273zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1274{
1275	nvpair_t *pair;
1276	nvlist_t *holds;
1277	int error;
1278
1279	error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1280	if (error != 0)
1281		return (SET_ERROR(EINVAL));
1282
1283	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1284	    pair = nvlist_next_nvpair(holds, pair)) {
1285		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1286		error = dmu_fsname(nvpair_name(pair), fsname);
1287		if (error != 0)
1288			return (error);
1289		error = zfs_secpolicy_write_perms(fsname,
1290		    ZFS_DELEG_PERM_HOLD, cr);
1291		if (error != 0)
1292			return (error);
1293	}
1294	return (0);
1295}
1296
1297/* ARGSUSED */
1298static int
1299zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1300{
1301	nvpair_t *pair;
1302	int error;
1303
1304	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1305	    pair = nvlist_next_nvpair(innvl, pair)) {
1306		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1307		error = dmu_fsname(nvpair_name(pair), fsname);
1308		if (error != 0)
1309			return (error);
1310		error = zfs_secpolicy_write_perms(fsname,
1311		    ZFS_DELEG_PERM_RELEASE, cr);
1312		if (error != 0)
1313			return (error);
1314	}
1315	return (0);
1316}
1317
1318/*
1319 * Policy for allowing temporary snapshots to be taken or released
1320 */
1321static int
1322zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1323{
1324	/*
1325	 * A temporary snapshot is the same as a snapshot,
1326	 * hold, destroy and release all rolled into one.
1327	 * Delegated diff alone is sufficient that we allow this.
1328	 */
1329	int error;
1330
1331	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1332	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
1333		return (0);
1334
1335	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1336	if (error == 0)
1337		error = zfs_secpolicy_hold(zc, innvl, cr);
1338	if (error == 0)
1339		error = zfs_secpolicy_release(zc, innvl, cr);
1340	if (error == 0)
1341		error = zfs_secpolicy_destroy(zc, innvl, cr);
1342	return (error);
1343}
1344
1345/*
1346 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1347 */
1348static int
1349get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1350{
1351	char *packed;
1352	int error;
1353	nvlist_t *list = NULL;
1354
1355	/*
1356	 * Read in and unpack the user-supplied nvlist.
1357	 */
1358	if (size == 0)
1359		return (SET_ERROR(EINVAL));
1360
1361	packed = kmem_alloc(size, KM_SLEEP);
1362
1363	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1364	    iflag)) != 0) {
1365		kmem_free(packed, size);
1366		return (SET_ERROR(EFAULT));
1367	}
1368
1369	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1370		kmem_free(packed, size);
1371		return (error);
1372	}
1373
1374	kmem_free(packed, size);
1375
1376	*nvp = list;
1377	return (0);
1378}
1379
1380/*
1381 * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1382 * Entries will be removed from the end of the nvlist, and one int32 entry
1383 * named "N_MORE_ERRORS" will be added indicating how many entries were
1384 * removed.
1385 */
1386static int
1387nvlist_smush(nvlist_t *errors, size_t max)
1388{
1389	size_t size;
1390
1391	size = fnvlist_size(errors);
1392
1393	if (size > max) {
1394		nvpair_t *more_errors;
1395		int n = 0;
1396
1397		if (max < 1024)
1398			return (SET_ERROR(ENOMEM));
1399
1400		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1401		more_errors = nvlist_prev_nvpair(errors, NULL);
1402
1403		do {
1404			nvpair_t *pair = nvlist_prev_nvpair(errors,
1405			    more_errors);
1406			fnvlist_remove_nvpair(errors, pair);
1407			n++;
1408			size = fnvlist_size(errors);
1409		} while (size > max);
1410
1411		fnvlist_remove_nvpair(errors, more_errors);
1412		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1413		ASSERT3U(fnvlist_size(errors), <=, max);
1414	}
1415
1416	return (0);
1417}
1418
1419static int
1420put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1421{
1422	char *packed = NULL;
1423	int error = 0;
1424	size_t size;
1425
1426	size = fnvlist_size(nvl);
1427
1428	if (size > zc->zc_nvlist_dst_size) {
1429		/*
1430		 * Solaris returns ENOMEM here, because even if an error is
1431		 * returned from an ioctl(2), new zc_nvlist_dst_size will be
1432		 * passed to the userland. This is not the case for FreeBSD.
1433		 * We need to return 0, so the kernel will copy the
1434		 * zc_nvlist_dst_size back and the userland can discover that a
1435		 * bigger buffer is needed.
1436		 */
1437		error = 0;
1438	} else {
1439		packed = fnvlist_pack(nvl, &size);
1440		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1441		    size, zc->zc_iflags) != 0)
1442			error = SET_ERROR(EFAULT);
1443		fnvlist_pack_free(packed, size);
1444	}
1445
1446	zc->zc_nvlist_dst_size = size;
1447	zc->zc_nvlist_dst_filled = B_TRUE;
1448	return (error);
1449}
1450
1451int
1452getzfsvfs_impl(objset_t *os, vfs_t **vfsp)
1453{
1454	zfsvfs_t *zfvp;
1455	int error = 0;
1456
1457	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1458		return (SET_ERROR(EINVAL));
1459	}
1460
1461	mutex_enter(&os->os_user_ptr_lock);
1462	zfvp = dmu_objset_get_user(os);
1463	if (zfvp) {
1464		*vfsp = zfvp->z_vfs;
1465		vfs_ref(zfvp->z_vfs);
1466	} else {
1467		error = SET_ERROR(ESRCH);
1468	}
1469	mutex_exit(&os->os_user_ptr_lock);
1470	return (error);
1471}
1472
1473int
1474getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1475{
1476	objset_t *os;
1477	vfs_t *vfsp;
1478	int error;
1479
1480	error = dmu_objset_hold(dsname, FTAG, &os);
1481	if (error != 0)
1482		return (error);
1483	error = getzfsvfs_impl(os, &vfsp);
1484	dmu_objset_rele(os, FTAG);
1485	if (error != 0)
1486		return (error);
1487
1488	error = vfs_busy(vfsp, 0);
1489	vfs_rel(vfsp);
1490	if (error != 0) {
1491		*zfvp = NULL;
1492		error = SET_ERROR(ESRCH);
1493	} else {
1494		*zfvp = vfsp->vfs_data;
1495	}
1496	return (error);
1497}
1498
1499/*
1500 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1501 * case its z_vfs will be NULL, and it will be opened as the owner.
1502 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1503 * which prevents all vnode ops from running.
1504 */
1505static int
1506zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1507{
1508	int error = 0;
1509
1510	if (getzfsvfs(name, zfvp) != 0)
1511		error = zfsvfs_create(name, zfvp);
1512	if (error == 0) {
1513		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1514		    RW_READER, tag);
1515#ifdef illumos
1516		if ((*zfvp)->z_unmounted) {
1517			/*
1518			 * XXX we could probably try again, since the unmounting
1519			 * thread should be just about to disassociate the
1520			 * objset from the zfsvfs.
1521			 */
1522			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1523			return (SET_ERROR(EBUSY));
1524		}
1525#else
1526		/*
1527		 * vfs_busy() ensures that the filesystem is not and
1528		 * can not be unmounted.
1529		 */
1530		ASSERT(!(*zfvp)->z_unmounted);
1531#endif
1532	}
1533	return (error);
1534}
1535
1536static void
1537zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1538{
1539	rrm_exit(&zfsvfs->z_teardown_lock, tag);
1540
1541	if (zfsvfs->z_vfs) {
1542#ifdef illumos
1543		VFS_RELE(zfsvfs->z_vfs);
1544#else
1545		vfs_unbusy(zfsvfs->z_vfs);
1546#endif
1547	} else {
1548		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1549		zfsvfs_free(zfsvfs);
1550	}
1551}
1552
1553static int
1554zfs_ioc_pool_create(zfs_cmd_t *zc)
1555{
1556	int error;
1557	nvlist_t *config, *props = NULL;
1558	nvlist_t *rootprops = NULL;
1559	nvlist_t *zplprops = NULL;
1560
1561	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1562	    zc->zc_iflags, &config))
1563		return (error);
1564
1565	if (zc->zc_nvlist_src_size != 0 && (error =
1566	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1567	    zc->zc_iflags, &props))) {
1568		nvlist_free(config);
1569		return (error);
1570	}
1571
1572	if (props) {
1573		nvlist_t *nvl = NULL;
1574		uint64_t version = SPA_VERSION;
1575
1576		(void) nvlist_lookup_uint64(props,
1577		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1578		if (!SPA_VERSION_IS_SUPPORTED(version)) {
1579			error = SET_ERROR(EINVAL);
1580			goto pool_props_bad;
1581		}
1582		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1583		if (nvl) {
1584			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1585			if (error != 0) {
1586				nvlist_free(config);
1587				nvlist_free(props);
1588				return (error);
1589			}
1590			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1591		}
1592		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1593		error = zfs_fill_zplprops_root(version, rootprops,
1594		    zplprops, NULL);
1595		if (error != 0)
1596			goto pool_props_bad;
1597	}
1598
1599	error = spa_create(zc->zc_name, config, props, zplprops);
1600
1601	/*
1602	 * Set the remaining root properties
1603	 */
1604	if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1605	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1606		(void) spa_destroy(zc->zc_name);
1607
1608pool_props_bad:
1609	nvlist_free(rootprops);
1610	nvlist_free(zplprops);
1611	nvlist_free(config);
1612	nvlist_free(props);
1613
1614	return (error);
1615}
1616
1617static int
1618zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1619{
1620	int error;
1621	zfs_log_history(zc);
1622	error = spa_destroy(zc->zc_name);
1623	if (error == 0)
1624		zvol_remove_minors(zc->zc_name);
1625	return (error);
1626}
1627
1628static int
1629zfs_ioc_pool_import(zfs_cmd_t *zc)
1630{
1631	nvlist_t *config, *props = NULL;
1632	uint64_t guid;
1633	int error;
1634
1635	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1636	    zc->zc_iflags, &config)) != 0)
1637		return (error);
1638
1639	if (zc->zc_nvlist_src_size != 0 && (error =
1640	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1641	    zc->zc_iflags, &props))) {
1642		nvlist_free(config);
1643		return (error);
1644	}
1645
1646	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1647	    guid != zc->zc_guid)
1648		error = SET_ERROR(EINVAL);
1649	else
1650		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1651
1652	if (zc->zc_nvlist_dst != 0) {
1653		int err;
1654
1655		if ((err = put_nvlist(zc, config)) != 0)
1656			error = err;
1657	}
1658
1659	nvlist_free(config);
1660
1661	nvlist_free(props);
1662
1663	return (error);
1664}
1665
1666static int
1667zfs_ioc_pool_export(zfs_cmd_t *zc)
1668{
1669	int error;
1670	boolean_t force = (boolean_t)zc->zc_cookie;
1671	boolean_t hardforce = (boolean_t)zc->zc_guid;
1672
1673	zfs_log_history(zc);
1674	error = spa_export(zc->zc_name, NULL, force, hardforce);
1675	if (error == 0)
1676		zvol_remove_minors(zc->zc_name);
1677	return (error);
1678}
1679
1680static int
1681zfs_ioc_pool_configs(zfs_cmd_t *zc)
1682{
1683	nvlist_t *configs;
1684	int error;
1685
1686	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1687		return (SET_ERROR(EEXIST));
1688
1689	error = put_nvlist(zc, configs);
1690
1691	nvlist_free(configs);
1692
1693	return (error);
1694}
1695
1696/*
1697 * inputs:
1698 * zc_name		name of the pool
1699 *
1700 * outputs:
1701 * zc_cookie		real errno
1702 * zc_nvlist_dst	config nvlist
1703 * zc_nvlist_dst_size	size of config nvlist
1704 */
1705static int
1706zfs_ioc_pool_stats(zfs_cmd_t *zc)
1707{
1708	nvlist_t *config;
1709	int error;
1710	int ret = 0;
1711
1712	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1713	    sizeof (zc->zc_value));
1714
1715	if (config != NULL) {
1716		ret = put_nvlist(zc, config);
1717		nvlist_free(config);
1718
1719		/*
1720		 * The config may be present even if 'error' is non-zero.
1721		 * In this case we return success, and preserve the real errno
1722		 * in 'zc_cookie'.
1723		 */
1724		zc->zc_cookie = error;
1725	} else {
1726		ret = error;
1727	}
1728
1729	return (ret);
1730}
1731
1732/*
1733 * Try to import the given pool, returning pool stats as appropriate so that
1734 * user land knows which devices are available and overall pool health.
1735 */
1736static int
1737zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1738{
1739	nvlist_t *tryconfig, *config;
1740	int error;
1741
1742	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1743	    zc->zc_iflags, &tryconfig)) != 0)
1744		return (error);
1745
1746	config = spa_tryimport(tryconfig);
1747
1748	nvlist_free(tryconfig);
1749
1750	if (config == NULL)
1751		return (SET_ERROR(EINVAL));
1752
1753	error = put_nvlist(zc, config);
1754	nvlist_free(config);
1755
1756	return (error);
1757}
1758
1759/*
1760 * inputs:
1761 * zc_name              name of the pool
1762 * zc_cookie            scan func (pool_scan_func_t)
1763 * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
1764 */
1765static int
1766zfs_ioc_pool_scan(zfs_cmd_t *zc)
1767{
1768	spa_t *spa;
1769	int error;
1770
1771	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1772		return (error);
1773
1774	if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1775		return (SET_ERROR(EINVAL));
1776
1777	if (zc->zc_flags == POOL_SCRUB_PAUSE)
1778		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1779	else if (zc->zc_cookie == POOL_SCAN_NONE)
1780		error = spa_scan_stop(spa);
1781	else
1782		error = spa_scan(spa, zc->zc_cookie);
1783
1784	spa_close(spa, FTAG);
1785
1786	return (error);
1787}
1788
1789static int
1790zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1791{
1792	spa_t *spa;
1793	int error;
1794
1795	error = spa_open(zc->zc_name, &spa, FTAG);
1796	if (error == 0) {
1797		spa_freeze(spa);
1798		spa_close(spa, FTAG);
1799	}
1800	return (error);
1801}
1802
1803static int
1804zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1805{
1806	spa_t *spa;
1807	int error;
1808
1809	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1810		return (error);
1811
1812	if (zc->zc_cookie < spa_version(spa) ||
1813	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1814		spa_close(spa, FTAG);
1815		return (SET_ERROR(EINVAL));
1816	}
1817
1818	spa_upgrade(spa, zc->zc_cookie);
1819	spa_close(spa, FTAG);
1820
1821	return (error);
1822}
1823
1824static int
1825zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1826{
1827	spa_t *spa;
1828	char *hist_buf;
1829	uint64_t size;
1830	int error;
1831
1832	if ((size = zc->zc_history_len) == 0)
1833		return (SET_ERROR(EINVAL));
1834
1835	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1836		return (error);
1837
1838	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1839		spa_close(spa, FTAG);
1840		return (SET_ERROR(ENOTSUP));
1841	}
1842
1843	hist_buf = kmem_alloc(size, KM_SLEEP);
1844	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1845	    &zc->zc_history_len, hist_buf)) == 0) {
1846		error = ddi_copyout(hist_buf,
1847		    (void *)(uintptr_t)zc->zc_history,
1848		    zc->zc_history_len, zc->zc_iflags);
1849	}
1850
1851	spa_close(spa, FTAG);
1852	kmem_free(hist_buf, size);
1853	return (error);
1854}
1855
1856static int
1857zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1858{
1859	spa_t *spa;
1860	int error;
1861
1862	error = spa_open(zc->zc_name, &spa, FTAG);
1863	if (error == 0) {
1864		error = spa_change_guid(spa);
1865		spa_close(spa, FTAG);
1866	}
1867	return (error);
1868}
1869
1870static int
1871zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1872{
1873	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1874}
1875
1876/*
1877 * inputs:
1878 * zc_name		name of filesystem
1879 * zc_obj		object to find
1880 *
1881 * outputs:
1882 * zc_value		name of object
1883 */
1884static int
1885zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1886{
1887	objset_t *os;
1888	int error;
1889
1890	/* XXX reading from objset not owned */
1891	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1892		return (error);
1893	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1894		dmu_objset_rele(os, FTAG);
1895		return (SET_ERROR(EINVAL));
1896	}
1897	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1898	    sizeof (zc->zc_value));
1899	dmu_objset_rele(os, FTAG);
1900
1901	return (error);
1902}
1903
1904/*
1905 * inputs:
1906 * zc_name		name of filesystem
1907 * zc_obj		object to find
1908 *
1909 * outputs:
1910 * zc_stat		stats on object
1911 * zc_value		path to object
1912 */
1913static int
1914zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1915{
1916	objset_t *os;
1917	int error;
1918
1919	/* XXX reading from objset not owned */
1920	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1921		return (error);
1922	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1923		dmu_objset_rele(os, FTAG);
1924		return (SET_ERROR(EINVAL));
1925	}
1926	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1927	    sizeof (zc->zc_value));
1928	dmu_objset_rele(os, FTAG);
1929
1930	return (error);
1931}
1932
1933static int
1934zfs_ioc_vdev_add(zfs_cmd_t *zc)
1935{
1936	spa_t *spa;
1937	int error;
1938	nvlist_t *config, **l2cache, **spares;
1939	uint_t nl2cache = 0, nspares = 0;
1940
1941	error = spa_open(zc->zc_name, &spa, FTAG);
1942	if (error != 0)
1943		return (error);
1944
1945	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1946	    zc->zc_iflags, &config);
1947	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1948	    &l2cache, &nl2cache);
1949
1950	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1951	    &spares, &nspares);
1952
1953#ifdef illumos
1954	/*
1955	 * A root pool with concatenated devices is not supported.
1956	 * Thus, can not add a device to a root pool.
1957	 *
1958	 * Intent log device can not be added to a rootpool because
1959	 * during mountroot, zil is replayed, a seperated log device
1960	 * can not be accessed during the mountroot time.
1961	 *
1962	 * l2cache and spare devices are ok to be added to a rootpool.
1963	 */
1964	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1965		nvlist_free(config);
1966		spa_close(spa, FTAG);
1967		return (SET_ERROR(EDOM));
1968	}
1969#endif /* illumos */
1970
1971	if (error == 0) {
1972		error = spa_vdev_add(spa, config);
1973		nvlist_free(config);
1974	}
1975	spa_close(spa, FTAG);
1976	return (error);
1977}
1978
1979/*
1980 * inputs:
1981 * zc_name		name of the pool
1982 * zc_guid		guid of vdev to remove
1983 * zc_cookie		cancel removal
1984 */
1985static int
1986zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1987{
1988	spa_t *spa;
1989	int error;
1990
1991	error = spa_open(zc->zc_name, &spa, FTAG);
1992	if (error != 0)
1993		return (error);
1994	if (zc->zc_cookie != 0) {
1995		error = spa_vdev_remove_cancel(spa);
1996	} else {
1997		error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1998	}
1999	spa_close(spa, FTAG);
2000	return (error);
2001}
2002
2003static int
2004zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
2005{
2006	spa_t *spa;
2007	int error;
2008	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
2009
2010	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2011		return (error);
2012	switch (zc->zc_cookie) {
2013	case VDEV_STATE_ONLINE:
2014		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
2015		break;
2016
2017	case VDEV_STATE_OFFLINE:
2018		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
2019		break;
2020
2021	case VDEV_STATE_FAULTED:
2022		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2023		    zc->zc_obj != VDEV_AUX_EXTERNAL)
2024			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2025
2026		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
2027		break;
2028
2029	case VDEV_STATE_DEGRADED:
2030		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2031		    zc->zc_obj != VDEV_AUX_EXTERNAL)
2032			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2033
2034		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
2035		break;
2036
2037	default:
2038		error = SET_ERROR(EINVAL);
2039	}
2040	zc->zc_cookie = newstate;
2041	spa_close(spa, FTAG);
2042	return (error);
2043}
2044
2045static int
2046zfs_ioc_vdev_attach(zfs_cmd_t *zc)
2047{
2048	spa_t *spa;
2049	int replacing = zc->zc_cookie;
2050	nvlist_t *config;
2051	int error;
2052
2053	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2054		return (error);
2055
2056	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2057	    zc->zc_iflags, &config)) == 0) {
2058		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
2059		nvlist_free(config);
2060	}
2061
2062	spa_close(spa, FTAG);
2063	return (error);
2064}
2065
2066static int
2067zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2068{
2069	spa_t *spa;
2070	int error;
2071
2072	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2073		return (error);
2074
2075	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2076
2077	spa_close(spa, FTAG);
2078	return (error);
2079}
2080
2081static int
2082zfs_ioc_vdev_split(zfs_cmd_t *zc)
2083{
2084	spa_t *spa;
2085	nvlist_t *config, *props = NULL;
2086	int error;
2087	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2088
2089	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2090		return (error);
2091
2092	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2093	    zc->zc_iflags, &config)) {
2094		spa_close(spa, FTAG);
2095		return (error);
2096	}
2097
2098	if (zc->zc_nvlist_src_size != 0 && (error =
2099	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2100	    zc->zc_iflags, &props))) {
2101		spa_close(spa, FTAG);
2102		nvlist_free(config);
2103		return (error);
2104	}
2105
2106	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2107
2108	spa_close(spa, FTAG);
2109
2110	nvlist_free(config);
2111	nvlist_free(props);
2112
2113	return (error);
2114}
2115
2116static int
2117zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2118{
2119	spa_t *spa;
2120	char *path = zc->zc_value;
2121	uint64_t guid = zc->zc_guid;
2122	int error;
2123
2124	error = spa_open(zc->zc_name, &spa, FTAG);
2125	if (error != 0)
2126		return (error);
2127
2128	error = spa_vdev_setpath(spa, guid, path);
2129	spa_close(spa, FTAG);
2130	return (error);
2131}
2132
2133static int
2134zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2135{
2136	spa_t *spa;
2137	char *fru = zc->zc_value;
2138	uint64_t guid = zc->zc_guid;
2139	int error;
2140
2141	error = spa_open(zc->zc_name, &spa, FTAG);
2142	if (error != 0)
2143		return (error);
2144
2145	error = spa_vdev_setfru(spa, guid, fru);
2146	spa_close(spa, FTAG);
2147	return (error);
2148}
2149
2150static int
2151zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2152{
2153	int error = 0;
2154	nvlist_t *nv;
2155
2156	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2157
2158	if (zc->zc_nvlist_dst != 0 &&
2159	    (error = dsl_prop_get_all(os, &nv)) == 0) {
2160		dmu_objset_stats(os, nv);
2161		/*
2162		 * NB: zvol_get_stats() will read the objset contents,
2163		 * which we aren't supposed to do with a
2164		 * DS_MODE_USER hold, because it could be
2165		 * inconsistent.  So this is a bit of a workaround...
2166		 * XXX reading with out owning
2167		 */
2168		if (!zc->zc_objset_stats.dds_inconsistent &&
2169		    dmu_objset_type(os) == DMU_OST_ZVOL) {
2170			error = zvol_get_stats(os, nv);
2171			if (error == EIO)
2172				return (error);
2173			VERIFY0(error);
2174		}
2175		error = put_nvlist(zc, nv);
2176		nvlist_free(nv);
2177	}
2178
2179	return (error);
2180}
2181
2182/*
2183 * inputs:
2184 * zc_name		name of filesystem
2185 * zc_nvlist_dst_size	size of buffer for property nvlist
2186 *
2187 * outputs:
2188 * zc_objset_stats	stats
2189 * zc_nvlist_dst	property nvlist
2190 * zc_nvlist_dst_size	size of property nvlist
2191 */
2192static int
2193zfs_ioc_objset_stats(zfs_cmd_t *zc)
2194{
2195	objset_t *os;
2196	int error;
2197
2198	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2199	if (error == 0) {
2200		error = zfs_ioc_objset_stats_impl(zc, os);
2201		dmu_objset_rele(os, FTAG);
2202	}
2203
2204	if (error == ENOMEM)
2205		error = 0;
2206	return (error);
2207}
2208
2209/*
2210 * inputs:
2211 * zc_name		name of filesystem
2212 * zc_nvlist_dst_size	size of buffer for property nvlist
2213 *
2214 * outputs:
2215 * zc_nvlist_dst	received property nvlist
2216 * zc_nvlist_dst_size	size of received property nvlist
2217 *
2218 * Gets received properties (distinct from local properties on or after
2219 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2220 * local property values.
2221 */
2222static int
2223zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2224{
2225	int error = 0;
2226	nvlist_t *nv;
2227
2228	/*
2229	 * Without this check, we would return local property values if the
2230	 * caller has not already received properties on or after
2231	 * SPA_VERSION_RECVD_PROPS.
2232	 */
2233	if (!dsl_prop_get_hasrecvd(zc->zc_name))
2234		return (SET_ERROR(ENOTSUP));
2235
2236	if (zc->zc_nvlist_dst != 0 &&
2237	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2238		error = put_nvlist(zc, nv);
2239		nvlist_free(nv);
2240	}
2241
2242	return (error);
2243}
2244
2245static int
2246nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2247{
2248	uint64_t value;
2249	int error;
2250
2251	/*
2252	 * zfs_get_zplprop() will either find a value or give us
2253	 * the default value (if there is one).
2254	 */
2255	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2256		return (error);
2257	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2258	return (0);
2259}
2260
2261/*
2262 * inputs:
2263 * zc_name		name of filesystem
2264 * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2265 *
2266 * outputs:
2267 * zc_nvlist_dst	zpl property nvlist
2268 * zc_nvlist_dst_size	size of zpl property nvlist
2269 */
2270static int
2271zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2272{
2273	objset_t *os;
2274	int err;
2275
2276	/* XXX reading without owning */
2277	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2278		return (err);
2279
2280	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2281
2282	/*
2283	 * NB: nvl_add_zplprop() will read the objset contents,
2284	 * which we aren't supposed to do with a DS_MODE_USER
2285	 * hold, because it could be inconsistent.
2286	 */
2287	if (zc->zc_nvlist_dst != 0 &&
2288	    !zc->zc_objset_stats.dds_inconsistent &&
2289	    dmu_objset_type(os) == DMU_OST_ZFS) {
2290		nvlist_t *nv;
2291
2292		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2293		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2294		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2295		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2296		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2297			err = put_nvlist(zc, nv);
2298		nvlist_free(nv);
2299	} else {
2300		err = SET_ERROR(ENOENT);
2301	}
2302	dmu_objset_rele(os, FTAG);
2303	return (err);
2304}
2305
2306boolean_t
2307dataset_name_hidden(const char *name)
2308{
2309	/*
2310	 * Skip over datasets that are not visible in this zone,
2311	 * internal datasets (which have a $ in their name), and
2312	 * temporary datasets (which have a % in their name).
2313	 */
2314	if (strchr(name, '$') != NULL)
2315		return (B_TRUE);
2316	if (strchr(name, '%') != NULL)
2317		return (B_TRUE);
2318	if (!INGLOBALZONE(curthread) && !zone_dataset_visible(name, NULL))
2319		return (B_TRUE);
2320	return (B_FALSE);
2321}
2322
2323/*
2324 * inputs:
2325 * zc_name		name of filesystem
2326 * zc_cookie		zap cursor
2327 * zc_nvlist_dst_size	size of buffer for property nvlist
2328 *
2329 * outputs:
2330 * zc_name		name of next filesystem
2331 * zc_cookie		zap cursor
2332 * zc_objset_stats	stats
2333 * zc_nvlist_dst	property nvlist
2334 * zc_nvlist_dst_size	size of property nvlist
2335 */
2336static int
2337zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2338{
2339	objset_t *os;
2340	int error;
2341	char *p;
2342	size_t orig_len = strlen(zc->zc_name);
2343
2344top:
2345	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2346		if (error == ENOENT)
2347			error = SET_ERROR(ESRCH);
2348		return (error);
2349	}
2350
2351	p = strrchr(zc->zc_name, '/');
2352	if (p == NULL || p[1] != '\0')
2353		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2354	p = zc->zc_name + strlen(zc->zc_name);
2355
2356	do {
2357		error = dmu_dir_list_next(os,
2358		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2359		    NULL, &zc->zc_cookie);
2360		if (error == ENOENT)
2361			error = SET_ERROR(ESRCH);
2362	} while (error == 0 && dataset_name_hidden(zc->zc_name));
2363	dmu_objset_rele(os, FTAG);
2364
2365	/*
2366	 * If it's an internal dataset (ie. with a '$' in its name),
2367	 * don't try to get stats for it, otherwise we'll return ENOENT.
2368	 */
2369	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2370		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2371		if (error == ENOENT) {
2372			/* We lost a race with destroy, get the next one. */
2373			zc->zc_name[orig_len] = '\0';
2374			goto top;
2375		}
2376	}
2377	return (error);
2378}
2379
2380/*
2381 * inputs:
2382 * zc_name		name of filesystem
2383 * zc_cookie		zap cursor
2384 * zc_nvlist_dst_size	size of buffer for property nvlist
2385 * zc_simple		when set, only name is requested
2386 *
2387 * outputs:
2388 * zc_name		name of next snapshot
2389 * zc_objset_stats	stats
2390 * zc_nvlist_dst	property nvlist
2391 * zc_nvlist_dst_size	size of property nvlist
2392 */
2393static int
2394zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2395{
2396	objset_t *os;
2397	int error;
2398
2399	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2400	if (error != 0) {
2401		return (error == ENOENT ? ESRCH : error);
2402	}
2403
2404	/*
2405	 * A dataset name of maximum length cannot have any snapshots,
2406	 * so exit immediately.
2407	 */
2408	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2409	    ZFS_MAX_DATASET_NAME_LEN) {
2410		dmu_objset_rele(os, FTAG);
2411		return (SET_ERROR(ESRCH));
2412	}
2413
2414	error = dmu_snapshot_list_next(os,
2415	    sizeof (zc->zc_name) - strlen(zc->zc_name),
2416	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2417	    NULL);
2418
2419	if (error == 0 && !zc->zc_simple) {
2420		dsl_dataset_t *ds;
2421		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2422
2423		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2424		if (error == 0) {
2425			objset_t *ossnap;
2426
2427			error = dmu_objset_from_ds(ds, &ossnap);
2428			if (error == 0)
2429				error = zfs_ioc_objset_stats_impl(zc, ossnap);
2430			dsl_dataset_rele(ds, FTAG);
2431		}
2432	} else if (error == ENOENT) {
2433		error = SET_ERROR(ESRCH);
2434	}
2435
2436	dmu_objset_rele(os, FTAG);
2437	/* if we failed, undo the @ that we tacked on to zc_name */
2438	if (error != 0)
2439		*strchr(zc->zc_name, '@') = '\0';
2440	return (error);
2441}
2442
2443static int
2444zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2445{
2446	const char *propname = nvpair_name(pair);
2447	uint64_t *valary;
2448	unsigned int vallen;
2449	const char *domain;
2450	char *dash;
2451	zfs_userquota_prop_t type;
2452	uint64_t rid;
2453	uint64_t quota;
2454	zfsvfs_t *zfsvfs;
2455	int err;
2456
2457	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2458		nvlist_t *attrs;
2459		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2460		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2461		    &pair) != 0)
2462			return (SET_ERROR(EINVAL));
2463	}
2464
2465	/*
2466	 * A correctly constructed propname is encoded as
2467	 * userquota@<rid>-<domain>.
2468	 */
2469	if ((dash = strchr(propname, '-')) == NULL ||
2470	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2471	    vallen != 3)
2472		return (SET_ERROR(EINVAL));
2473
2474	domain = dash + 1;
2475	type = valary[0];
2476	rid = valary[1];
2477	quota = valary[2];
2478
2479	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2480	if (err == 0) {
2481		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2482		zfsvfs_rele(zfsvfs, FTAG);
2483	}
2484
2485	return (err);
2486}
2487
2488/*
2489 * If the named property is one that has a special function to set its value,
2490 * return 0 on success and a positive error code on failure; otherwise if it is
2491 * not one of the special properties handled by this function, return -1.
2492 *
2493 * XXX: It would be better for callers of the property interface if we handled
2494 * these special cases in dsl_prop.c (in the dsl layer).
2495 */
2496static int
2497zfs_prop_set_special(const char *dsname, zprop_source_t source,
2498    nvpair_t *pair)
2499{
2500	const char *propname = nvpair_name(pair);
2501	zfs_prop_t prop = zfs_name_to_prop(propname);
2502	uint64_t intval;
2503	int err = -1;
2504
2505	if (prop == ZPROP_INVAL) {
2506		if (zfs_prop_userquota(propname))
2507			return (zfs_prop_set_userquota(dsname, pair));
2508		return (-1);
2509	}
2510
2511	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2512		nvlist_t *attrs;
2513		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2514		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2515		    &pair) == 0);
2516	}
2517
2518	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2519		return (-1);
2520
2521	VERIFY(0 == nvpair_value_uint64(pair, &intval));
2522
2523	switch (prop) {
2524	case ZFS_PROP_QUOTA:
2525		err = dsl_dir_set_quota(dsname, source, intval);
2526		break;
2527	case ZFS_PROP_REFQUOTA:
2528		err = dsl_dataset_set_refquota(dsname, source, intval);
2529		break;
2530	case ZFS_PROP_FILESYSTEM_LIMIT:
2531	case ZFS_PROP_SNAPSHOT_LIMIT:
2532		if (intval == UINT64_MAX) {
2533			/* clearing the limit, just do it */
2534			err = 0;
2535		} else {
2536			err = dsl_dir_activate_fs_ss_limit(dsname);
2537		}
2538		/*
2539		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2540		 * default path to set the value in the nvlist.
2541		 */
2542		if (err == 0)
2543			err = -1;
2544		break;
2545	case ZFS_PROP_RESERVATION:
2546		err = dsl_dir_set_reservation(dsname, source, intval);
2547		break;
2548	case ZFS_PROP_REFRESERVATION:
2549		err = dsl_dataset_set_refreservation(dsname, source, intval);
2550		break;
2551	case ZFS_PROP_VOLSIZE:
2552		err = zvol_set_volsize(dsname, intval);
2553		break;
2554	case ZFS_PROP_VERSION:
2555	{
2556		zfsvfs_t *zfsvfs;
2557
2558		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2559			break;
2560
2561		err = zfs_set_version(zfsvfs, intval);
2562		zfsvfs_rele(zfsvfs, FTAG);
2563
2564		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2565			zfs_cmd_t *zc;
2566
2567			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2568			(void) strcpy(zc->zc_name, dsname);
2569			(void) zfs_ioc_userspace_upgrade(zc);
2570			kmem_free(zc, sizeof (zfs_cmd_t));
2571		}
2572		break;
2573	}
2574	default:
2575		err = -1;
2576	}
2577
2578	return (err);
2579}
2580
2581/*
2582 * This function is best effort. If it fails to set any of the given properties,
2583 * it continues to set as many as it can and returns the last error
2584 * encountered. If the caller provides a non-NULL errlist, it will be filled in
2585 * with the list of names of all the properties that failed along with the
2586 * corresponding error numbers.
2587 *
2588 * If every property is set successfully, zero is returned and errlist is not
2589 * modified.
2590 */
2591int
2592zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2593    nvlist_t *errlist)
2594{
2595	nvpair_t *pair;
2596	nvpair_t *propval;
2597	int rv = 0;
2598	uint64_t intval;
2599	char *strval;
2600	nvlist_t *genericnvl = fnvlist_alloc();
2601	nvlist_t *retrynvl = fnvlist_alloc();
2602
2603retry:
2604	pair = NULL;
2605	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2606		const char *propname = nvpair_name(pair);
2607		zfs_prop_t prop = zfs_name_to_prop(propname);
2608		int err = 0;
2609
2610		/* decode the property value */
2611		propval = pair;
2612		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2613			nvlist_t *attrs;
2614			attrs = fnvpair_value_nvlist(pair);
2615			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2616			    &propval) != 0)
2617				err = SET_ERROR(EINVAL);
2618		}
2619
2620		/* Validate value type */
2621		if (err == 0 && prop == ZPROP_INVAL) {
2622			if (zfs_prop_user(propname)) {
2623				if (nvpair_type(propval) != DATA_TYPE_STRING)
2624					err = SET_ERROR(EINVAL);
2625			} else if (zfs_prop_userquota(propname)) {
2626				if (nvpair_type(propval) !=
2627				    DATA_TYPE_UINT64_ARRAY)
2628					err = SET_ERROR(EINVAL);
2629			} else {
2630				err = SET_ERROR(EINVAL);
2631			}
2632		} else if (err == 0) {
2633			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2634				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2635					err = SET_ERROR(EINVAL);
2636			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2637				const char *unused;
2638
2639				intval = fnvpair_value_uint64(propval);
2640
2641				switch (zfs_prop_get_type(prop)) {
2642				case PROP_TYPE_NUMBER:
2643					break;
2644				case PROP_TYPE_STRING:
2645					err = SET_ERROR(EINVAL);
2646					break;
2647				case PROP_TYPE_INDEX:
2648					if (zfs_prop_index_to_string(prop,
2649					    intval, &unused) != 0)
2650						err = SET_ERROR(EINVAL);
2651					break;
2652				default:
2653					cmn_err(CE_PANIC,
2654					    "unknown property type");
2655				}
2656			} else {
2657				err = SET_ERROR(EINVAL);
2658			}
2659		}
2660
2661		/* Validate permissions */
2662		if (err == 0)
2663			err = zfs_check_settable(dsname, pair, CRED());
2664
2665		if (err == 0) {
2666			err = zfs_prop_set_special(dsname, source, pair);
2667			if (err == -1) {
2668				/*
2669				 * For better performance we build up a list of
2670				 * properties to set in a single transaction.
2671				 */
2672				err = nvlist_add_nvpair(genericnvl, pair);
2673			} else if (err != 0 && nvl != retrynvl) {
2674				/*
2675				 * This may be a spurious error caused by
2676				 * receiving quota and reservation out of order.
2677				 * Try again in a second pass.
2678				 */
2679				err = nvlist_add_nvpair(retrynvl, pair);
2680			}
2681		}
2682
2683		if (err != 0) {
2684			if (errlist != NULL)
2685				fnvlist_add_int32(errlist, propname, err);
2686			rv = err;
2687		}
2688	}
2689
2690	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2691		nvl = retrynvl;
2692		goto retry;
2693	}
2694
2695	if (!nvlist_empty(genericnvl) &&
2696	    dsl_props_set(dsname, source, genericnvl) != 0) {
2697		/*
2698		 * If this fails, we still want to set as many properties as we
2699		 * can, so try setting them individually.
2700		 */
2701		pair = NULL;
2702		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2703			const char *propname = nvpair_name(pair);
2704			int err = 0;
2705
2706			propval = pair;
2707			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2708				nvlist_t *attrs;
2709				attrs = fnvpair_value_nvlist(pair);
2710				propval = fnvlist_lookup_nvpair(attrs,
2711				    ZPROP_VALUE);
2712			}
2713
2714			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2715				strval = fnvpair_value_string(propval);
2716				err = dsl_prop_set_string(dsname, propname,
2717				    source, strval);
2718			} else {
2719				intval = fnvpair_value_uint64(propval);
2720				err = dsl_prop_set_int(dsname, propname, source,
2721				    intval);
2722			}
2723
2724			if (err != 0) {
2725				if (errlist != NULL) {
2726					fnvlist_add_int32(errlist, propname,
2727					    err);
2728				}
2729				rv = err;
2730			}
2731		}
2732	}
2733	nvlist_free(genericnvl);
2734	nvlist_free(retrynvl);
2735
2736	return (rv);
2737}
2738
2739/*
2740 * Check that all the properties are valid user properties.
2741 */
2742static int
2743zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2744{
2745	nvpair_t *pair = NULL;
2746	int error = 0;
2747
2748	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2749		const char *propname = nvpair_name(pair);
2750
2751		if (!zfs_prop_user(propname) ||
2752		    nvpair_type(pair) != DATA_TYPE_STRING)
2753			return (SET_ERROR(EINVAL));
2754
2755		if (error = zfs_secpolicy_write_perms(fsname,
2756		    ZFS_DELEG_PERM_USERPROP, CRED()))
2757			return (error);
2758
2759		if (strlen(propname) >= ZAP_MAXNAMELEN)
2760			return (SET_ERROR(ENAMETOOLONG));
2761
2762		if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2763			return (E2BIG);
2764	}
2765	return (0);
2766}
2767
2768static void
2769props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2770{
2771	nvpair_t *pair;
2772
2773	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2774
2775	pair = NULL;
2776	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2777		if (nvlist_exists(skipped, nvpair_name(pair)))
2778			continue;
2779
2780		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2781	}
2782}
2783
2784static int
2785clear_received_props(const char *dsname, nvlist_t *props,
2786    nvlist_t *skipped)
2787{
2788	int err = 0;
2789	nvlist_t *cleared_props = NULL;
2790	props_skip(props, skipped, &cleared_props);
2791	if (!nvlist_empty(cleared_props)) {
2792		/*
2793		 * Acts on local properties until the dataset has received
2794		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2795		 */
2796		zprop_source_t flags = (ZPROP_SRC_NONE |
2797		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2798		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2799	}
2800	nvlist_free(cleared_props);
2801	return (err);
2802}
2803
2804/*
2805 * inputs:
2806 * zc_name		name of filesystem
2807 * zc_value		name of property to set
2808 * zc_nvlist_src{_size}	nvlist of properties to apply
2809 * zc_cookie		received properties flag
2810 *
2811 * outputs:
2812 * zc_nvlist_dst{_size} error for each unapplied received property
2813 */
2814static int
2815zfs_ioc_set_prop(zfs_cmd_t *zc)
2816{
2817	nvlist_t *nvl;
2818	boolean_t received = zc->zc_cookie;
2819	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2820	    ZPROP_SRC_LOCAL);
2821	nvlist_t *errors;
2822	int error;
2823
2824	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2825	    zc->zc_iflags, &nvl)) != 0)
2826		return (error);
2827
2828	if (received) {
2829		nvlist_t *origprops;
2830
2831		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2832			(void) clear_received_props(zc->zc_name,
2833			    origprops, nvl);
2834			nvlist_free(origprops);
2835		}
2836
2837		error = dsl_prop_set_hasrecvd(zc->zc_name);
2838	}
2839
2840	errors = fnvlist_alloc();
2841	if (error == 0)
2842		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2843
2844	if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2845		(void) put_nvlist(zc, errors);
2846	}
2847
2848	nvlist_free(errors);
2849	nvlist_free(nvl);
2850	return (error);
2851}
2852
2853/*
2854 * inputs:
2855 * zc_name		name of filesystem
2856 * zc_value		name of property to inherit
2857 * zc_cookie		revert to received value if TRUE
2858 *
2859 * outputs:		none
2860 */
2861static int
2862zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2863{
2864	const char *propname = zc->zc_value;
2865	zfs_prop_t prop = zfs_name_to_prop(propname);
2866	boolean_t received = zc->zc_cookie;
2867	zprop_source_t source = (received
2868	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2869	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2870
2871	if (received) {
2872		nvlist_t *dummy;
2873		nvpair_t *pair;
2874		zprop_type_t type;
2875		int err;
2876
2877		/*
2878		 * zfs_prop_set_special() expects properties in the form of an
2879		 * nvpair with type info.
2880		 */
2881		if (prop == ZPROP_INVAL) {
2882			if (!zfs_prop_user(propname))
2883				return (SET_ERROR(EINVAL));
2884
2885			type = PROP_TYPE_STRING;
2886		} else if (prop == ZFS_PROP_VOLSIZE ||
2887		    prop == ZFS_PROP_VERSION) {
2888			return (SET_ERROR(EINVAL));
2889		} else {
2890			type = zfs_prop_get_type(prop);
2891		}
2892
2893		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2894
2895		switch (type) {
2896		case PROP_TYPE_STRING:
2897			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2898			break;
2899		case PROP_TYPE_NUMBER:
2900		case PROP_TYPE_INDEX:
2901			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2902			break;
2903		default:
2904			nvlist_free(dummy);
2905			return (SET_ERROR(EINVAL));
2906		}
2907
2908		pair = nvlist_next_nvpair(dummy, NULL);
2909		err = zfs_prop_set_special(zc->zc_name, source, pair);
2910		nvlist_free(dummy);
2911		if (err != -1)
2912			return (err); /* special property already handled */
2913	} else {
2914		/*
2915		 * Only check this in the non-received case. We want to allow
2916		 * 'inherit -S' to revert non-inheritable properties like quota
2917		 * and reservation to the received or default values even though
2918		 * they are not considered inheritable.
2919		 */
2920		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2921			return (SET_ERROR(EINVAL));
2922	}
2923
2924	/* property name has been validated by zfs_secpolicy_inherit_prop() */
2925	return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2926}
2927
2928static int
2929zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2930{
2931	nvlist_t *props;
2932	spa_t *spa;
2933	int error;
2934	nvpair_t *pair;
2935
2936	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2937	    zc->zc_iflags, &props))
2938		return (error);
2939
2940	/*
2941	 * If the only property is the configfile, then just do a spa_lookup()
2942	 * to handle the faulted case.
2943	 */
2944	pair = nvlist_next_nvpair(props, NULL);
2945	if (pair != NULL && strcmp(nvpair_name(pair),
2946	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2947	    nvlist_next_nvpair(props, pair) == NULL) {
2948		mutex_enter(&spa_namespace_lock);
2949		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2950			spa_configfile_set(spa, props, B_FALSE);
2951			spa_write_cachefile(spa, B_FALSE, B_TRUE);
2952		}
2953		mutex_exit(&spa_namespace_lock);
2954		if (spa != NULL) {
2955			nvlist_free(props);
2956			return (0);
2957		}
2958	}
2959
2960	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2961		nvlist_free(props);
2962		return (error);
2963	}
2964
2965	error = spa_prop_set(spa, props);
2966
2967	nvlist_free(props);
2968	spa_close(spa, FTAG);
2969
2970	return (error);
2971}
2972
2973static int
2974zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2975{
2976	spa_t *spa;
2977	int error;
2978	nvlist_t *nvp = NULL;
2979
2980	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2981		/*
2982		 * If the pool is faulted, there may be properties we can still
2983		 * get (such as altroot and cachefile), so attempt to get them
2984		 * anyway.
2985		 */
2986		mutex_enter(&spa_namespace_lock);
2987		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2988			error = spa_prop_get(spa, &nvp);
2989		mutex_exit(&spa_namespace_lock);
2990	} else {
2991		error = spa_prop_get(spa, &nvp);
2992		spa_close(spa, FTAG);
2993	}
2994
2995	if (error == 0 && zc->zc_nvlist_dst != 0)
2996		error = put_nvlist(zc, nvp);
2997	else
2998		error = SET_ERROR(EFAULT);
2999
3000	nvlist_free(nvp);
3001	return (error);
3002}
3003
3004/*
3005 * inputs:
3006 * zc_name		name of filesystem
3007 * zc_nvlist_src{_size}	nvlist of delegated permissions
3008 * zc_perm_action	allow/unallow flag
3009 *
3010 * outputs:		none
3011 */
3012static int
3013zfs_ioc_set_fsacl(zfs_cmd_t *zc)
3014{
3015	int error;
3016	nvlist_t *fsaclnv = NULL;
3017
3018	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3019	    zc->zc_iflags, &fsaclnv)) != 0)
3020		return (error);
3021
3022	/*
3023	 * Verify nvlist is constructed correctly
3024	 */
3025	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
3026		nvlist_free(fsaclnv);
3027		return (SET_ERROR(EINVAL));
3028	}
3029
3030	/*
3031	 * If we don't have PRIV_SYS_MOUNT, then validate
3032	 * that user is allowed to hand out each permission in
3033	 * the nvlist(s)
3034	 */
3035
3036	error = secpolicy_zfs(CRED());
3037	if (error != 0) {
3038		if (zc->zc_perm_action == B_FALSE) {
3039			error = dsl_deleg_can_allow(zc->zc_name,
3040			    fsaclnv, CRED());
3041		} else {
3042			error = dsl_deleg_can_unallow(zc->zc_name,
3043			    fsaclnv, CRED());
3044		}
3045	}
3046
3047	if (error == 0)
3048		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3049
3050	nvlist_free(fsaclnv);
3051	return (error);
3052}
3053
3054/*
3055 * inputs:
3056 * zc_name		name of filesystem
3057 *
3058 * outputs:
3059 * zc_nvlist_src{_size}	nvlist of delegated permissions
3060 */
3061static int
3062zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3063{
3064	nvlist_t *nvp;
3065	int error;
3066
3067	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3068		error = put_nvlist(zc, nvp);
3069		nvlist_free(nvp);
3070	}
3071
3072	return (error);
3073}
3074
3075/* ARGSUSED */
3076static void
3077zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3078{
3079	zfs_creat_t *zct = arg;
3080
3081	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3082}
3083
3084#define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
3085
3086/*
3087 * inputs:
3088 * os			parent objset pointer (NULL if root fs)
3089 * fuids_ok		fuids allowed in this version of the spa?
3090 * sa_ok		SAs allowed in this version of the spa?
3091 * createprops		list of properties requested by creator
3092 *
3093 * outputs:
3094 * zplprops	values for the zplprops we attach to the master node object
3095 * is_ci	true if requested file system will be purely case-insensitive
3096 *
3097 * Determine the settings for utf8only, normalization and
3098 * casesensitivity.  Specific values may have been requested by the
3099 * creator and/or we can inherit values from the parent dataset.  If
3100 * the file system is of too early a vintage, a creator can not
3101 * request settings for these properties, even if the requested
3102 * setting is the default value.  We don't actually want to create dsl
3103 * properties for these, so remove them from the source nvlist after
3104 * processing.
3105 */
3106static int
3107zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3108    boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3109    nvlist_t *zplprops, boolean_t *is_ci)
3110{
3111	uint64_t sense = ZFS_PROP_UNDEFINED;
3112	uint64_t norm = ZFS_PROP_UNDEFINED;
3113	uint64_t u8 = ZFS_PROP_UNDEFINED;
3114
3115	ASSERT(zplprops != NULL);
3116
3117	if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3118		return (SET_ERROR(EINVAL));
3119
3120	/*
3121	 * Pull out creator prop choices, if any.
3122	 */
3123	if (createprops) {
3124		(void) nvlist_lookup_uint64(createprops,
3125		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3126		(void) nvlist_lookup_uint64(createprops,
3127		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3128		(void) nvlist_remove_all(createprops,
3129		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3130		(void) nvlist_lookup_uint64(createprops,
3131		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3132		(void) nvlist_remove_all(createprops,
3133		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3134		(void) nvlist_lookup_uint64(createprops,
3135		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3136		(void) nvlist_remove_all(createprops,
3137		    zfs_prop_to_name(ZFS_PROP_CASE));
3138	}
3139
3140	/*
3141	 * If the zpl version requested is whacky or the file system
3142	 * or pool is version is too "young" to support normalization
3143	 * and the creator tried to set a value for one of the props,
3144	 * error out.
3145	 */
3146	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3147	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3148	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3149	    (zplver < ZPL_VERSION_NORMALIZATION &&
3150	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3151	    sense != ZFS_PROP_UNDEFINED)))
3152		return (SET_ERROR(ENOTSUP));
3153
3154	/*
3155	 * Put the version in the zplprops
3156	 */
3157	VERIFY(nvlist_add_uint64(zplprops,
3158	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3159
3160	if (norm == ZFS_PROP_UNDEFINED)
3161		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3162	VERIFY(nvlist_add_uint64(zplprops,
3163	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3164
3165	/*
3166	 * If we're normalizing, names must always be valid UTF-8 strings.
3167	 */
3168	if (norm)
3169		u8 = 1;
3170	if (u8 == ZFS_PROP_UNDEFINED)
3171		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3172	VERIFY(nvlist_add_uint64(zplprops,
3173	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3174
3175	if (sense == ZFS_PROP_UNDEFINED)
3176		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3177	VERIFY(nvlist_add_uint64(zplprops,
3178	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3179
3180	if (is_ci)
3181		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3182
3183	return (0);
3184}
3185
3186static int
3187zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3188    nvlist_t *zplprops, boolean_t *is_ci)
3189{
3190	boolean_t fuids_ok, sa_ok;
3191	uint64_t zplver = ZPL_VERSION;
3192	objset_t *os = NULL;
3193	char parentname[ZFS_MAX_DATASET_NAME_LEN];
3194	char *cp;
3195	spa_t *spa;
3196	uint64_t spa_vers;
3197	int error;
3198
3199	(void) strlcpy(parentname, dataset, sizeof (parentname));
3200	cp = strrchr(parentname, '/');
3201	ASSERT(cp != NULL);
3202	cp[0] = '\0';
3203
3204	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3205		return (error);
3206
3207	spa_vers = spa_version(spa);
3208	spa_close(spa, FTAG);
3209
3210	zplver = zfs_zpl_version_map(spa_vers);
3211	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3212	sa_ok = (zplver >= ZPL_VERSION_SA);
3213
3214	/*
3215	 * Open parent object set so we can inherit zplprop values.
3216	 */
3217	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3218		return (error);
3219
3220	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3221	    zplprops, is_ci);
3222	dmu_objset_rele(os, FTAG);
3223	return (error);
3224}
3225
3226static int
3227zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3228    nvlist_t *zplprops, boolean_t *is_ci)
3229{
3230	boolean_t fuids_ok;
3231	boolean_t sa_ok;
3232	uint64_t zplver = ZPL_VERSION;
3233	int error;
3234
3235	zplver = zfs_zpl_version_map(spa_vers);
3236	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3237	sa_ok = (zplver >= ZPL_VERSION_SA);
3238
3239	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3240	    createprops, zplprops, is_ci);
3241	return (error);
3242}
3243
3244/*
3245 * innvl: {
3246 *     "type" -> dmu_objset_type_t (int32)
3247 *     (optional) "props" -> { prop -> value }
3248 * }
3249 *
3250 * outnvl: propname -> error code (int32)
3251 */
3252static int
3253zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3254{
3255	int error = 0;
3256	zfs_creat_t zct = { 0 };
3257	nvlist_t *nvprops = NULL;
3258	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3259	int32_t type32;
3260	dmu_objset_type_t type;
3261	boolean_t is_insensitive = B_FALSE;
3262
3263	if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3264		return (SET_ERROR(EINVAL));
3265	type = type32;
3266	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3267
3268	switch (type) {
3269	case DMU_OST_ZFS:
3270		cbfunc = zfs_create_cb;
3271		break;
3272
3273	case DMU_OST_ZVOL:
3274		cbfunc = zvol_create_cb;
3275		break;
3276
3277	default:
3278		cbfunc = NULL;
3279		break;
3280	}
3281	if (strchr(fsname, '@') ||
3282	    strchr(fsname, '%'))
3283		return (SET_ERROR(EINVAL));
3284
3285	zct.zct_props = nvprops;
3286
3287	if (cbfunc == NULL)
3288		return (SET_ERROR(EINVAL));
3289
3290	if (type == DMU_OST_ZVOL) {
3291		uint64_t volsize, volblocksize;
3292
3293		if (nvprops == NULL)
3294			return (SET_ERROR(EINVAL));
3295		if (nvlist_lookup_uint64(nvprops,
3296		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3297			return (SET_ERROR(EINVAL));
3298
3299		if ((error = nvlist_lookup_uint64(nvprops,
3300		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3301		    &volblocksize)) != 0 && error != ENOENT)
3302			return (SET_ERROR(EINVAL));
3303
3304		if (error != 0)
3305			volblocksize = zfs_prop_default_numeric(
3306			    ZFS_PROP_VOLBLOCKSIZE);
3307
3308		if ((error = zvol_check_volblocksize(
3309		    volblocksize)) != 0 ||
3310		    (error = zvol_check_volsize(volsize,
3311		    volblocksize)) != 0)
3312			return (error);
3313	} else if (type == DMU_OST_ZFS) {
3314		int error;
3315
3316		/*
3317		 * We have to have normalization and
3318		 * case-folding flags correct when we do the
3319		 * file system creation, so go figure them out
3320		 * now.
3321		 */
3322		VERIFY(nvlist_alloc(&zct.zct_zplprops,
3323		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3324		error = zfs_fill_zplprops(fsname, nvprops,
3325		    zct.zct_zplprops, &is_insensitive);
3326		if (error != 0) {
3327			nvlist_free(zct.zct_zplprops);
3328			return (error);
3329		}
3330	}
3331
3332	error = dmu_objset_create(fsname, type,
3333	    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3334	nvlist_free(zct.zct_zplprops);
3335
3336	/*
3337	 * It would be nice to do this atomically.
3338	 */
3339	if (error == 0) {
3340		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3341		    nvprops, outnvl);
3342		if (error != 0)
3343			(void) dsl_destroy_head(fsname);
3344	}
3345#ifdef __FreeBSD__
3346	if (error == 0 && type == DMU_OST_ZVOL)
3347		zvol_create_minors(fsname);
3348#endif
3349	return (error);
3350}
3351
3352/*
3353 * innvl: {
3354 *     "origin" -> name of origin snapshot
3355 *     (optional) "props" -> { prop -> value }
3356 * }
3357 *
3358 * outnvl: propname -> error code (int32)
3359 */
3360static int
3361zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3362{
3363	int error = 0;
3364	nvlist_t *nvprops = NULL;
3365	char *origin_name;
3366
3367	if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3368		return (SET_ERROR(EINVAL));
3369	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3370
3371	if (strchr(fsname, '@') ||
3372	    strchr(fsname, '%'))
3373		return (SET_ERROR(EINVAL));
3374
3375	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3376		return (SET_ERROR(EINVAL));
3377	error = dmu_objset_clone(fsname, origin_name);
3378	if (error != 0)
3379		return (error);
3380
3381	/*
3382	 * It would be nice to do this atomically.
3383	 */
3384	if (error == 0) {
3385		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3386		    nvprops, outnvl);
3387		if (error != 0)
3388			(void) dsl_destroy_head(fsname);
3389	}
3390#ifdef __FreeBSD__
3391	if (error == 0)
3392		zvol_create_minors(fsname);
3393#endif
3394	return (error);
3395}
3396
3397/* ARGSUSED */
3398static int
3399zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3400{
3401	if (strchr(fsname, '@') ||
3402	    strchr(fsname, '%'))
3403		return (SET_ERROR(EINVAL));
3404
3405	return (dmu_objset_remap_indirects(fsname));
3406}
3407
3408/*
3409 * innvl: {
3410 *     "snaps" -> { snapshot1, snapshot2 }
3411 *     (optional) "props" -> { prop -> value (string) }
3412 * }
3413 *
3414 * outnvl: snapshot -> error code (int32)
3415 */
3416static int
3417zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3418{
3419	nvlist_t *snaps;
3420	nvlist_t *props = NULL;
3421	int error, poollen;
3422	nvpair_t *pair;
3423
3424	(void) nvlist_lookup_nvlist(innvl, "props", &props);
3425	if ((error = zfs_check_userprops(poolname, props)) != 0)
3426		return (error);
3427
3428	if (!nvlist_empty(props) &&
3429	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3430		return (SET_ERROR(ENOTSUP));
3431
3432	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3433		return (SET_ERROR(EINVAL));
3434	poollen = strlen(poolname);
3435	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3436	    pair = nvlist_next_nvpair(snaps, pair)) {
3437		const char *name = nvpair_name(pair);
3438		const char *cp = strchr(name, '@');
3439
3440		/*
3441		 * The snap name must contain an @, and the part after it must
3442		 * contain only valid characters.
3443		 */
3444		if (cp == NULL ||
3445		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3446			return (SET_ERROR(EINVAL));
3447
3448		/*
3449		 * The snap must be in the specified pool.
3450		 */
3451		if (strncmp(name, poolname, poollen) != 0 ||
3452		    (name[poollen] != '/' && name[poollen] != '@'))
3453			return (SET_ERROR(EXDEV));
3454
3455		/* This must be the only snap of this fs. */
3456		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3457		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3458			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3459			    == 0) {
3460				return (SET_ERROR(EXDEV));
3461			}
3462		}
3463	}
3464
3465	error = dsl_dataset_snapshot(snaps, props, outnvl);
3466	return (error);
3467}
3468
3469/*
3470 * innvl: "message" -> string
3471 */
3472/* ARGSUSED */
3473static int
3474zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3475{
3476	char *message;
3477	spa_t *spa;
3478	int error;
3479	char *poolname;
3480
3481	/*
3482	 * The poolname in the ioctl is not set, we get it from the TSD,
3483	 * which was set at the end of the last successful ioctl that allows
3484	 * logging.  The secpolicy func already checked that it is set.
3485	 * Only one log ioctl is allowed after each successful ioctl, so
3486	 * we clear the TSD here.
3487	 */
3488	poolname = tsd_get(zfs_allow_log_key);
3489	(void) tsd_set(zfs_allow_log_key, NULL);
3490	error = spa_open(poolname, &spa, FTAG);
3491	strfree(poolname);
3492	if (error != 0)
3493		return (error);
3494
3495	if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
3496		spa_close(spa, FTAG);
3497		return (SET_ERROR(EINVAL));
3498	}
3499
3500	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3501		spa_close(spa, FTAG);
3502		return (SET_ERROR(ENOTSUP));
3503	}
3504
3505	error = spa_history_log(spa, message);
3506	spa_close(spa, FTAG);
3507	return (error);
3508}
3509
3510#ifdef __FreeBSD__
3511static int
3512zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3513{
3514	char name[MAXNAMELEN];
3515	spa_t *spa;
3516	vdev_t *vd;
3517	char *command;
3518	uint64_t pool_guid;
3519	uint64_t vdev_guid;
3520	int error;
3521
3522	if (nvlist_lookup_uint64(innvl,
3523	    ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
3524		return (EINVAL);
3525	if (nvlist_lookup_uint64(innvl,
3526	    ZPOOL_CONFIG_GUID, &vdev_guid) != 0)
3527		return (EINVAL);
3528	if (nvlist_lookup_string(innvl,
3529	    "command", &command) != 0)
3530		return (EINVAL);
3531
3532	mutex_enter(&spa_namespace_lock);
3533	spa = spa_by_guid(pool_guid, vdev_guid);
3534	if (spa != NULL)
3535		strcpy(name, spa_name(spa));
3536	mutex_exit(&spa_namespace_lock);
3537	if (spa == NULL)
3538		return (ENOENT);
3539
3540	if ((error = spa_open(name, &spa, FTAG)) != 0)
3541		return (error);
3542	spa_vdev_state_enter(spa, SCL_ALL);
3543	vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE);
3544	if (vd == NULL) {
3545		(void) spa_vdev_state_exit(spa, NULL, ENXIO);
3546		spa_close(spa, FTAG);
3547		return (ENODEV);
3548	}
3549	error = vdev_label_write_pad2(vd, command, strlen(command));
3550	(void) spa_vdev_state_exit(spa, NULL, 0);
3551	txg_wait_synced(spa->spa_dsl_pool, 0);
3552	spa_close(spa, FTAG);
3553	return (error);
3554}
3555#endif
3556
3557/*
3558 * The dp_config_rwlock must not be held when calling this, because the
3559 * unmount may need to write out data.
3560 *
3561 * This function is best-effort.  Callers must deal gracefully if it
3562 * remains mounted (or is remounted after this call).
3563 *
3564 * Returns 0 if the argument is not a snapshot, or it is not currently a
3565 * filesystem, or we were able to unmount it.  Returns error code otherwise.
3566 */
3567void
3568zfs_unmount_snap(const char *snapname)
3569{
3570	vfs_t *vfsp = NULL;
3571	zfsvfs_t *zfsvfs = NULL;
3572
3573	if (strchr(snapname, '@') == NULL)
3574		return;
3575
3576	int err = getzfsvfs(snapname, &zfsvfs);
3577	if (err != 0) {
3578		ASSERT3P(zfsvfs, ==, NULL);
3579		return;
3580	}
3581	vfsp = zfsvfs->z_vfs;
3582
3583	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
3584
3585#ifdef illumos
3586	err = vn_vfswlock(vfsp->vfs_vnodecovered);
3587	VFS_RELE(vfsp);
3588	if (err != 0)
3589		return;
3590#endif
3591
3592	/*
3593	 * Always force the unmount for snapshots.
3594	 */
3595#ifdef illumos
3596	(void) dounmount(vfsp, MS_FORCE, kcred);
3597#else
3598	vfs_ref(vfsp);
3599	vfs_unbusy(vfsp);
3600	(void) dounmount(vfsp, MS_FORCE, curthread);
3601#endif
3602}
3603
/*
 * Callback-shaped wrapper around zfs_unmount_snap(): arg is ignored and
 * the return value is always 0.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	int rv = 0;

	zfs_unmount_snap(snapname);

	return (rv);
}
3611
3612/*
3613 * When a clone is destroyed, its origin may also need to be destroyed,
3614 * in which case it must be unmounted.  This routine will do that unmount
3615 * if necessary.
3616 */
3617void
3618zfs_destroy_unmount_origin(const char *fsname)
3619{
3620	int error;
3621	objset_t *os;
3622	dsl_dataset_t *ds;
3623
3624	error = dmu_objset_hold(fsname, FTAG, &os);
3625	if (error != 0)
3626		return;
3627	ds = dmu_objset_ds(os);
3628	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3629		char originname[ZFS_MAX_DATASET_NAME_LEN];
3630		dsl_dataset_name(ds->ds_prev, originname);
3631		dmu_objset_rele(os, FTAG);
3632		zfs_unmount_snap(originname);
3633	} else {
3634		dmu_objset_rele(os, FTAG);
3635	}
3636}
3637
3638/*
3639 * innvl: {
3640 *     "snaps" -> { snapshot1, snapshot2 }
3641 *     (optional boolean) "defer"
3642 * }
3643 *
3644 * outnvl: snapshot -> error code (int32)
3645 *
3646 */
3647/* ARGSUSED */
3648static int
3649zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3650{
3651	int error, poollen;
3652	nvlist_t *snaps;
3653	nvpair_t *pair;
3654	boolean_t defer;
3655
3656	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3657		return (SET_ERROR(EINVAL));
3658	defer = nvlist_exists(innvl, "defer");
3659
3660	poollen = strlen(poolname);
3661	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3662	    pair = nvlist_next_nvpair(snaps, pair)) {
3663		const char *name = nvpair_name(pair);
3664
3665		/*
3666		 * The snap must be in the specified pool to prevent the
3667		 * invalid removal of zvol minors below.
3668		 */
3669		if (strncmp(name, poolname, poollen) != 0 ||
3670		    (name[poollen] != '/' && name[poollen] != '@'))
3671			return (SET_ERROR(EXDEV));
3672
3673		zfs_unmount_snap(nvpair_name(pair));
3674#if defined(__FreeBSD__)
3675		zvol_remove_minors(name);
3676#endif
3677	}
3678
3679	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3680}
3681
3682/*
3683 * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
3684 * All bookmarks must be in the same pool.
3685 *
3686 * innvl: {
3687 *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
3688 * }
3689 *
3690 * outnvl: bookmark -> error code (int32)
3691 *
3692 */
3693/* ARGSUSED */
3694static int
3695zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3696{
3697	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3698	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3699		char *snap_name;
3700
3701		/*
3702		 * Verify the snapshot argument.
3703		 */
3704		if (nvpair_value_string(pair, &snap_name) != 0)
3705			return (SET_ERROR(EINVAL));
3706
3707
3708		/* Verify that the keys (bookmarks) are unique */
3709		for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
3710		    pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
3711			if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
3712				return (SET_ERROR(EINVAL));
3713		}
3714	}
3715
3716	return (dsl_bookmark_create(innvl, outnvl));
3717}
3718
3719/*
3720 * innvl: {
3721 *     property 1, property 2, ...
3722 * }
3723 *
3724 * outnvl: {
3725 *     bookmark name 1 -> { property 1, property 2, ... },
3726 *     bookmark name 2 -> { property 1, property 2, ... }
3727 * }
3728 *
3729 */
3730static int
3731zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3732{
3733	return (dsl_get_bookmarks(fsname, innvl, outnvl));
3734}
3735
3736/*
3737 * innvl: {
3738 *     bookmark name 1, bookmark name 2
3739 * }
3740 *
3741 * outnvl: bookmark -> error code (int32)
3742 *
3743 */
3744static int
3745zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3746    nvlist_t *outnvl)
3747{
3748	int error, poollen;
3749
3750	poollen = strlen(poolname);
3751	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3752	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3753		const char *name = nvpair_name(pair);
3754		const char *cp = strchr(name, '#');
3755
3756		/*
3757		 * The bookmark name must contain an #, and the part after it
3758		 * must contain only valid characters.
3759		 */
3760		if (cp == NULL ||
3761		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3762			return (SET_ERROR(EINVAL));
3763
3764		/*
3765		 * The bookmark must be in the specified pool.
3766		 */
3767		if (strncmp(name, poolname, poollen) != 0 ||
3768		    (name[poollen] != '/' && name[poollen] != '#'))
3769			return (SET_ERROR(EXDEV));
3770	}
3771
3772	error = dsl_bookmark_destroy(innvl, outnvl);
3773	return (error);
3774}
3775
3776static int
3777zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
3778    nvlist_t *outnvl)
3779{
3780	char *program;
3781	uint64_t instrlimit, memlimit;
3782	boolean_t sync_flag;
3783	nvpair_t *nvarg = NULL;
3784
3785	if (0 != nvlist_lookup_string(innvl, ZCP_ARG_PROGRAM, &program)) {
3786		return (EINVAL);
3787	}
3788	if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
3789		sync_flag = B_TRUE;
3790	}
3791	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
3792		instrlimit = ZCP_DEFAULT_INSTRLIMIT;
3793	}
3794	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
3795		memlimit = ZCP_DEFAULT_MEMLIMIT;
3796	}
3797	if (0 != nvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST, &nvarg)) {
3798		return (EINVAL);
3799	}
3800
3801	if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
3802		return (EINVAL);
3803	if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
3804		return (EINVAL);
3805
3806	return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
3807	    nvarg, outnvl));
3808}
3809
3810/*
3811 * inputs:
3812 * zc_name		name of dataset to destroy
3813 * zc_objset_type	type of objset
3814 * zc_defer_destroy	mark for deferred destroy
3815 *
3816 * outputs:		none
3817 */
3818static int
3819zfs_ioc_destroy(zfs_cmd_t *zc)
3820{
3821	int err;
3822
3823	if (zc->zc_objset_type == DMU_OST_ZFS)
3824		zfs_unmount_snap(zc->zc_name);
3825
3826	if (strchr(zc->zc_name, '@'))
3827		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3828	else
3829		err = dsl_destroy_head(zc->zc_name);
3830	if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
3831#ifdef __FreeBSD__
3832		zvol_remove_minors(zc->zc_name);
3833#else
3834		(void) zvol_remove_minor(zc->zc_name);
3835#endif
3836	return (err);
3837}
3838
3839/*
3840 * fsname is name of dataset to rollback (to most recent snapshot)
3841 *
3842 * innvl may contain name of expected target snapshot
3843 *
3844 * outnvl: "target" -> name of most recent snapshot
3845 * }
3846 */
3847/* ARGSUSED */
3848static int
3849zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3850{
3851	zfsvfs_t *zfsvfs;
3852	char *target = NULL;
3853	int error;
3854
3855	(void) nvlist_lookup_string(innvl, "target", &target);
3856	if (target != NULL) {
3857		const char *cp = strchr(target, '@');
3858
3859		/*
3860		 * The snap name must contain an @, and the part after it must
3861		 * contain only valid characters.
3862		 */
3863		if (cp == NULL ||
3864		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3865			return (SET_ERROR(EINVAL));
3866	}
3867
3868	if (getzfsvfs(fsname, &zfsvfs) == 0) {
3869		dsl_dataset_t *ds;
3870
3871		ds = dmu_objset_ds(zfsvfs->z_os);
3872		error = zfs_suspend_fs(zfsvfs);
3873		if (error == 0) {
3874			int resume_err;
3875
3876			error = dsl_dataset_rollback(fsname, target, zfsvfs,
3877			    outnvl);
3878			resume_err = zfs_resume_fs(zfsvfs, ds);
3879			error = error ? error : resume_err;
3880		}
3881#ifdef illumos
3882		VFS_RELE(zfsvfs->z_vfs);
3883#else
3884		vfs_unbusy(zfsvfs->z_vfs);
3885#endif
3886	} else {
3887		error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
3888	}
3889	return (error);
3890}
3891
3892static int
3893recursive_unmount(const char *fsname, void *arg)
3894{
3895	const char *snapname = arg;
3896	char fullname[ZFS_MAX_DATASET_NAME_LEN];
3897
3898	(void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
3899	zfs_unmount_snap(fullname);
3900
3901	return (0);
3902}
3903
3904/*
3905 * inputs:
3906 * zc_name	old name of dataset
3907 * zc_value	new name of dataset
3908 * zc_cookie	recursive flag (only valid for snapshots)
3909 *
3910 * outputs:	none
3911 */
3912static int
3913zfs_ioc_rename(zfs_cmd_t *zc)
3914{
3915	boolean_t recursive = zc->zc_cookie & 1;
3916	char *at;
3917	boolean_t allow_mounted = B_TRUE;
3918
3919#ifdef __FreeBSD__
3920	allow_mounted = (zc->zc_cookie & 2) != 0;
3921#endif
3922
3923	/* "zfs rename" from and to ...%recv datasets should both fail */
3924	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
3925	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3926	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
3927	    dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3928	    strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
3929		return (SET_ERROR(EINVAL));
3930
3931	at = strchr(zc->zc_name, '@');
3932	if (at != NULL) {
3933		/* snaps must be in same fs */
3934		int error;
3935
3936		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
3937			return (SET_ERROR(EXDEV));
3938		*at = '\0';
3939		if (zc->zc_objset_type == DMU_OST_ZFS && !allow_mounted) {
3940			error = dmu_objset_find(zc->zc_name,
3941			    recursive_unmount, at + 1,
3942			    recursive ? DS_FIND_CHILDREN : 0);
3943			if (error != 0) {
3944				*at = '@';
3945				return (error);
3946			}
3947		}
3948		error = dsl_dataset_rename_snapshot(zc->zc_name,
3949		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
3950		*at = '@';
3951
3952		return (error);
3953	} else {
3954#ifdef illumos
3955		if (zc->zc_objset_type == DMU_OST_ZVOL)
3956			(void) zvol_remove_minor(zc->zc_name);
3957#endif
3958		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
3959	}
3960}
3961
3962static int
3963zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3964{
3965	const char *propname = nvpair_name(pair);
3966	boolean_t issnap = (strchr(dsname, '@') != NULL);
3967	zfs_prop_t prop = zfs_name_to_prop(propname);
3968	uint64_t intval;
3969	int err;
3970
3971	if (prop == ZPROP_INVAL) {
3972		if (zfs_prop_user(propname)) {
3973			if (err = zfs_secpolicy_write_perms(dsname,
3974			    ZFS_DELEG_PERM_USERPROP, cr))
3975				return (err);
3976			return (0);
3977		}
3978
3979		if (!issnap && zfs_prop_userquota(propname)) {
3980			const char *perm = NULL;
3981			const char *uq_prefix =
3982			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3983			const char *gq_prefix =
3984			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3985
3986			if (strncmp(propname, uq_prefix,
3987			    strlen(uq_prefix)) == 0) {
3988				perm = ZFS_DELEG_PERM_USERQUOTA;
3989			} else if (strncmp(propname, gq_prefix,
3990			    strlen(gq_prefix)) == 0) {
3991				perm = ZFS_DELEG_PERM_GROUPQUOTA;
3992			} else {
3993				/* USERUSED and GROUPUSED are read-only */
3994				return (SET_ERROR(EINVAL));
3995			}
3996
3997			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3998				return (err);
3999			return (0);
4000		}
4001
4002		return (SET_ERROR(EINVAL));
4003	}
4004
4005	if (issnap)
4006		return (SET_ERROR(EINVAL));
4007
4008	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
4009		/*
4010		 * dsl_prop_get_all_impl() returns properties in this
4011		 * format.
4012		 */
4013		nvlist_t *attrs;
4014		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
4015		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4016		    &pair) == 0);
4017	}
4018
4019	/*
4020	 * Check that this value is valid for this pool version
4021	 */
4022	switch (prop) {
4023	case ZFS_PROP_COMPRESSION:
4024		/*
4025		 * If the user specified gzip compression, make sure
4026		 * the SPA supports it. We ignore any errors here since
4027		 * we'll catch them later.
4028		 */
4029		if (nvpair_value_uint64(pair, &intval) == 0) {
4030			if (intval >= ZIO_COMPRESS_GZIP_1 &&
4031			    intval <= ZIO_COMPRESS_GZIP_9 &&
4032			    zfs_earlier_version(dsname,
4033			    SPA_VERSION_GZIP_COMPRESSION)) {
4034				return (SET_ERROR(ENOTSUP));
4035			}
4036
4037			if (intval == ZIO_COMPRESS_ZLE &&
4038			    zfs_earlier_version(dsname,
4039			    SPA_VERSION_ZLE_COMPRESSION))
4040				return (SET_ERROR(ENOTSUP));
4041
4042			if (intval == ZIO_COMPRESS_LZ4) {
4043				spa_t *spa;
4044
4045				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4046					return (err);
4047
4048				if (!spa_feature_is_enabled(spa,
4049				    SPA_FEATURE_LZ4_COMPRESS)) {
4050					spa_close(spa, FTAG);
4051					return (SET_ERROR(ENOTSUP));
4052				}
4053				spa_close(spa, FTAG);
4054			}
4055
4056			/*
4057			 * If this is a bootable dataset then
4058			 * verify that the compression algorithm
4059			 * is supported for booting. We must return
4060			 * something other than ENOTSUP since it
4061			 * implies a downrev pool version.
4062			 */
4063			if (zfs_is_bootfs(dsname) &&
4064			    !BOOTFS_COMPRESS_VALID(intval)) {
4065				return (SET_ERROR(ERANGE));
4066			}
4067		}
4068		break;
4069
4070	case ZFS_PROP_COPIES:
4071		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4072			return (SET_ERROR(ENOTSUP));
4073		break;
4074
4075	case ZFS_PROP_RECORDSIZE:
4076		/* Record sizes above 128k need the feature to be enabled */
4077		if (nvpair_value_uint64(pair, &intval) == 0 &&
4078		    intval > SPA_OLD_MAXBLOCKSIZE) {
4079			spa_t *spa;
4080
4081			/*
4082			 * We don't allow setting the property above 1MB,
4083			 * unless the tunable has been changed.
4084			 */
4085			if (intval > zfs_max_recordsize ||
4086			    intval > SPA_MAXBLOCKSIZE)
4087				return (SET_ERROR(ERANGE));
4088
4089			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4090				return (err);
4091
4092			if (!spa_feature_is_enabled(spa,
4093			    SPA_FEATURE_LARGE_BLOCKS)) {
4094				spa_close(spa, FTAG);
4095				return (SET_ERROR(ENOTSUP));
4096			}
4097			spa_close(spa, FTAG);
4098		}
4099		break;
4100
4101	case ZFS_PROP_SHARESMB:
4102		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4103			return (SET_ERROR(ENOTSUP));
4104		break;
4105
4106	case ZFS_PROP_ACLINHERIT:
4107		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4108		    nvpair_value_uint64(pair, &intval) == 0) {
4109			if (intval == ZFS_ACL_PASSTHROUGH_X &&
4110			    zfs_earlier_version(dsname,
4111			    SPA_VERSION_PASSTHROUGH_X))
4112				return (SET_ERROR(ENOTSUP));
4113		}
4114		break;
4115
4116	case ZFS_PROP_CHECKSUM:
4117	case ZFS_PROP_DEDUP:
4118	{
4119		spa_feature_t feature;
4120		spa_t *spa;
4121
4122		/* dedup feature version checks */
4123		if (prop == ZFS_PROP_DEDUP &&
4124		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4125			return (SET_ERROR(ENOTSUP));
4126
4127		if (nvpair_value_uint64(pair, &intval) != 0)
4128			return (SET_ERROR(EINVAL));
4129
4130		/* check prop value is enabled in features */
4131		feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
4132		if (feature == SPA_FEATURE_NONE)
4133			break;
4134
4135		if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4136			return (err);
4137		/*
4138		 * Salted checksums are not supported on root pools.
4139		 */
4140		if (spa_bootfs(spa) != 0 &&
4141		    intval < ZIO_CHECKSUM_FUNCTIONS &&
4142		    (zio_checksum_table[intval].ci_flags &
4143		    ZCHECKSUM_FLAG_SALTED)) {
4144			spa_close(spa, FTAG);
4145			return (SET_ERROR(ERANGE));
4146		}
4147		if (!spa_feature_is_enabled(spa, feature)) {
4148			spa_close(spa, FTAG);
4149			return (SET_ERROR(ENOTSUP));
4150		}
4151		spa_close(spa, FTAG);
4152		break;
4153	}
4154	}
4155
4156	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4157}
4158
4159/*
4160 * Checks for a race condition to make sure we don't increment a feature flag
4161 * multiple times.
4162 */
4163static int
4164zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
4165{
4166	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4167	spa_feature_t *featurep = arg;
4168
4169	if (!spa_feature_is_active(spa, *featurep))
4170		return (0);
4171	else
4172		return (SET_ERROR(EBUSY));
4173}
4174
4175/*
4176 * The callback invoked on feature activation in the sync task caused by
4177 * zfs_prop_activate_feature.
4178 */
4179static void
4180zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
4181{
4182	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4183	spa_feature_t *featurep = arg;
4184
4185	spa_feature_incr(spa, *featurep, tx);
4186}
4187
4188/*
4189 * Activates a feature on a pool in response to a property setting. This
4190 * creates a new sync task which modifies the pool to reflect the feature
4191 * as being active.
4192 */
4193static int
4194zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
4195{
4196	int err;
4197
4198	/* EBUSY here indicates that the feature is already active */
4199	err = dsl_sync_task(spa_name(spa),
4200	    zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
4201	    &feature, 2, ZFS_SPACE_CHECK_RESERVED);
4202
4203	if (err != 0 && err != EBUSY)
4204		return (err);
4205	else
4206		return (0);
4207}
4208
4209/*
4210 * Removes properties from the given props list that fail permission checks
4211 * needed to clear them and to restore them in case of a receive error. For each
4212 * property, make sure we have both set and inherit permissions.
4213 *
4214 * Returns the first error encountered if any permission checks fail. If the
4215 * caller provides a non-NULL errlist, it also gives the complete list of names
4216 * of all the properties that failed a permission check along with the
4217 * corresponding error numbers. The caller is responsible for freeing the
4218 * returned errlist.
4219 *
4220 * If every property checks out successfully, zero is returned and the list
4221 * pointed at by errlist is NULL.
4222 */
4223static int
4224zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
4225{
4226	zfs_cmd_t *zc;
4227	nvpair_t *pair, *next_pair;
4228	nvlist_t *errors;
4229	int err, rv = 0;
4230
4231	if (props == NULL)
4232		return (0);
4233
4234	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4235
4236	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4237	(void) strcpy(zc->zc_name, dataset);
4238	pair = nvlist_next_nvpair(props, NULL);
4239	while (pair != NULL) {
4240		next_pair = nvlist_next_nvpair(props, pair);
4241
4242		(void) strcpy(zc->zc_value, nvpair_name(pair));
4243		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4244		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4245			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4246			VERIFY(nvlist_add_int32(errors,
4247			    zc->zc_value, err) == 0);
4248		}
4249		pair = next_pair;
4250	}
4251	kmem_free(zc, sizeof (zfs_cmd_t));
4252
4253	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4254		nvlist_free(errors);
4255		errors = NULL;
4256	} else {
4257		VERIFY(nvpair_value_int32(pair, &rv) == 0);
4258	}
4259
4260	if (errlist == NULL)
4261		nvlist_free(errors);
4262	else
4263		*errlist = errors;
4264
4265	return (rv);
4266}
4267
4268static boolean_t
4269propval_equals(nvpair_t *p1, nvpair_t *p2)
4270{
4271	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4272		/* dsl_prop_get_all_impl() format */
4273		nvlist_t *attrs;
4274		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4275		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4276		    &p1) == 0);
4277	}
4278
4279	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4280		nvlist_t *attrs;
4281		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4282		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4283		    &p2) == 0);
4284	}
4285
4286	if (nvpair_type(p1) != nvpair_type(p2))
4287		return (B_FALSE);
4288
4289	if (nvpair_type(p1) == DATA_TYPE_STRING) {
4290		char *valstr1, *valstr2;
4291
4292		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4293		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4294		return (strcmp(valstr1, valstr2) == 0);
4295	} else {
4296		uint64_t intval1, intval2;
4297
4298		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4299		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4300		return (intval1 == intval2);
4301	}
4302}
4303
4304/*
4305 * Remove properties from props if they are not going to change (as determined
4306 * by comparison with origprops). Remove them from origprops as well, since we
4307 * do not need to clear or restore properties that won't change.
4308 */
4309static void
4310props_reduce(nvlist_t *props, nvlist_t *origprops)
4311{
4312	nvpair_t *pair, *next_pair;
4313
4314	if (origprops == NULL)
4315		return; /* all props need to be received */
4316
4317	pair = nvlist_next_nvpair(props, NULL);
4318	while (pair != NULL) {
4319		const char *propname = nvpair_name(pair);
4320		nvpair_t *match;
4321
4322		next_pair = nvlist_next_nvpair(props, pair);
4323
4324		if ((nvlist_lookup_nvpair(origprops, propname,
4325		    &match) != 0) || !propval_equals(pair, match))
4326			goto next; /* need to set received value */
4327
4328		/* don't clear the existing received value */
4329		(void) nvlist_remove_nvpair(origprops, match);
4330		/* don't bother receiving the property */
4331		(void) nvlist_remove_nvpair(props, pair);
4332next:
4333		pair = next_pair;
4334	}
4335}
4336
4337/*
4338 * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4339 * For example, refquota cannot be set until after the receipt of a dataset,
4340 * because in replication streams, an older/earlier snapshot may exceed the
4341 * refquota.  We want to receive the older/earlier snapshot, but setting
4342 * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4343 * the older/earlier snapshot from being received (with EDQUOT).
4344 *
4345 * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4346 *
4347 * libzfs will need to be judicious handling errors encountered by props
4348 * extracted by this function.
4349 */
4350static nvlist_t *
4351extract_delay_props(nvlist_t *props)
4352{
4353	nvlist_t *delayprops;
4354	nvpair_t *nvp, *tmp;
4355	static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
4356	int i;
4357
4358	VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4359
4360	for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4361	    nvp = nvlist_next_nvpair(props, nvp)) {
4362		/*
4363		 * strcmp() is safe because zfs_prop_to_name() always returns
4364		 * a bounded string.
4365		 */
4366		for (i = 0; delayable[i] != 0; i++) {
4367			if (strcmp(zfs_prop_to_name(delayable[i]),
4368			    nvpair_name(nvp)) == 0) {
4369				break;
4370			}
4371		}
4372		if (delayable[i] != 0) {
4373			tmp = nvlist_prev_nvpair(props, nvp);
4374			VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4375			VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4376			nvp = tmp;
4377		}
4378	}
4379
4380	if (nvlist_empty(delayprops)) {
4381		nvlist_free(delayprops);
4382		delayprops = NULL;
4383	}
4384	return (delayprops);
4385}
4386
4387#ifdef	DEBUG
4388static boolean_t zfs_ioc_recv_inject_err;
4389#endif
4390
4391/*
4392 * inputs:
4393 * zc_name		name of containing filesystem
4394 * zc_nvlist_src{_size}	nvlist of properties to apply
4395 * zc_value		name of snapshot to create
4396 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
4397 * zc_cookie		file descriptor to recv from
4398 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
4399 * zc_guid		force flag
4400 * zc_cleanup_fd	cleanup-on-exit file descriptor
4401 * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
4402 * zc_resumable		if data is incomplete assume sender will resume
4403 *
4404 * outputs:
4405 * zc_cookie		number of bytes read
4406 * zc_nvlist_dst{_size} error for each unapplied received property
4407 * zc_obj		zprop_errflags_t
4408 * zc_action_handle	handle for this guid/ds mapping
4409 */
4410static int
4411zfs_ioc_recv(zfs_cmd_t *zc)
4412{
4413	file_t *fp;
4414	dmu_recv_cookie_t drc;
4415	boolean_t force = (boolean_t)zc->zc_guid;
4416	int fd;
4417	int error = 0;
4418	int props_error = 0;
4419	nvlist_t *errors;
4420	offset_t off;
4421	nvlist_t *props = NULL; /* sent properties */
4422	nvlist_t *origprops = NULL; /* existing properties */
4423	nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
4424	char *origin = NULL;
4425	char *tosnap;
4426	char tofs[ZFS_MAX_DATASET_NAME_LEN];
4427	cap_rights_t rights;
4428	boolean_t first_recvd_props = B_FALSE;
4429
4430	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4431	    strchr(zc->zc_value, '@') == NULL ||
4432	    strchr(zc->zc_value, '%'))
4433		return (SET_ERROR(EINVAL));
4434
4435	(void) strcpy(tofs, zc->zc_value);
4436	tosnap = strchr(tofs, '@');
4437	*tosnap++ = '\0';
4438
4439	if (zc->zc_nvlist_src != 0 &&
4440	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
4441	    zc->zc_iflags, &props)) != 0)
4442		return (error);
4443
4444	fd = zc->zc_cookie;
4445#ifdef illumos
4446	fp = getf(fd);
4447#else
4448	fget_read(curthread, fd, cap_rights_init(&rights, CAP_PREAD), &fp);
4449#endif
4450	if (fp == NULL) {
4451		nvlist_free(props);
4452		return (SET_ERROR(EBADF));
4453	}
4454
4455	errors = fnvlist_alloc();
4456
4457	if (zc->zc_string[0])
4458		origin = zc->zc_string;
4459
4460	error = dmu_recv_begin(tofs, tosnap,
4461	    &zc->zc_begin_record, force, zc->zc_resumable, origin, &drc);
4462	if (error != 0)
4463		goto out;
4464
4465	/*
4466	 * Set properties before we receive the stream so that they are applied
4467	 * to the new data. Note that we must call dmu_recv_stream() if
4468	 * dmu_recv_begin() succeeds.
4469	 */
4470	if (props != NULL && !drc.drc_newfs) {
4471		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
4472		    SPA_VERSION_RECVD_PROPS &&
4473		    !dsl_prop_get_hasrecvd(tofs))
4474			first_recvd_props = B_TRUE;
4475
4476		/*
4477		 * If new received properties are supplied, they are to
4478		 * completely replace the existing received properties, so stash
4479		 * away the existing ones.
4480		 */
4481		if (dsl_prop_get_received(tofs, &origprops) == 0) {
4482			nvlist_t *errlist = NULL;
4483			/*
4484			 * Don't bother writing a property if its value won't
4485			 * change (and avoid the unnecessary security checks).
4486			 *
4487			 * The first receive after SPA_VERSION_RECVD_PROPS is a
4488			 * special case where we blow away all local properties
4489			 * regardless.
4490			 */
4491			if (!first_recvd_props)
4492				props_reduce(props, origprops);
4493			if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
4494				(void) nvlist_merge(errors, errlist, 0);
4495			nvlist_free(errlist);
4496
4497			if (clear_received_props(tofs, origprops,
4498			    first_recvd_props ? NULL : props) != 0)
4499				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
4500		} else {
4501			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
4502		}
4503	}
4504
4505	if (props != NULL) {
4506		props_error = dsl_prop_set_hasrecvd(tofs);
4507
4508		if (props_error == 0) {
4509			delayprops = extract_delay_props(props);
4510			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4511			    props, errors);
4512		}
4513	}
4514
4515	off = fp->f_offset;
4516	error = dmu_recv_stream(&drc, fp, &off, zc->zc_cleanup_fd,
4517	    &zc->zc_action_handle);
4518
4519	if (error == 0) {
4520		zfsvfs_t *zfsvfs = NULL;
4521
4522		if (getzfsvfs(tofs, &zfsvfs) == 0) {
4523			/* online recv */
4524			dsl_dataset_t *ds;
4525			int end_err;
4526
4527			ds = dmu_objset_ds(zfsvfs->z_os);
4528			error = zfs_suspend_fs(zfsvfs);
4529			/*
4530			 * If the suspend fails, then the recv_end will
4531			 * likely also fail, and clean up after itself.
4532			 */
4533			end_err = dmu_recv_end(&drc, zfsvfs);
4534			if (error == 0)
4535				error = zfs_resume_fs(zfsvfs, ds);
4536			error = error ? error : end_err;
4537#ifdef illumos
4538			VFS_RELE(zfsvfs->z_vfs);
4539#else
4540			vfs_unbusy(zfsvfs->z_vfs);
4541#endif
4542		} else {
4543			error = dmu_recv_end(&drc, NULL);
4544		}
4545
4546		/* Set delayed properties now, after we're done receiving. */
4547		if (delayprops != NULL && error == 0) {
4548			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4549			    delayprops, errors);
4550		}
4551	}
4552
4553	if (delayprops != NULL) {
4554		/*
4555		 * Merge delayed props back in with initial props, in case
4556		 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
4557		 * we have to make sure clear_received_props() includes
4558		 * the delayed properties).
4559		 *
4560		 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
4561		 * using ASSERT() will be just like a VERIFY.
4562		 */
4563		ASSERT(nvlist_merge(props, delayprops, 0) == 0);
4564		nvlist_free(delayprops);
4565	}
4566
4567	/*
4568	 * Now that all props, initial and delayed, are set, report the prop
4569	 * errors to the caller.
4570	 */
4571	if (zc->zc_nvlist_dst_size != 0 &&
4572	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
4573	    put_nvlist(zc, errors) != 0)) {
4574		/*
4575		 * Caller made zc->zc_nvlist_dst less than the minimum expected
4576		 * size or supplied an invalid address.
4577		 */
4578		props_error = SET_ERROR(EINVAL);
4579	}
4580
4581	zc->zc_cookie = off - fp->f_offset;
4582	if (off >= 0 && off <= MAXOFFSET_T)
4583		fp->f_offset = off;
4584
4585#ifdef	DEBUG
4586	if (zfs_ioc_recv_inject_err) {
4587		zfs_ioc_recv_inject_err = B_FALSE;
4588		error = 1;
4589	}
4590#endif
4591
4592#ifdef __FreeBSD__
4593	if (error == 0)
4594		zvol_create_minors(tofs);
4595#endif
4596
4597	/*
4598	 * On error, restore the original props.
4599	 */
4600	if (error != 0 && props != NULL && !drc.drc_newfs) {
4601		if (clear_received_props(tofs, props, NULL) != 0) {
4602			/*
4603			 * We failed to clear the received properties.
4604			 * Since we may have left a $recvd value on the
4605			 * system, we can't clear the $hasrecvd flag.
4606			 */
4607			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4608		} else if (first_recvd_props) {
4609			dsl_prop_unset_hasrecvd(tofs);
4610		}
4611
4612		if (origprops == NULL && !drc.drc_newfs) {
4613			/* We failed to stash the original properties. */
4614			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4615		}
4616
4617		/*
4618		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
4619		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in the
4621		 * first new-style receive.
4622		 */
4623		if (origprops != NULL &&
4624		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
4625		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
4626		    origprops, NULL) != 0) {
4627			/*
4628			 * We stashed the original properties but failed to
4629			 * restore them.
4630			 */
4631			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4632		}
4633	}
4634out:
4635	nvlist_free(props);
4636	nvlist_free(origprops);
4637	nvlist_free(errors);
4638	releasef(fd);
4639
4640	if (error == 0)
4641		error = props_error;
4642
4643	return (error);
4644}
4645
4646/*
4647 * inputs:
4648 * zc_name	name of snapshot to send
4649 * zc_cookie	file descriptor to send stream to
4650 * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
4651 * zc_sendobj	objsetid of snapshot to send
4652 * zc_fromobj	objsetid of incremental fromsnap (may be zero)
4653 * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
4654 *		output size in zc_objset_type.
4655 * zc_flags	lzc_send_flags
4656 *
4657 * outputs:
4658 * zc_objset_type	estimated size, if zc_guid is set
4659 */
4660static int
4661zfs_ioc_send(zfs_cmd_t *zc)
4662{
4663	int error;
4664	offset_t off;
4665	boolean_t estimate = (zc->zc_guid != 0);
4666	boolean_t embedok = (zc->zc_flags & 0x1);
4667	boolean_t large_block_ok = (zc->zc_flags & 0x2);
4668	boolean_t compressok = (zc->zc_flags & 0x4);
4669
4670	if (zc->zc_obj != 0) {
4671		dsl_pool_t *dp;
4672		dsl_dataset_t *tosnap;
4673
4674		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4675		if (error != 0)
4676			return (error);
4677
4678		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4679		if (error != 0) {
4680			dsl_pool_rele(dp, FTAG);
4681			return (error);
4682		}
4683
4684		if (dsl_dir_is_clone(tosnap->ds_dir))
4685			zc->zc_fromobj =
4686			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
4687		dsl_dataset_rele(tosnap, FTAG);
4688		dsl_pool_rele(dp, FTAG);
4689	}
4690
4691	if (estimate) {
4692		dsl_pool_t *dp;
4693		dsl_dataset_t *tosnap;
4694		dsl_dataset_t *fromsnap = NULL;
4695
4696		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4697		if (error != 0)
4698			return (error);
4699
4700		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4701		if (error != 0) {
4702			dsl_pool_rele(dp, FTAG);
4703			return (error);
4704		}
4705
4706		if (zc->zc_fromobj != 0) {
4707			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
4708			    FTAG, &fromsnap);
4709			if (error != 0) {
4710				dsl_dataset_rele(tosnap, FTAG);
4711				dsl_pool_rele(dp, FTAG);
4712				return (error);
4713			}
4714		}
4715
4716		error = dmu_send_estimate(tosnap, fromsnap, compressok,
4717		    &zc->zc_objset_type);
4718
4719		if (fromsnap != NULL)
4720			dsl_dataset_rele(fromsnap, FTAG);
4721		dsl_dataset_rele(tosnap, FTAG);
4722		dsl_pool_rele(dp, FTAG);
4723	} else {
4724		file_t *fp;
4725		cap_rights_t rights;
4726
4727#ifdef illumos
4728		fp = getf(zc->zc_cookie);
4729#else
4730		fget_write(curthread, zc->zc_cookie,
4731		    cap_rights_init(&rights, CAP_WRITE), &fp);
4732#endif
4733		if (fp == NULL)
4734			return (SET_ERROR(EBADF));
4735
4736		off = fp->f_offset;
4737		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
4738		    zc->zc_fromobj, embedok, large_block_ok, compressok,
4739#ifdef illumos
4740		    zc->zc_cookie, fp->f_vnode, &off);
4741#else
4742		    zc->zc_cookie, fp, &off);
4743#endif
4744
4745		if (off >= 0 && off <= MAXOFFSET_T)
4746			fp->f_offset = off;
4747		releasef(zc->zc_cookie);
4748	}
4749	return (error);
4750}
4751
4752/*
4753 * inputs:
4754 * zc_name	name of snapshot on which to report progress
4755 * zc_cookie	file descriptor of send stream
4756 *
4757 * outputs:
4758 * zc_cookie	number of bytes written in send stream thus far
4759 */
4760static int
4761zfs_ioc_send_progress(zfs_cmd_t *zc)
4762{
4763	dsl_pool_t *dp;
4764	dsl_dataset_t *ds;
4765	dmu_sendarg_t *dsp = NULL;
4766	int error;
4767
4768	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4769	if (error != 0)
4770		return (error);
4771
4772	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4773	if (error != 0) {
4774		dsl_pool_rele(dp, FTAG);
4775		return (error);
4776	}
4777
4778	mutex_enter(&ds->ds_sendstream_lock);
4779
4780	/*
4781	 * Iterate over all the send streams currently active on this dataset.
4782	 * If there's one which matches the specified file descriptor _and_ the
4783	 * stream was started by the current process, return the progress of
4784	 * that stream.
4785	 */
4786	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
4787	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
4788		if (dsp->dsa_outfd == zc->zc_cookie &&
4789		    dsp->dsa_proc == curproc)
4790			break;
4791	}
4792
4793	if (dsp != NULL)
4794		zc->zc_cookie = *(dsp->dsa_off);
4795	else
4796		error = SET_ERROR(ENOENT);
4797
4798	mutex_exit(&ds->ds_sendstream_lock);
4799	dsl_dataset_rele(ds, FTAG);
4800	dsl_pool_rele(dp, FTAG);
4801	return (error);
4802}
4803
4804static int
4805zfs_ioc_inject_fault(zfs_cmd_t *zc)
4806{
4807	int id, error;
4808
4809	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
4810	    &zc->zc_inject_record);
4811
4812	if (error == 0)
4813		zc->zc_guid = (uint64_t)id;
4814
4815	return (error);
4816}
4817
4818static int
4819zfs_ioc_clear_fault(zfs_cmd_t *zc)
4820{
4821	return (zio_clear_fault((int)zc->zc_guid));
4822}
4823
4824static int
4825zfs_ioc_inject_list_next(zfs_cmd_t *zc)
4826{
4827	int id = (int)zc->zc_guid;
4828	int error;
4829
4830	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
4831	    &zc->zc_inject_record);
4832
4833	zc->zc_guid = id;
4834
4835	return (error);
4836}
4837
4838static int
4839zfs_ioc_error_log(zfs_cmd_t *zc)
4840{
4841	spa_t *spa;
4842	int error;
4843	size_t count = (size_t)zc->zc_nvlist_dst_size;
4844
4845	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4846		return (error);
4847
4848	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4849	    &count);
4850	if (error == 0)
4851		zc->zc_nvlist_dst_size = count;
4852	else
4853		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4854
4855	spa_close(spa, FTAG);
4856
4857	return (error);
4858}
4859
4860static int
4861zfs_ioc_clear(zfs_cmd_t *zc)
4862{
4863	spa_t *spa;
4864	vdev_t *vd;
4865	int error;
4866
4867	/*
4868	 * On zpool clear we also fix up missing slogs
4869	 */
4870	mutex_enter(&spa_namespace_lock);
4871	spa = spa_lookup(zc->zc_name);
4872	if (spa == NULL) {
4873		mutex_exit(&spa_namespace_lock);
4874		return (SET_ERROR(EIO));
4875	}
4876	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4877		/* we need to let spa_open/spa_load clear the chains */
4878		spa_set_log_state(spa, SPA_LOG_CLEAR);
4879	}
4880	spa->spa_last_open_failed = 0;
4881	mutex_exit(&spa_namespace_lock);
4882
4883	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4884		error = spa_open(zc->zc_name, &spa, FTAG);
4885	} else {
4886		nvlist_t *policy;
4887		nvlist_t *config = NULL;
4888
4889		if (zc->zc_nvlist_src == 0)
4890			return (SET_ERROR(EINVAL));
4891
4892		if ((error = get_nvlist(zc->zc_nvlist_src,
4893		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4894			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4895			    policy, &config);
4896			if (config != NULL) {
4897				int err;
4898
4899				if ((err = put_nvlist(zc, config)) != 0)
4900					error = err;
4901				nvlist_free(config);
4902			}
4903			nvlist_free(policy);
4904		}
4905	}
4906
4907	if (error != 0)
4908		return (error);
4909
4910	spa_vdev_state_enter(spa, SCL_NONE);
4911
4912	if (zc->zc_guid == 0) {
4913		vd = NULL;
4914	} else {
4915		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4916		if (vd == NULL) {
4917			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
4918			spa_close(spa, FTAG);
4919			return (SET_ERROR(ENODEV));
4920		}
4921	}
4922
4923	vdev_clear(spa, vd);
4924
4925	(void) spa_vdev_state_exit(spa, NULL, 0);
4926
4927	/*
4928	 * Resume any suspended I/Os.
4929	 */
4930	if (zio_resume(spa) != 0)
4931		error = SET_ERROR(EIO);
4932
4933	spa_close(spa, FTAG);
4934
4935	return (error);
4936}
4937
4938static int
4939zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4940{
4941	spa_t *spa;
4942	int error;
4943
4944	error = spa_open(zc->zc_name, &spa, FTAG);
4945	if (error != 0)
4946		return (error);
4947
4948	spa_vdev_state_enter(spa, SCL_NONE);
4949
4950	/*
4951	 * If a resilver is already in progress then set the
4952	 * spa_scrub_reopen flag to B_TRUE so that we don't restart
4953	 * the scan as a side effect of the reopen. Otherwise, let
4954	 * vdev_open() decided if a resilver is required.
4955	 */
4956	spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
4957	vdev_reopen(spa->spa_root_vdev);
4958	spa->spa_scrub_reopen = B_FALSE;
4959
4960	(void) spa_vdev_state_exit(spa, NULL, 0);
4961	spa_close(spa, FTAG);
4962	return (0);
4963}
4964/*
4965 * inputs:
4966 * zc_name	name of filesystem
4967 *
4968 * outputs:
4969 * zc_string	name of conflicting snapshot, if there is one
4970 */
4971static int
4972zfs_ioc_promote(zfs_cmd_t *zc)
4973{
4974	dsl_pool_t *dp;
4975	dsl_dataset_t *ds, *ods;
4976	char origin[ZFS_MAX_DATASET_NAME_LEN];
4977	char *cp;
4978	int error;
4979
4980	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4981	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
4982	    strchr(zc->zc_name, '%'))
4983		return (SET_ERROR(EINVAL));
4984
4985	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4986	if (error != 0)
4987		return (error);
4988
4989	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4990	if (error != 0) {
4991		dsl_pool_rele(dp, FTAG);
4992		return (error);
4993	}
4994
4995	if (!dsl_dir_is_clone(ds->ds_dir)) {
4996		dsl_dataset_rele(ds, FTAG);
4997		dsl_pool_rele(dp, FTAG);
4998		return (SET_ERROR(EINVAL));
4999	}
5000
5001	error = dsl_dataset_hold_obj(dp,
5002	    dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
5003	if (error != 0) {
5004		dsl_dataset_rele(ds, FTAG);
5005		dsl_pool_rele(dp, FTAG);
5006		return (error);
5007	}
5008
5009	dsl_dataset_name(ods, origin);
5010	dsl_dataset_rele(ods, FTAG);
5011	dsl_dataset_rele(ds, FTAG);
5012	dsl_pool_rele(dp, FTAG);
5013
5014	/*
5015	 * We don't need to unmount *all* the origin fs's snapshots, but
5016	 * it's easier.
5017	 */
5018	cp = strchr(origin, '@');
5019	if (cp)
5020		*cp = '\0';
5021	(void) dmu_objset_find(origin,
5022	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
5023	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
5024}
5025
5026/*
5027 * Retrieve a single {user|group}{used|quota}@... property.
5028 *
5029 * inputs:
5030 * zc_name	name of filesystem
5031 * zc_objset_type zfs_userquota_prop_t
5032 * zc_value	domain name (eg. "S-1-234-567-89")
5033 * zc_guid	RID/UID/GID
5034 *
5035 * outputs:
5036 * zc_cookie	property value
5037 */
5038static int
5039zfs_ioc_userspace_one(zfs_cmd_t *zc)
5040{
5041	zfsvfs_t *zfsvfs;
5042	int error;
5043
5044	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
5045		return (SET_ERROR(EINVAL));
5046
5047	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5048	if (error != 0)
5049		return (error);
5050
5051	error = zfs_userspace_one(zfsvfs,
5052	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
5053	zfsvfs_rele(zfsvfs, FTAG);
5054
5055	return (error);
5056}
5057
5058/*
5059 * inputs:
5060 * zc_name		name of filesystem
5061 * zc_cookie		zap cursor
5062 * zc_objset_type	zfs_userquota_prop_t
5063 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
5064 *
5065 * outputs:
5066 * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
5067 * zc_cookie	zap cursor
5068 */
5069static int
5070zfs_ioc_userspace_many(zfs_cmd_t *zc)
5071{
5072	zfsvfs_t *zfsvfs;
5073	int bufsize = zc->zc_nvlist_dst_size;
5074
5075	if (bufsize <= 0)
5076		return (SET_ERROR(ENOMEM));
5077
5078	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5079	if (error != 0)
5080		return (error);
5081
5082	void *buf = kmem_alloc(bufsize, KM_SLEEP);
5083
5084	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
5085	    buf, &zc->zc_nvlist_dst_size);
5086
5087	if (error == 0) {
5088		error = ddi_copyout(buf,
5089		    (void *)(uintptr_t)zc->zc_nvlist_dst,
5090		    zc->zc_nvlist_dst_size, zc->zc_iflags);
5091	}
5092	kmem_free(buf, bufsize);
5093	zfsvfs_rele(zfsvfs, FTAG);
5094
5095	return (error);
5096}
5097
5098/*
5099 * inputs:
5100 * zc_name		name of filesystem
5101 *
5102 * outputs:
5103 * none
5104 */
5105static int
5106zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
5107{
5108	objset_t *os;
5109	int error = 0;
5110	zfsvfs_t *zfsvfs;
5111
5112	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
5113		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
5114			/*
5115			 * If userused is not enabled, it may be because the
5116			 * objset needs to be closed & reopened (to grow the
5117			 * objset_phys_t).  Suspend/resume the fs will do that.
5118			 */
5119			dsl_dataset_t *ds, *newds;
5120
5121			ds = dmu_objset_ds(zfsvfs->z_os);
5122			error = zfs_suspend_fs(zfsvfs);
5123			if (error == 0) {
5124				dmu_objset_refresh_ownership(ds, &newds,
5125				    zfsvfs);
5126				error = zfs_resume_fs(zfsvfs, newds);
5127			}
5128		}
5129		if (error == 0)
5130			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
5131#ifdef illumos
5132		VFS_RELE(zfsvfs->z_vfs);
5133#else
5134		vfs_unbusy(zfsvfs->z_vfs);
5135#endif
5136	} else {
5137		/* XXX kind of reading contents without owning */
5138		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5139		if (error != 0)
5140			return (error);
5141
5142		error = dmu_objset_userspace_upgrade(os);
5143		dmu_objset_rele(os, FTAG);
5144	}
5145
5146	return (error);
5147}
5148
5149#ifdef illumos
5150/*
5151 * We don't want to have a hard dependency
5152 * against some special symbols in sharefs
5153 * nfs, and smbsrv.  Determine them if needed when
5154 * the first file system is shared.
5155 * Neither sharefs, nfs or smbsrv are unloadable modules.
5156 */
5157int (*znfsexport_fs)(void *arg);
5158int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
5159int (*zsmbexport_fs)(void *arg, boolean_t add_share);
5160
5161int zfs_nfsshare_inited;
5162int zfs_smbshare_inited;
5163
5164ddi_modhandle_t nfs_mod;
5165ddi_modhandle_t sharefs_mod;
5166ddi_modhandle_t smbsrv_mod;
5167#endif	/* illumos */
5168kmutex_t zfs_share_lock;
5169
#ifdef illumos
/*
 * Open the sharefs module and look up its "sharefs_impl" entry point if
 * that has not been done yet.  The caller must hold zfs_share_lock.
 * Returns 0 on success or ENOSYS if either step fails.
 */
static int
zfs_init_sharefs()
{
	int error;

	ASSERT(MUTEX_HELD(&zfs_share_lock));

	/* Both NFS and SMB shares also require sharetab support. */
	if (sharefs_mod == NULL) {
		sharefs_mod = ddi_modopen("fs/sharefs", KRTLD_MODE_FIRST,
		    &error);
		if (sharefs_mod == NULL)
			return (SET_ERROR(ENOSYS));
	}

	if (zshare_fs == NULL) {
		zshare_fs = (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
		    ddi_modsym(sharefs_mod, "sharefs_impl", &error);
		if (zshare_fs == NULL)
			return (SET_ERROR(ENOSYS));
	}

	return (0);
}
#endif	/* illumos */
5191
5192static int
5193zfs_ioc_share(zfs_cmd_t *zc)
5194{
5195#ifdef illumos
5196	int error;
5197	int opcode;
5198
5199	switch (zc->zc_share.z_sharetype) {
5200	case ZFS_SHARE_NFS:
5201	case ZFS_UNSHARE_NFS:
5202		if (zfs_nfsshare_inited == 0) {
5203			mutex_enter(&zfs_share_lock);
5204			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
5205			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5206				mutex_exit(&zfs_share_lock);
5207				return (SET_ERROR(ENOSYS));
5208			}
5209			if (znfsexport_fs == NULL &&
5210			    ((znfsexport_fs = (int (*)(void *))
5211			    ddi_modsym(nfs_mod,
5212			    "nfs_export", &error)) == NULL)) {
5213				mutex_exit(&zfs_share_lock);
5214				return (SET_ERROR(ENOSYS));
5215			}
5216			error = zfs_init_sharefs();
5217			if (error != 0) {
5218				mutex_exit(&zfs_share_lock);
5219				return (SET_ERROR(ENOSYS));
5220			}
5221			zfs_nfsshare_inited = 1;
5222			mutex_exit(&zfs_share_lock);
5223		}
5224		break;
5225	case ZFS_SHARE_SMB:
5226	case ZFS_UNSHARE_SMB:
5227		if (zfs_smbshare_inited == 0) {
5228			mutex_enter(&zfs_share_lock);
5229			if (smbsrv_mod == NULL && ((smbsrv_mod =
5230			    ddi_modopen("drv/smbsrv",
5231			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5232				mutex_exit(&zfs_share_lock);
5233				return (SET_ERROR(ENOSYS));
5234			}
5235			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
5236			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
5237			    "smb_server_share", &error)) == NULL)) {
5238				mutex_exit(&zfs_share_lock);
5239				return (SET_ERROR(ENOSYS));
5240			}
5241			error = zfs_init_sharefs();
5242			if (error != 0) {
5243				mutex_exit(&zfs_share_lock);
5244				return (SET_ERROR(ENOSYS));
5245			}
5246			zfs_smbshare_inited = 1;
5247			mutex_exit(&zfs_share_lock);
5248		}
5249		break;
5250	default:
5251		return (SET_ERROR(EINVAL));
5252	}
5253
5254	switch (zc->zc_share.z_sharetype) {
5255	case ZFS_SHARE_NFS:
5256	case ZFS_UNSHARE_NFS:
5257		if (error =
5258		    znfsexport_fs((void *)
5259		    (uintptr_t)zc->zc_share.z_exportdata))
5260			return (error);
5261		break;
5262	case ZFS_SHARE_SMB:
5263	case ZFS_UNSHARE_SMB:
5264		if (error = zsmbexport_fs((void *)
5265		    (uintptr_t)zc->zc_share.z_exportdata,
5266		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
5267		    B_TRUE: B_FALSE)) {
5268			return (error);
5269		}
5270		break;
5271	}
5272
5273	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
5274	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
5275	    SHAREFS_ADD : SHAREFS_REMOVE;
5276
5277	/*
5278	 * Add or remove share from sharetab
5279	 */
5280	error = zshare_fs(opcode,
5281	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
5282	    zc->zc_share.z_sharemax);
5283
5284	return (error);
5285
5286#else	/* !illumos */
5287	return (ENOSYS);
5288#endif	/* illumos */
5289}
5290
5291ace_t full_access[] = {
5292	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
5293};
5294
5295/*
5296 * inputs:
5297 * zc_name		name of containing filesystem
5298 * zc_obj		object # beyond which we want next in-use object #
5299 *
5300 * outputs:
5301 * zc_obj		next in-use object #
5302 */
5303static int
5304zfs_ioc_next_obj(zfs_cmd_t *zc)
5305{
5306	objset_t *os = NULL;
5307	int error;
5308
5309	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5310	if (error != 0)
5311		return (error);
5312
5313	error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
5314	    dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg);
5315
5316	dmu_objset_rele(os, FTAG);
5317	return (error);
5318}
5319
5320/*
5321 * inputs:
5322 * zc_name		name of filesystem
5323 * zc_value		prefix name for snapshot
5324 * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
5325 *
5326 * outputs:
5327 * zc_value		short name of new snapshot
5328 */
5329static int
5330zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5331{
5332	char *snap_name;
5333	char *hold_name;
5334	int error;
5335	minor_t minor;
5336
5337	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5338	if (error != 0)
5339		return (error);
5340
5341	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5342	    (u_longlong_t)ddi_get_lbolt64());
5343	hold_name = kmem_asprintf("%%%s", zc->zc_value);
5344
5345	error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5346	    hold_name);
5347	if (error == 0)
5348		(void) strcpy(zc->zc_value, snap_name);
5349	strfree(snap_name);
5350	strfree(hold_name);
5351	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5352	return (error);
5353}
5354
5355/*
5356 * inputs:
5357 * zc_name		name of "to" snapshot
5358 * zc_value		name of "from" snapshot
5359 * zc_cookie		file descriptor to write diff data on
5360 *
5361 * outputs:
5362 * dmu_diff_record_t's to the file descriptor
5363 */
5364static int
5365zfs_ioc_diff(zfs_cmd_t *zc)
5366{
5367	file_t *fp;
5368	cap_rights_t rights;
5369	offset_t off;
5370	int error;
5371
5372#ifdef illumos
5373	fp = getf(zc->zc_cookie);
5374#else
5375	fget_write(curthread, zc->zc_cookie,
5376		    cap_rights_init(&rights, CAP_WRITE), &fp);
5377#endif
5378	if (fp == NULL)
5379		return (SET_ERROR(EBADF));
5380
5381	off = fp->f_offset;
5382
5383#ifdef illumos
5384	error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5385#else
5386	error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
5387#endif
5388
5389	if (off >= 0 && off <= MAXOFFSET_T)
5390		fp->f_offset = off;
5391	releasef(zc->zc_cookie);
5392
5393	return (error);
5394}
5395
#ifdef illumos
/*
 * Remove all ACL files in shares dir.
 *
 * Walks the directory's ZAP entries and removes each one by name.  The walk
 * only stops on a non-zero result from zap_cursor_retrieve() or
 * VOP_REMOVE(), and that value is what gets returned.
 * NOTE(review): presumably the normal end-of-directory result is ENOENT, in
 * which case a complete purge also returns non-zero -- confirm with callers.
 */
static int
zfs_smb_acl_purge(znode_t *dzp)
{
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zap_cursor_t cursor;
	zap_attribute_t entry;
	int error;

	zap_cursor_init(&cursor, zfsvfs->z_os, dzp->z_id);
	while ((error = zap_cursor_retrieve(&cursor, &entry)) == 0) {
		error = VOP_REMOVE(ZTOV(dzp), entry.za_name, kcred, NULL, 0);
		if (error != 0)
			break;
		zap_cursor_advance(&cursor);
	}
	zap_cursor_fini(&cursor);

	return (error);
}
#endif	/* illumos */
5419
5420static int
5421zfs_ioc_smb_acl(zfs_cmd_t *zc)
5422{
5423#ifdef illumos
5424	vnode_t *vp;
5425	znode_t *dzp;
5426	vnode_t *resourcevp = NULL;
5427	znode_t *sharedir;
5428	zfsvfs_t *zfsvfs;
5429	nvlist_t *nvlist;
5430	char *src, *target;
5431	vattr_t vattr;
5432	vsecattr_t vsec;
5433	int error = 0;
5434
5435	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5436	    NO_FOLLOW, NULL, &vp)) != 0)
5437		return (error);
5438
5439	/* Now make sure mntpnt and dataset are ZFS */
5440
5441	if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
5442	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5443	    zc->zc_name) != 0)) {
5444		VN_RELE(vp);
5445		return (SET_ERROR(EINVAL));
5446	}
5447
5448	dzp = VTOZ(vp);
5449	zfsvfs = dzp->z_zfsvfs;
5450	ZFS_ENTER(zfsvfs);
5451
5452	/*
5453	 * Create share dir if its missing.
5454	 */
5455	mutex_enter(&zfsvfs->z_lock);
5456	if (zfsvfs->z_shares_dir == 0) {
5457		dmu_tx_t *tx;
5458
5459		tx = dmu_tx_create(zfsvfs->z_os);
5460		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5461		    ZFS_SHARES_DIR);
5462		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5463		error = dmu_tx_assign(tx, TXG_WAIT);
5464		if (error != 0) {
5465			dmu_tx_abort(tx);
5466		} else {
5467			error = zfs_create_share_dir(zfsvfs, tx);
5468			dmu_tx_commit(tx);
5469		}
5470		if (error != 0) {
5471			mutex_exit(&zfsvfs->z_lock);
5472			VN_RELE(vp);
5473			ZFS_EXIT(zfsvfs);
5474			return (error);
5475		}
5476	}
5477	mutex_exit(&zfsvfs->z_lock);
5478
5479	ASSERT(zfsvfs->z_shares_dir);
5480	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5481		VN_RELE(vp);
5482		ZFS_EXIT(zfsvfs);
5483		return (error);
5484	}
5485
5486	switch (zc->zc_cookie) {
5487	case ZFS_SMB_ACL_ADD:
5488		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5489		vattr.va_type = VREG;
5490		vattr.va_mode = S_IFREG|0777;
5491		vattr.va_uid = 0;
5492		vattr.va_gid = 0;
5493
5494		vsec.vsa_mask = VSA_ACE;
5495		vsec.vsa_aclentp = &full_access;
5496		vsec.vsa_aclentsz = sizeof (full_access);
5497		vsec.vsa_aclcnt = 1;
5498
5499		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5500		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5501		if (resourcevp)
5502			VN_RELE(resourcevp);
5503		break;
5504
5505	case ZFS_SMB_ACL_REMOVE:
5506		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5507		    NULL, 0);
5508		break;
5509
5510	case ZFS_SMB_ACL_RENAME:
5511		if ((error = get_nvlist(zc->zc_nvlist_src,
5512		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5513			VN_RELE(vp);
5514			VN_RELE(ZTOV(sharedir));
5515			ZFS_EXIT(zfsvfs);
5516			return (error);
5517		}
5518		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5519		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5520		    &target)) {
5521			VN_RELE(vp);
5522			VN_RELE(ZTOV(sharedir));
5523			ZFS_EXIT(zfsvfs);
5524			nvlist_free(nvlist);
5525			return (error);
5526		}
5527		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5528		    kcred, NULL, 0);
5529		nvlist_free(nvlist);
5530		break;
5531
5532	case ZFS_SMB_ACL_PURGE:
5533		error = zfs_smb_acl_purge(sharedir);
5534		break;
5535
5536	default:
5537		error = SET_ERROR(EINVAL);
5538		break;
5539	}
5540
5541	VN_RELE(vp);
5542	VN_RELE(ZTOV(sharedir));
5543
5544	ZFS_EXIT(zfsvfs);
5545
5546	return (error);
5547#else	/* !illumos */
5548	return (EOPNOTSUPP);
5549#endif	/* illumos */
5550}
5551
5552/*
5553 * innvl: {
5554 *     "holds" -> { snapname -> holdname (string), ... }
5555 *     (optional) "cleanup_fd" -> fd (int32)
5556 * }
5557 *
5558 * outnvl: {
5559 *     snapname -> error value (int32)
5560 *     ...
5561 * }
5562 */
5563/* ARGSUSED */
5564static int
5565zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5566{
5567	nvpair_t *pair;
5568	nvlist_t *holds;
5569	int cleanup_fd = -1;
5570	int error;
5571	minor_t minor = 0;
5572
5573	error = nvlist_lookup_nvlist(args, "holds", &holds);
5574	if (error != 0)
5575		return (SET_ERROR(EINVAL));
5576
5577	/* make sure the user didn't pass us any invalid (empty) tags */
5578	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
5579	    pair = nvlist_next_nvpair(holds, pair)) {
5580		char *htag;
5581
5582		error = nvpair_value_string(pair, &htag);
5583		if (error != 0)
5584			return (SET_ERROR(error));
5585
5586		if (strlen(htag) == 0)
5587			return (SET_ERROR(EINVAL));
5588	}
5589
5590	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
5591		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
5592		if (error != 0)
5593			return (error);
5594	}
5595
5596	error = dsl_dataset_user_hold(holds, minor, errlist);
5597	if (minor != 0)
5598		zfs_onexit_fd_rele(cleanup_fd);
5599	return (error);
5600}
5601
5602/*
5603 * innvl is not used.
5604 *
5605 * outnvl: {
5606 *    holdname -> time added (uint64 seconds since epoch)
5607 *    ...
5608 * }
5609 */
5610/* ARGSUSED */
5611static int
5612zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
5613{
5614	return (dsl_dataset_get_holds(snapname, outnvl));
5615}
5616
5617/*
5618 * innvl: {
5619 *     snapname -> { holdname, ... }
5620 *     ...
5621 * }
5622 *
5623 * outnvl: {
5624 *     snapname -> error value (int32)
5625 *     ...
5626 * }
5627 */
5628/* ARGSUSED */
5629static int
5630zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
5631{
5632	return (dsl_dataset_user_release(holds, errlist));
5633}
5634
5635/*
5636 * inputs:
5637 * zc_name		name of new filesystem or snapshot
5638 * zc_value		full name of old snapshot
5639 *
5640 * outputs:
5641 * zc_cookie		space in bytes
5642 * zc_objset_type	compressed space in bytes
5643 * zc_perm_action	uncompressed space in bytes
5644 */
5645static int
5646zfs_ioc_space_written(zfs_cmd_t *zc)
5647{
5648	int error;
5649	dsl_pool_t *dp;
5650	dsl_dataset_t *new, *old;
5651
5652	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5653	if (error != 0)
5654		return (error);
5655	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
5656	if (error != 0) {
5657		dsl_pool_rele(dp, FTAG);
5658		return (error);
5659	}
5660	error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
5661	if (error != 0) {
5662		dsl_dataset_rele(new, FTAG);
5663		dsl_pool_rele(dp, FTAG);
5664		return (error);
5665	}
5666
5667	error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
5668	    &zc->zc_objset_type, &zc->zc_perm_action);
5669	dsl_dataset_rele(old, FTAG);
5670	dsl_dataset_rele(new, FTAG);
5671	dsl_pool_rele(dp, FTAG);
5672	return (error);
5673}
5674
5675/*
5676 * innvl: {
5677 *     "firstsnap" -> snapshot name
5678 * }
5679 *
5680 * outnvl: {
5681 *     "used" -> space in bytes
5682 *     "compressed" -> compressed space in bytes
5683 *     "uncompressed" -> uncompressed space in bytes
5684 * }
5685 */
5686static int
5687zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
5688{
5689	int error;
5690	dsl_pool_t *dp;
5691	dsl_dataset_t *new, *old;
5692	char *firstsnap;
5693	uint64_t used, comp, uncomp;
5694
5695	if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
5696		return (SET_ERROR(EINVAL));
5697
5698	error = dsl_pool_hold(lastsnap, FTAG, &dp);
5699	if (error != 0)
5700		return (error);
5701
5702	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
5703	if (error == 0 && !new->ds_is_snapshot) {
5704		dsl_dataset_rele(new, FTAG);
5705		error = SET_ERROR(EINVAL);
5706	}
5707	if (error != 0) {
5708		dsl_pool_rele(dp, FTAG);
5709		return (error);
5710	}
5711	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
5712	if (error == 0 && !old->ds_is_snapshot) {
5713		dsl_dataset_rele(old, FTAG);
5714		error = SET_ERROR(EINVAL);
5715	}
5716	if (error != 0) {
5717		dsl_dataset_rele(new, FTAG);
5718		dsl_pool_rele(dp, FTAG);
5719		return (error);
5720	}
5721
5722	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
5723	dsl_dataset_rele(old, FTAG);
5724	dsl_dataset_rele(new, FTAG);
5725	dsl_pool_rele(dp, FTAG);
5726	fnvlist_add_uint64(outnvl, "used", used);
5727	fnvlist_add_uint64(outnvl, "compressed", comp);
5728	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
5729	return (error);
5730}
5731
5732static int
5733zfs_ioc_jail(zfs_cmd_t *zc)
5734{
5735
5736	return (zone_dataset_attach(curthread->td_ucred, zc->zc_name,
5737	    (int)zc->zc_jailid));
5738}
5739
5740static int
5741zfs_ioc_unjail(zfs_cmd_t *zc)
5742{
5743
5744	return (zone_dataset_detach(curthread->td_ucred, zc->zc_name,
5745	    (int)zc->zc_jailid));
5746}
5747
5748/*
5749 * innvl: {
5750 *     "fd" -> file descriptor to write stream to (int32)
5751 *     (optional) "fromsnap" -> full snap name to send an incremental from
5752 *     (optional) "largeblockok" -> (value ignored)
5753 *         indicates that blocks > 128KB are permitted
5754 *     (optional) "embedok" -> (value ignored)
5755 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
5756 *     (optional) "compressok" -> (value ignored)
5757 *         presence indicates compressed DRR_WRITE records are permitted
5758 *     (optional) "resume_object" and "resume_offset" -> (uint64)
5759 *         if present, resume send stream from specified object and offset.
5760 * }
5761 *
5762 * outnvl is unused
5763 */
5764/* ARGSUSED */
5765static int
5766zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5767{
5768	cap_rights_t rights;
5769	file_t *fp;
5770	int error;
5771	offset_t off;
5772	char *fromname = NULL;
5773	int fd;
5774	boolean_t largeblockok;
5775	boolean_t embedok;
5776	boolean_t compressok;
5777	uint64_t resumeobj = 0;
5778	uint64_t resumeoff = 0;
5779
5780	error = nvlist_lookup_int32(innvl, "fd", &fd);
5781	if (error != 0)
5782		return (SET_ERROR(EINVAL));
5783
5784	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
5785
5786	largeblockok = nvlist_exists(innvl, "largeblockok");
5787	embedok = nvlist_exists(innvl, "embedok");
5788	compressok = nvlist_exists(innvl, "compressok");
5789
5790	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
5791	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
5792
5793#ifdef illumos
5794	file_t *fp = getf(fd);
5795#else
5796	fget_write(curthread, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
5797#endif
5798	if (fp == NULL)
5799		return (SET_ERROR(EBADF));
5800
5801	off = fp->f_offset;
5802	error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
5803#ifdef illumos
5804	    fd, resumeobj, resumeoff, fp->f_vnode, &off);
5805#else
5806	    fd, resumeobj, resumeoff, fp, &off);
5807#endif
5808
5809#ifdef illumos
5810	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5811		fp->f_offset = off;
5812#else
5813	fp->f_offset = off;
5814#endif
5815
5816	releasef(fd);
5817	return (error);
5818}
5819
5820/*
5821 * Determine approximately how large a zfs send stream will be -- the number
5822 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
5823 *
5824 * innvl: {
5825 *     (optional) "from" -> full snap or bookmark name to send an incremental
5826 *                          from
5827 *     (optional) "largeblockok" -> (value ignored)
5828 *         indicates that blocks > 128KB are permitted
5829 *     (optional) "embedok" -> (value ignored)
5830 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
5831 *     (optional) "compressok" -> (value ignored)
5832 *         presence indicates compressed DRR_WRITE records are permitted
5833 * }
5834 *
5835 * outnvl: {
5836 *     "space" -> bytes of space (uint64)
5837 * }
5838 */
5839static int
5840zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5841{
5842	dsl_pool_t *dp;
5843	dsl_dataset_t *tosnap;
5844	int error;
5845	char *fromname;
5846	boolean_t compressok;
5847	uint64_t space;
5848
5849	error = dsl_pool_hold(snapname, FTAG, &dp);
5850	if (error != 0)
5851		return (error);
5852
5853	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
5854	if (error != 0) {
5855		dsl_pool_rele(dp, FTAG);
5856		return (error);
5857	}
5858
5859	compressok = nvlist_exists(innvl, "compressok");
5860
5861	error = nvlist_lookup_string(innvl, "from", &fromname);
5862	if (error == 0) {
5863		if (strchr(fromname, '@') != NULL) {
5864			/*
5865			 * If from is a snapshot, hold it and use the more
5866			 * efficient dmu_send_estimate to estimate send space
5867			 * size using deadlists.
5868			 */
5869			dsl_dataset_t *fromsnap;
5870			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
5871			if (error != 0)
5872				goto out;
5873			error = dmu_send_estimate(tosnap, fromsnap, compressok,
5874			    &space);
5875			dsl_dataset_rele(fromsnap, FTAG);
5876		} else if (strchr(fromname, '#') != NULL) {
5877			/*
5878			 * If from is a bookmark, fetch the creation TXG of the
5879			 * snapshot it was created from and use that to find
5880			 * blocks that were born after it.
5881			 */
5882			zfs_bookmark_phys_t frombm;
5883
5884			error = dsl_bookmark_lookup(dp, fromname, tosnap,
5885			    &frombm);
5886			if (error != 0)
5887				goto out;
5888			error = dmu_send_estimate_from_txg(tosnap,
5889			    frombm.zbm_creation_txg, compressok, &space);
5890		} else {
5891			/*
5892			 * from is not properly formatted as a snapshot or
5893			 * bookmark
5894			 */
5895			error = SET_ERROR(EINVAL);
5896			goto out;
5897		}
5898	} else {
5899		/*
5900		 * If estimating the size of a full send, use dmu_send_estimate.
5901		 */
5902		error = dmu_send_estimate(tosnap, NULL, compressok, &space);
5903	}
5904
5905	fnvlist_add_uint64(outnvl, "space", space);
5906
5907out:
5908	dsl_dataset_rele(tosnap, FTAG);
5909	dsl_pool_rele(dp, FTAG);
5910	return (error);
5911}
5912
/*
 * Dispatch table for ioctls, one slot per ioctl number.  Slots are indexed
 * by (ioc - ZFS_IOC_FIRST) and are filled in by zfs_ioctl_register() and
 * zfs_ioctl_register_legacy().
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
5914
5915static void
5916zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5917    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5918    boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
5919{
5920	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5921
5922	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5923	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5924	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5925	ASSERT3P(vec->zvec_func, ==, NULL);
5926
5927	vec->zvec_legacy_func = func;
5928	vec->zvec_secpolicy = secpolicy;
5929	vec->zvec_namecheck = namecheck;
5930	vec->zvec_allow_log = log_history;
5931	vec->zvec_pool_check = pool_check;
5932}
5933
5934/*
5935 * See the block comment at the beginning of this file for details on
5936 * each argument to this function.
5937 */
5938static void
5939zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
5940    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5941    zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
5942    boolean_t allow_log)
5943{
5944	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5945
5946	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5947	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5948	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5949	ASSERT3P(vec->zvec_func, ==, NULL);
5950
5951	/* if we are logging, the name must be valid */
5952	ASSERT(!allow_log || namecheck != NO_NAME);
5953
5954	vec->zvec_name = name;
5955	vec->zvec_func = func;
5956	vec->zvec_secpolicy = secpolicy;
5957	vec->zvec_namecheck = namecheck;
5958	vec->zvec_pool_check = pool_check;
5959	vec->zvec_smush_outnvlist = smush_outnvlist;
5960	vec->zvec_allow_log = allow_log;
5961}
5962
/*
 * Register a legacy handler whose zc_name is validated as a pool name
 * (POOL_NAME).  The security policy, logging flag and pool-state checks are
 * supplied by the caller.
 */
static void
zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
    zfs_ioc_poolcheck_t pool_check)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    POOL_NAME, log_history, pool_check);
}
5971
/*
 * Register a legacy handler whose zc_name is validated as a dataset name
 * (DATASET_NAME) and which is never eligible for history logging (B_FALSE).
 * The security policy and pool-state checks are supplied by the caller.
 */
static void
zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    DATASET_NAME, B_FALSE, pool_check);
}
5979
/*
 * Register a legacy handler with a fixed set of attributes: guarded by
 * zfs_secpolicy_config, zc_name validated as a pool name, eligible for
 * history logging, and subject to both the POOL_CHECK_SUSPENDED and
 * POOL_CHECK_READONLY pool-state checks.
 */
static void
zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
{
	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
5986
/*
 * Register a legacy handler for which zc_name is not validated (NO_NAME),
 * no pool-state check is requested (POOL_CHECK_NONE) and history logging is
 * disabled.  Only the security policy is supplied by the caller.
 */
static void
zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
}
5994
/*
 * Register a legacy handler whose zc_name is validated as a dataset name,
 * which is not eligible for history logging, and which is subject only to
 * the POOL_CHECK_SUSPENDED pool-state check.  The security policy is
 * supplied by the caller.
 */
static void
zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
    zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
}
6002
/*
 * Same as zfs_ioctl_register_dataset_read_secpolicy(), with the security
 * policy fixed to zfs_secpolicy_read.
 */
static void
zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
{
	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
	    zfs_secpolicy_read);
}
6009
/*
 * Register a legacy handler whose zc_name is validated as a dataset name,
 * which is eligible for history logging, and which is subject to both the
 * POOL_CHECK_SUSPENDED and POOL_CHECK_READONLY pool-state checks.  The
 * security policy is supplied by the caller.
 */
static void
zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
6017
/*
 * Populate zfs_ioc_vec[] with every supported ioctl.
 *
 * Each registration pairs a handler with its security policy, the kind of
 * name expected in zc_name, the pool-state checks to apply before dispatch,
 * and whether the call is eligible for history logging.  The register
 * routines assert that no slot is registered twice.
 */
static void
zfs_ioctl_init(void)
{
	/* IOCTLS that use the nvlist-based (zfs_ioc_func_t) signature */

	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);

	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("create", ZFS_IOC_CREATE,
	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("remap", ZFS_IOC_REMAP,
	    zfs_ioc_remap, zfs_secpolicy_remap, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);

	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);

	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
	    POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
	    zfs_ioc_channel_program, zfs_secpolicy_config,
	    POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
	    B_TRUE);

	/* IOCTLS that use the legacy function signature */

	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);

	/* Pool-level operations that modify pool configuration or state. */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
	    zfs_ioc_pool_scan);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
	    zfs_ioc_pool_upgrade);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
	    zfs_ioc_vdev_add);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
	    zfs_ioc_vdev_remove);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
	    zfs_ioc_vdev_set_state);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
	    zfs_ioc_vdev_attach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
	    zfs_ioc_vdev_detach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
	    zfs_ioc_vdev_setpath);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
	    zfs_ioc_vdev_setfru);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
	    zfs_ioc_pool_set_props);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
	    zfs_ioc_vdev_split);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
	    zfs_ioc_pool_reguid);

	/* Operations registered without any name or pool-state check. */
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
	    zfs_ioc_pool_configs, zfs_secpolicy_none);
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);

	/*
	 * pool destroy, and export don't log the history as part of
	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
	 * does the logging of those commands.
	 */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);

	/* Pool-named operations registered without history logging. */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
	    zfs_ioc_dsobj_to_dsname,
	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
	    zfs_ioc_pool_get_history,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);

	/* Dataset operations guarded by zfs_secpolicy_read. */
	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
	    zfs_ioc_space_written);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
	    zfs_ioc_objset_recvd_props);
	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
	    zfs_ioc_next_obj);
	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
	    zfs_ioc_get_fsacl);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
	    zfs_ioc_objset_stats);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
	    zfs_ioc_objset_zplprops);
	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
	    zfs_ioc_dataset_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
	    zfs_ioc_snapshot_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
	    zfs_ioc_send_progress);

	/* Same registration attributes, but with a dedicated policy each. */
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
	    zfs_ioc_diff, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
	    zfs_ioc_send, zfs_secpolicy_send);

	/* Dataset operations that are logged and need a writeable pool. */
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
	    zfs_secpolicy_none);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
	    zfs_secpolicy_destroy);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
	    zfs_secpolicy_rename);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
	    zfs_secpolicy_recv);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
	    zfs_secpolicy_promote);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
	    zfs_secpolicy_set_fsacl);

	/* Dataset operations that are never logged. */
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
	    zfs_secpolicy_share, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);

#ifdef __FreeBSD__
	/* FreeBSD-only ioctls. */
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_JAIL, zfs_ioc_jail,
	    zfs_secpolicy_config, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_UNJAIL, zfs_ioc_unjail,
	    zfs_secpolicy_config, POOL_CHECK_NONE);
	zfs_ioctl_register("fbsd_nextboot", ZFS_IOC_NEXTBOOT,
	    zfs_ioc_nextboot, zfs_secpolicy_config, NO_NAME,
	    POOL_CHECK_NONE, B_FALSE, B_FALSE);
#endif
}
6233
6234int
6235pool_status_check(const char *name, zfs_ioc_namecheck_t type,
6236    zfs_ioc_poolcheck_t check)
6237{
6238	spa_t *spa;
6239	int error;
6240
6241	ASSERT(type == POOL_NAME || type == DATASET_NAME);
6242
6243	if (check & POOL_CHECK_NONE)
6244		return (0);
6245
6246	error = spa_open(name, &spa, FTAG);
6247	if (error == 0) {
6248		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
6249			error = SET_ERROR(EAGAIN);
6250		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
6251			error = SET_ERROR(EROFS);
6252		spa_close(spa, FTAG);
6253	}
6254	return (error);
6255}
6256
6257/*
6258 * Find a free minor number.
6259 */
6260minor_t
6261zfsdev_minor_alloc(void)
6262{
6263	static minor_t last_minor;
6264	minor_t m;
6265
6266	ASSERT(MUTEX_HELD(&spa_namespace_lock));
6267
6268	for (m = last_minor + 1; m != last_minor; m++) {
6269		if (m > ZFSDEV_MAX_MINOR)
6270			m = 1;
6271		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
6272			last_minor = m;
6273			return (m);
6274		}
6275	}
6276
6277	return (0);
6278}
6279
6280static int
6281zfs_ctldev_init(struct cdev *devp)
6282{
6283	minor_t minor;
6284	zfs_soft_state_t *zs;
6285
6286	ASSERT(MUTEX_HELD(&spa_namespace_lock));
6287
6288	minor = zfsdev_minor_alloc();
6289	if (minor == 0)
6290		return (SET_ERROR(ENXIO));
6291
6292	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
6293		return (SET_ERROR(EAGAIN));
6294
6295	devfs_set_cdevpriv((void *)(uintptr_t)minor, zfsdev_close);
6296
6297	zs = ddi_get_soft_state(zfsdev_state, minor);
6298	zs->zss_type = ZSST_CTLDEV;
6299	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
6300
6301	return (0);
6302}
6303
/*
 * Tear down the per-open control-device state created by zfs_ctldev_init():
 * destroy the on-exit list "zo", then free the soft state for "minor".
 * The caller must hold spa_namespace_lock.
 */
static void
zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
{
	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	zfs_onexit_destroy(zo);
	ddi_soft_state_free(zfsdev_state, minor);
}
6312
6313void *
6314zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
6315{
6316	zfs_soft_state_t *zp;
6317
6318	zp = ddi_get_soft_state(zfsdev_state, minor);
6319	if (zp == NULL || zp->zss_type != which)
6320		return (NULL);
6321
6322	return (zp->zss_data);
6323}
6324
6325static int
6326zfsdev_open(struct cdev *devp, int flag, int mode, struct thread *td)
6327{
6328	int error = 0;
6329
6330#ifdef illumos
6331	if (getminor(*devp) != 0)
6332		return (zvol_open(devp, flag, otyp, cr));
6333#endif
6334
6335	/* This is the control device. Allocate a new minor if requested. */
6336	if (flag & FEXCL) {
6337		mutex_enter(&spa_namespace_lock);
6338		error = zfs_ctldev_init(devp);
6339		mutex_exit(&spa_namespace_lock);
6340	}
6341
6342	return (error);
6343}
6344
6345static void
6346zfsdev_close(void *data)
6347{
6348	zfs_onexit_t *zo;
6349	minor_t minor = (minor_t)(uintptr_t)data;
6350
6351	if (minor == 0)
6352		return;
6353
6354	mutex_enter(&spa_namespace_lock);
6355	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
6356	if (zo == NULL) {
6357		mutex_exit(&spa_namespace_lock);
6358		return;
6359	}
6360	zfs_ctldev_destroy(zo, minor);
6361	mutex_exit(&spa_namespace_lock);
6362}
6363
/*
 * ioctl entry point for the ZFS control device.
 *
 * In outline: work out which zfs_cmd_t layout the caller is using
 * (translating older layouts into the current one), copy the command in,
 * unpack the input nvlist, validate zc_name and the pool state, run the
 * registered security policy, dispatch to the registered handler, and copy
 * the (possibly re-translated) command back out.
 *
 * Returns 0 on success or an errno value.
 */
static int
zfsdev_ioctl(struct cdev *dev, u_long zcmd, caddr_t arg, int flag,
    struct thread *td)
{
	zfs_cmd_t *zc;
	uint_t vecnum;
	int error, rc, len;
#ifdef illumos
	minor_t minor = getminor(dev);
#else
	zfs_iocparm_t *zc_iocparm;
	int cflag, cmd, oldvecnum;
	boolean_t newioc, compat;
	void *compat_zc = NULL;
	cred_t *cr = td->td_ucred;
#endif
	const zfs_ioc_vec_t *vec;
	char *saved_poolname = NULL;
	nvlist_t *innvl = NULL;

	/*
	 * NOTE(review): cflag, cmd, compat and newioc are declared only in
	 * the !illumos branch above but are used unconditionally below.
	 */
	cflag = ZFS_CMD_COMPAT_NONE;
	compat = B_FALSE;
	newioc = B_TRUE;	/* "new" style (zfs_iocparm_t) ioctl */

	/* Argument size and command number are both taken from zcmd. */
	len = IOCPARM_LEN(zcmd);
	vecnum = cmd = zcmd & 0xff;

	/*
	 * Check if we are talking to supported older binaries
	 * and translate zfs_cmd if necessary
	 */
	if (len != sizeof(zfs_iocparm_t)) {
		newioc = B_FALSE;
		compat = B_TRUE;

		vecnum = cmd;

		/* The old layout is identified purely by its size. */
		switch (len) {
		case sizeof(zfs_cmd_zcmd_t):
			cflag = ZFS_CMD_COMPAT_LZC;
			break;
		case sizeof(zfs_cmd_deadman_t):
			cflag = ZFS_CMD_COMPAT_DEADMAN;
			break;
		case sizeof(zfs_cmd_v28_t):
			cflag = ZFS_CMD_COMPAT_V28;
			break;
		case sizeof(zfs_cmd_v15_t):
			cflag = ZFS_CMD_COMPAT_V15;
			/*
			 * NOTE(review): cmd (0..255) indexes the table
			 * without a bounds check here -- confirm the table
			 * covers that range.
			 */
			vecnum = zfs_ioctl_v15_to_v28[cmd];

			/*
			 * Return without further handling
			 * if the command is blacklisted.
			 */
			if (vecnum == ZFS_IOC_COMPAT_PASS)
				return (0);
			else if (vecnum == ZFS_IOC_COMPAT_FAIL)
				return (ENOTSUP);
			break;
		default:
			return (EINVAL);
		}
	}

#ifdef illumos
	vecnum = cmd - ZFS_IOC_FIRST;
	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
#endif

	/* Reject command numbers that fall outside the dispatch table. */
	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
		return (SET_ERROR(EINVAL));
	vec = &zfs_ioc_vec[vecnum];

	zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);

#ifdef illumos
	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
	if (error != 0) {
		error = SET_ERROR(EFAULT);
		goto out;
	}
#else	/* !illumos */
	/*
	 * NOTE(review): zc was just obtained from kmem_zalloc(), so this
	 * bzero() looks redundant.
	 */
	bzero(zc, sizeof(zfs_cmd_t));

	if (newioc) {
		/* arg is a zfs_iocparm_t describing the user's zfs_cmd. */
		zc_iocparm = (void *)arg;

		/*
		 * Each known version must come with the matching structure
		 * size; every version other than the current one is handled
		 * through the compat translation path.
		 */
		switch (zc_iocparm->zfs_ioctl_version) {
		case ZFS_IOCVER_CURRENT:
			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_t)) {
				error = SET_ERROR(EINVAL);
				goto out;
			}
			break;
		case ZFS_IOCVER_INLANES:
			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_inlanes_t)) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
			compat = B_TRUE;
			cflag = ZFS_CMD_COMPAT_INLANES;
			break;
		case ZFS_IOCVER_RESUME:
			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_resume_t)) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
			compat = B_TRUE;
			cflag = ZFS_CMD_COMPAT_RESUME;
			break;
		case ZFS_IOCVER_EDBP:
			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_edbp_t)) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
			compat = B_TRUE;
			cflag = ZFS_CMD_COMPAT_EDBP;
			break;
		case ZFS_IOCVER_ZCMD:
			if (zc_iocparm->zfs_cmd_size > sizeof(zfs_cmd_t) ||
			    zc_iocparm->zfs_cmd_size < sizeof(zfs_cmd_zcmd_t)) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
			compat = B_TRUE;
			cflag = ZFS_CMD_COMPAT_ZCMD;
			break;
		default:
			error = SET_ERROR(EINVAL);
			goto out;
			/* NOTREACHED */
		}

		if (compat) {
			/*
			 * Copy the old-layout command into a scratch buffer;
			 * it is translated into zc further below.
			 */
			ASSERT(sizeof(zfs_cmd_t) >= zc_iocparm->zfs_cmd_size);
			compat_zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);
			bzero(compat_zc, sizeof(zfs_cmd_t));

			error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd,
			    compat_zc, zc_iocparm->zfs_cmd_size, flag);
			if (error != 0) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
		} else {
			/* Current layout: copy straight into zc. */
			error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd,
			    zc, zc_iocparm->zfs_cmd_size, flag);
			if (error != 0) {
				error = SET_ERROR(EFAULT);
				goto out;
			}
		}
	}

	if (compat) {
		/*
		 * Translate the old layout into zc.  For new-style ioctls
		 * the source is the scratch copy; for old-style ioctls it is
		 * arg itself.
		 */
		if (newioc) {
			ASSERT(compat_zc != NULL);
			zfs_cmd_compat_get(zc, compat_zc, cflag);
		} else {
			ASSERT(compat_zc == NULL);
			zfs_cmd_compat_get(zc, arg, cflag);
		}
		oldvecnum = vecnum;
		error = zfs_ioctl_compat_pre(zc, &vecnum, cflag);
		if (error != 0)
			goto out;
		/*
		 * NOTE(review): a vecnum rewritten by zfs_ioctl_compat_pre()
		 * is not re-validated against the size of zfs_ioc_vec[]
		 * here -- confirm it always stays in range.
		 */
		if (oldvecnum != vecnum)
			vec = &zfs_ioc_vec[vecnum];
	}
#endif	/* !illumos */

	/* Unpack the caller's input nvlist, if one was supplied. */
	zc->zc_iflags = flag & FKIOCTL;
	if (zc->zc_nvlist_src_size != 0) {
		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
		    zc->zc_iflags, &innvl);
		if (error != 0)
			goto out;
	}

	/* rewrite innvl for backwards compatibility */
	if (compat)
		innvl = zfs_ioctl_compat_innvl(zc, innvl, vecnum, cflag);

	/*
	 * Ensure that all pool/dataset names are valid before we pass down to
	 * the lower layers.
	 */
	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
	switch (vec->zvec_namecheck) {
	case POOL_NAME:
		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
			error = SET_ERROR(EINVAL);
		else
			error = pool_status_check(zc->zc_name,
			    vec->zvec_namecheck, vec->zvec_pool_check);
		break;

	case DATASET_NAME:
		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
			error = SET_ERROR(EINVAL);
		else
			error = pool_status_check(zc->zc_name,
			    vec->zvec_namecheck, vec->zvec_pool_check);
		break;

	case NO_NAME:
		break;
	}

	/* Only consult the security policy if the name checks passed. */
	if (error == 0)
		error = vec->zvec_secpolicy(zc, innvl, cr);

	if (error != 0)
		goto out;

	/* legacy ioctls can modify zc_name */
	/* Save just the pool component: everything before '/', '@' or '#'. */
	len = strcspn(zc->zc_name, "/@#") + 1;
	saved_poolname = kmem_alloc(len, KM_SLEEP);
	(void) strlcpy(saved_poolname, zc->zc_name, len);

	if (vec->zvec_func != NULL) {
		nvlist_t *outnvl;
		int puterror = 0;
		spa_t *spa;
		nvlist_t *lognv = NULL;

		ASSERT(vec->zvec_legacy_func == NULL);

		/*
		 * Add the innvl to the lognv before calling the func,
		 * in case the func changes the innvl.
		 */
		if (vec->zvec_allow_log) {
			lognv = fnvlist_alloc();
			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
			    vec->zvec_name);
			if (!nvlist_empty(innvl)) {
				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
				    innvl);
			}
		}

		outnvl = fnvlist_alloc();
		error = vec->zvec_func(zc->zc_name, innvl, outnvl);

		/*
		 * Some commands can partially execute, modify state, and still
		 * return an error.  In these cases, attempt to record what
		 * was modified.
		 */
		if ((error == 0 ||
		    (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
		    vec->zvec_allow_log &&
		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
			if (!nvlist_empty(outnvl)) {
				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
				    outnvl);
			}
			if (error != 0) {
				fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
				    error);
			}
			(void) spa_history_log_nvl(spa, lognv);
			spa_close(spa, FTAG);
		}
		fnvlist_free(lognv);

		/* rewrite outnvl for backwards compatibility */
		if (compat)
			outnvl = zfs_ioctl_compat_outnvl(zc, outnvl, vecnum,
			    cflag);

		/*
		 * Copy the output nvlist out whenever there is something to
		 * return or the caller provided a destination buffer.  This
		 * happens regardless of the handler's error status.
		 */
		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
			int smusherror = 0;
			if (vec->zvec_smush_outnvlist) {
				smusherror = nvlist_smush(outnvl,
				    zc->zc_nvlist_dst_size);
			}
			if (smusherror == 0)
				puterror = put_nvlist(zc, outnvl);
		}

		/* A failure to copy out overrides the handler's result. */
		if (puterror != 0)
			error = puterror;

		nvlist_free(outnvl);
	} else {
		error = vec->zvec_legacy_func(zc);
	}

out:
	nvlist_free(innvl);

#ifdef illumos
	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
	if (error == 0 && rc != 0)
		error = SET_ERROR(EFAULT);
#else
	if (compat) {
		/* Translate zc back into the caller's layout. */
		zfs_ioctl_compat_post(zc, cmd, cflag);
		if (newioc) {
			ASSERT(compat_zc != NULL);
			ASSERT(sizeof(zfs_cmd_t) >= zc_iocparm->zfs_cmd_size);

			zfs_cmd_compat_put(zc, compat_zc, vecnum, cflag);
			rc = ddi_copyout(compat_zc,
			    (void *)(uintptr_t)zc_iocparm->zfs_cmd,
			    zc_iocparm->zfs_cmd_size, flag);
			if (error == 0 && rc != 0)
				error = SET_ERROR(EFAULT);
			kmem_free(compat_zc, sizeof (zfs_cmd_t));
		} else {
			zfs_cmd_compat_put(zc, arg, vecnum, cflag);
		}
	} else {
		ASSERT(newioc);

		/*
		 * NOTE(review): this branch is also reached by the
		 * "goto out" paths that rejected zfs_cmd_size or the ioctl
		 * version above, yet it still copies out
		 * sizeof (zfs_cmd_t) bytes to the caller's buffer.
		 */
		rc = ddi_copyout(zc, (void *)(uintptr_t)zc_iocparm->zfs_cmd,
		    sizeof (zfs_cmd_t), flag);
		if (error == 0 && rc != 0)
			error = SET_ERROR(EFAULT);
	}
#endif
	/*
	 * On success of a loggable ioctl, hand the saved pool name over to
	 * the thread-specific zfs_allow_log_key slot, freeing any previous
	 * value.  Otherwise the saved name is simply freed.
	 */
	if (error == 0 && vec->zvec_allow_log) {
		char *s = tsd_get(zfs_allow_log_key);
		if (s != NULL)
			strfree(s);
		(void) tsd_set(zfs_allow_log_key, saved_poolname);
	} else {
		if (saved_poolname != NULL)
			strfree(saved_poolname);
	}

	kmem_free(zc, sizeof (zfs_cmd_t));
	return (error);
}
6701
6702#ifdef illumos
6703static int
6704zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
6705{
6706	if (cmd != DDI_ATTACH)
6707		return (DDI_FAILURE);
6708
6709	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
6710	    DDI_PSEUDO, 0) == DDI_FAILURE)
6711		return (DDI_FAILURE);
6712
6713	zfs_dip = dip;
6714
6715	ddi_report_dev(dip);
6716
6717	return (DDI_SUCCESS);
6718}
6719
6720static int
6721zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6722{
6723	if (spa_busy() || zfs_busy() || zvol_busy())
6724		return (DDI_FAILURE);
6725
6726	if (cmd != DDI_DETACH)
6727		return (DDI_FAILURE);
6728
6729	zfs_dip = NULL;
6730
6731	ddi_prop_remove_all(dip);
6732	ddi_remove_minor_node(dip, NULL);
6733
6734	return (DDI_SUCCESS);
6735}
6736
6737/*ARGSUSED*/
6738static int
6739zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
6740{
6741	switch (infocmd) {
6742	case DDI_INFO_DEVT2DEVINFO:
6743		*result = zfs_dip;
6744		return (DDI_SUCCESS);
6745
6746	case DDI_INFO_DEVT2INSTANCE:
6747		*result = (void *)0;
6748		return (DDI_SUCCESS);
6749	}
6750
6751	return (DDI_FAILURE);
6752}
6753#endif	/* illumos */
6754
6755/*
6756 * OK, so this is a little weird.
6757 *
6758 * /dev/zfs is the control node, i.e. minor 0.
6759 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
6760 *
6761 * /dev/zfs has basically nothing to do except serve up ioctls,
6762 * so most of the standard driver entry points are in zvol.c.
6763 */
6764#ifdef illumos
6765static struct cb_ops zfs_cb_ops = {
6766	zfsdev_open,	/* open */
6767	zfsdev_close,	/* close */
6768	zvol_strategy,	/* strategy */
6769	nodev,		/* print */
6770	zvol_dump,	/* dump */
6771	zvol_read,	/* read */
6772	zvol_write,	/* write */
6773	zfsdev_ioctl,	/* ioctl */
6774	nodev,		/* devmap */
6775	nodev,		/* mmap */
6776	nodev,		/* segmap */
6777	nochpoll,	/* poll */
6778	ddi_prop_op,	/* prop_op */
6779	NULL,		/* streamtab */
6780	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
6781	CB_REV,		/* version */
6782	nodev,		/* async read */
6783	nodev,		/* async write */
6784};
6785
6786static struct dev_ops zfs_dev_ops = {
6787	DEVO_REV,	/* version */
6788	0,		/* refcnt */
6789	zfs_info,	/* info */
6790	nulldev,	/* identify */
6791	nulldev,	/* probe */
6792	zfs_attach,	/* attach */
6793	zfs_detach,	/* detach */
6794	nodev,		/* reset */
6795	&zfs_cb_ops,	/* driver operations */
6796	NULL,		/* no bus operations */
6797	NULL,		/* power */
6798	ddi_quiesce_not_needed,	/* quiesce */
6799};
6800
6801static struct modldrv zfs_modldrv = {
6802	&mod_driverops,
6803	"ZFS storage pool",
6804	&zfs_dev_ops
6805};
6806
6807static struct modlinkage modlinkage = {
6808	MODREV_1,
6809	(void *)&zfs_modlfs,
6810	(void *)&zfs_modldrv,
6811	NULL
6812};
6813#endif	/* illumos */
6814
6815static struct cdevsw zfs_cdevsw = {
6816	.d_version =	D_VERSION,
6817	.d_open =	zfsdev_open,
6818	.d_ioctl =	zfsdev_ioctl,
6819	.d_name =	ZFS_DEV_NAME
6820};
6821
/*
 * Destructor registered for zfs_allow_log_key: arg is the saved pool name
 * string stored under that key, which is released with strfree().
 */
static void
zfs_allow_log_destroy(void *arg)
{
	strfree((char *)arg);
}
6828
6829static void
6830zfsdev_init(void)
6831{
6832	zfsdev = make_dev(&zfs_cdevsw, 0x0, UID_ROOT, GID_OPERATOR, 0666,
6833	    ZFS_DEV_NAME);
6834}
6835
6836static void
6837zfsdev_fini(void)
6838{
6839	if (zfsdev != NULL)
6840		destroy_dev(zfsdev);
6841}
6842
/*
 * Token returned by root_mount_hold("ZFS") in zfs__init() and released
 * there with root_mount_rel() once initialization is done.
 */
static struct root_hold_token *zfs_root_token;

/*
 * NOTE(review): not referenced in this part of the file; presumably the
 * kernel process used by ZFS threads -- confirm against its users.
 */
struct proc *zfsproc;
6845
6846#ifdef illumos
6847int
6848_init(void)
6849{
6850	int error;
6851
6852	spa_init(FREAD | FWRITE);
6853	zfs_init();
6854	zvol_init();
6855	zfs_ioctl_init();
6856
6857	if ((error = mod_install(&modlinkage)) != 0) {
6858		zvol_fini();
6859		zfs_fini();
6860		spa_fini();
6861		return (error);
6862	}
6863
6864	tsd_create(&zfs_fsyncer_key, NULL);
6865	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
6866	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
6867
6868	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
6869	ASSERT(error == 0);
6870	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
6871
6872	return (0);
6873}
6874
6875int
6876_fini(void)
6877{
6878	int error;
6879
6880	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
6881		return (SET_ERROR(EBUSY));
6882
6883	if ((error = mod_remove(&modlinkage)) != 0)
6884		return (error);
6885
6886	zvol_fini();
6887	zfs_fini();
6888	spa_fini();
6889	if (zfs_nfsshare_inited)
6890		(void) ddi_modclose(nfs_mod);
6891	if (zfs_smbshare_inited)
6892		(void) ddi_modclose(smbsrv_mod);
6893	if (zfs_nfsshare_inited || zfs_smbshare_inited)
6894		(void) ddi_modclose(sharefs_mod);
6895
6896	tsd_destroy(&zfs_fsyncer_key);
6897	ldi_ident_release(zfs_li);
6898	zfs_li = NULL;
6899	mutex_destroy(&zfs_share_lock);
6900
6901	return (error);
6902}
6903
6904int
6905_info(struct modinfo *modinfop)
6906{
6907	return (mod_info(&modlinkage, modinfop));
6908}
6909#endif	/* illumos */
6910
6911static int zfs__init(void);
6912static int zfs__fini(void);
6913static void zfs_shutdown(void *, int);
6914
6915static eventhandler_tag zfs_shutdown_event_tag;
6916
6917#ifdef __FreeBSD__
6918#define ZFS_MIN_KSTACK_PAGES 4
6919#endif
6920
6921int
6922zfs__init(void)
6923{
6924
6925#ifdef __FreeBSD__
6926#if KSTACK_PAGES < ZFS_MIN_KSTACK_PAGES
6927	printf("ZFS NOTICE: KSTACK_PAGES is %d which could result in stack "
6928	    "overflow panic!\nPlease consider adding "
6929	    "'options KSTACK_PAGES=%d' to your kernel config\n", KSTACK_PAGES,
6930	    ZFS_MIN_KSTACK_PAGES);
6931#endif
6932#endif
6933	zfs_root_token = root_mount_hold("ZFS");
6934
6935	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
6936
6937	spa_init(FREAD | FWRITE);
6938	zfs_init();
6939	zvol_init();
6940	zfs_ioctl_init();
6941
6942	tsd_create(&zfs_fsyncer_key, NULL);
6943	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
6944	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
6945	tsd_create(&zfs_geom_probe_vdev_key, NULL);
6946
6947	printf("ZFS storage pool version: features support (" SPA_VERSION_STRING ")\n");
6948	root_mount_rel(zfs_root_token);
6949
6950	zfsdev_init();
6951
6952	return (0);
6953}
6954
6955int
6956zfs__fini(void)
6957{
6958	if (spa_busy() || zfs_busy() || zvol_busy() ||
6959	    zio_injection_enabled) {
6960		return (EBUSY);
6961	}
6962
6963	zfsdev_fini();
6964	zvol_fini();
6965	zfs_fini();
6966	spa_fini();
6967
6968	tsd_destroy(&zfs_fsyncer_key);
6969	tsd_destroy(&rrw_tsd_key);
6970	tsd_destroy(&zfs_allow_log_key);
6971
6972	mutex_destroy(&zfs_share_lock);
6973
6974	return (0);
6975}
6976
6977static void
6978zfs_shutdown(void *arg __unused, int howto __unused)
6979{
6980
6981	/*
6982	 * ZFS fini routines can not properly work in a panic-ed system.
6983	 */
6984	if (panicstr == NULL)
6985		(void)zfs__fini();
6986}
6987
6988
6989static int
6990zfs_modevent(module_t mod, int type, void *unused __unused)
6991{
6992	int err;
6993
6994	switch (type) {
6995	case MOD_LOAD:
6996		err = zfs__init();
6997		if (err == 0)
6998			zfs_shutdown_event_tag = EVENTHANDLER_REGISTER(
6999			    shutdown_post_sync, zfs_shutdown, NULL,
7000			    SHUTDOWN_PRI_FIRST);
7001		return (err);
7002	case MOD_UNLOAD:
7003		err = zfs__fini();
7004		if (err == 0 && zfs_shutdown_event_tag != NULL)
7005			EVENTHANDLER_DEREGISTER(shutdown_post_sync,
7006			    zfs_shutdown_event_tag);
7007		return (err);
7008	case MOD_SHUTDOWN:
7009		return (0);
7010	default:
7011		break;
7012	}
7013	return (EOPNOTSUPP);
7014}
7015
7016static moduledata_t zfs_mod = {
7017	"zfsctrl",
7018	zfs_modevent,
7019	0
7020};
7021DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_VFS, SI_ORDER_ANY);
7022MODULE_VERSION(zfsctrl, 1);
7023MODULE_DEPEND(zfsctrl, opensolaris, 1, 1, 1);
7024MODULE_DEPEND(zfsctrl, krpc, 1, 1, 1);
7025MODULE_DEPEND(zfsctrl, acl_nfs4, 1, 1, 1);
7026