1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
25 * Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
26 * Copyright 2014 Xin Li <delphij@FreeBSD.org>. All rights reserved.
27 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
28 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
29 * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
30 * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
31 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
32 * Copyright (c) 2013 Steven Hartland. All rights reserved.
33 * Copyright (c) 2014 Integros [integros.com]
34 * Copyright 2016 Toomas Soome <tsoome@me.com>
35 * Copyright 2017 RackTop Systems.
36 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
37 * Copyright (c) 2019 Datto Inc.
38 */
39
40/*
41 * ZFS ioctls.
42 *
43 * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
44 * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
45 *
46 * There are two ways that we handle ioctls: the legacy way where almost
47 * all of the logic is in the ioctl callback, and the new way where most
48 * of the marshalling is handled in the common entry point, zfsdev_ioctl().
49 *
50 * Non-legacy ioctls should be registered by calling
51 * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
52 * from userland by lzc_ioctl().
53 *
54 * The registration arguments are as follows:
55 *
56 * const char *name
57 *   The name of the ioctl.  This is used for history logging.  If the
58 *   ioctl returns successfully (the callback returns 0), and allow_log
59 *   is true, then a history log entry will be recorded with the input &
60 *   output nvlists.  The log entry can be printed with "zpool history -i".
61 *
62 * zfs_ioc_t ioc
63 *   The ioctl request number, which userland will pass to ioctl(2).
64 *   The ioctl numbers can change from release to release, because
65 *   the caller (libzfs) must be matched to the kernel.
66 *
67 * zfs_secpolicy_func_t *secpolicy
68 *   This function will be called before the zfs_ioc_func_t, to
69 *   determine if this operation is permitted.  It should return EPERM
70 *   on failure, and 0 on success.  Checks include determining if the
71 *   dataset is visible in this zone, and if the user has either all
72 *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
73 *   to do this operation on this dataset with "zfs allow".
74 *
75 * zfs_ioc_namecheck_t namecheck
76 *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
77 *   name, a dataset name, or nothing.  If the name is not well-formed,
78 *   the ioctl will fail and the callback will not be called.
79 *   Therefore, the callback can assume that the name is well-formed
80 *   (e.g. is null-terminated, doesn't have more than one '@' character,
81 *   doesn't have invalid characters).
82 *
83 * zfs_ioc_poolcheck_t pool_check
84 *   This specifies requirements on the pool state.  If the pool does
85 *   not meet them (is suspended or is readonly), the ioctl will fail
86 *   and the callback will not be called.  If any checks are specified
87 *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
88 *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
89 *   POOL_CHECK_READONLY).
90 *
91 * boolean_t smush_outnvlist
92 *   If smush_outnvlist is true, then the output is presumed to be a
93 *   list of errors, and it will be "smushed" down to fit into the
94 *   caller's buffer, by removing some entries and replacing them with a
95 *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
96 *   nvlist_smush() for details.  If smush_outnvlist is false, and the
97 *   outnvlist does not fit into the userland-provided buffer, then the
98 *   ioctl will fail with ENOMEM.
99 *
100 * zfs_ioc_func_t *func
101 *   The callback function that will perform the operation.
102 *
103 *   The callback should return 0 on success, or an error number on
104 *   failure.  If the function fails, the userland ioctl will return -1,
105 *   and errno will be set to the callback's return value.  The callback
106 *   will be called with the following arguments:
107 *
108 *   const char *name
109 *     The name of the pool or dataset to operate on, from
110 *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
111 *     expected type (pool, dataset, or none).
112 *
113 *   nvlist_t *innvl
114 *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
115 *     NULL if no input nvlist was provided.  Changes to this nvlist are
116 *     ignored.  If the input nvlist could not be deserialized, the
117 *     ioctl will fail and the callback will not be called.
118 *
119 *   nvlist_t *outnvl
120 *     The output nvlist, initially empty.  The callback can fill it in,
121 *     and it will be returned to userland by serializing it into
122 *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
123 *     fails (e.g. because the caller didn't supply a large enough
124 *     buffer), then the overall ioctl will fail.  See the
 *     'smush_outnvlist' argument above for additional behaviors.
126 *
127 *     There are two typical uses of the output nvlist:
128 *       - To return state, e.g. property values.  In this case,
129 *         smush_outnvlist should be false.  If the buffer was not large
130 *         enough, the caller will reallocate a larger buffer and try
131 *         the ioctl again.
132 *
133 *       - To return multiple errors from an ioctl which makes on-disk
134 *         changes.  In this case, smush_outnvlist should be true.
135 *         Ioctls which make on-disk modifications should generally not
136 *         use the outnvl if they succeed, because the caller can not
137 *         distinguish between the operation failing, and
138 *         deserialization failing.
139 */
140#ifdef __FreeBSD__
141#include "opt_kstack_pages.h"
142#endif
143
144#include <sys/types.h>
145#include <sys/param.h>
146#include <sys/systm.h>
147#include <sys/conf.h>
148#include <sys/kernel.h>
149#include <sys/lock.h>
150#include <sys/malloc.h>
151#include <sys/mutex.h>
152#include <sys/proc.h>
153#include <sys/errno.h>
154#include <sys/uio.h>
155#include <sys/buf.h>
156#include <sys/file.h>
157#include <sys/kmem.h>
158#include <sys/conf.h>
159#include <sys/cmn_err.h>
160#include <sys/stat.h>
161#include <sys/zfs_ioctl.h>
162#include <sys/zfs_vfsops.h>
163#include <sys/zfs_znode.h>
164#include <sys/zap.h>
165#include <sys/spa.h>
166#include <sys/spa_impl.h>
167#include <sys/vdev.h>
168#include <sys/dmu.h>
169#include <sys/dsl_dir.h>
170#include <sys/dsl_dataset.h>
171#include <sys/dsl_prop.h>
172#include <sys/dsl_deleg.h>
173#include <sys/dmu_objset.h>
174#include <sys/dmu_impl.h>
175#include <sys/dmu_tx.h>
176#include <sys/sunddi.h>
177#include <sys/policy.h>
178#include <sys/zone.h>
179#include <sys/nvpair.h>
180#include <sys/mount.h>
181#include <sys/taskqueue.h>
182#include <sys/sdt.h>
183#include <sys/varargs.h>
184#include <sys/fs/zfs.h>
185#include <sys/zfs_ctldir.h>
186#include <sys/zfs_dir.h>
187#include <sys/zfs_onexit.h>
188#include <sys/zvol.h>
189#include <sys/dsl_scan.h>
190#include <sys/dmu_objset.h>
191#include <sys/dmu_send.h>
192#include <sys/dsl_destroy.h>
193#include <sys/dsl_bookmark.h>
194#include <sys/dsl_userhold.h>
195#include <sys/zfeature.h>
196#include <sys/zcp.h>
197#include <sys/zio_checksum.h>
198#include <sys/vdev_removal.h>
199#include <sys/vdev_impl.h>
200#include <sys/vdev_initialize.h>
201
202#include "zfs_namecheck.h"
203#include "zfs_prop.h"
204#include "zfs_deleg.h"
205#include "zfs_comutil.h"
206#include "zfs_ioctl_compat.h"
207
208#include "lua.h"
209#include "lauxlib.h"
210
/* Character-device handle for the ZFS control node (presumably /dev/zfs). */
static struct cdev *zfsdev;

/* Module-wide init/teardown entry points, defined outside this file. */
extern void zfs_init(void);
extern void zfs_fini(void);

/*
 * Key variables.  zfs_allow_log_key is a thread-specific-data key (it is
 * queried with tsd_get() by zfs_secpolicy_log_history()); the others are
 * presumably TSD keys as well -- TODO confirm against their users.
 */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
static uint_t zfs_allow_log_key;
extern uint_t zfs_geom_probe_vdev_key;
220
/*
 * Callback signatures used by the ioctl vector:
 *  - legacy handler: receives only the raw zfs_cmd_t;
 *  - new-style handler: receives (name, innvl, outnvl) as described in
 *    the overview comment at the top of this file;
 *  - security policy: receives the zfs_cmd_t, the input nvlist and the
 *    caller's credentials.
 * All of them return 0 on success or an errno value.
 */
typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
224
/*
 * What kind of name an ioctl expects in zfs_cmd_t:zc_name (see the
 * 'namecheck' registration argument in the overview comment above).
 */
typedef enum {
	NO_NAME,	/* zc_name is not used */
	POOL_NAME,	/* zc_name holds a pool name */
	DATASET_NAME,	/* zc_name holds a dataset name */
	ENTITY_NAME	/* NOTE(review): not described here -- confirm meaning */
} zfs_ioc_namecheck_t;
231
/*
 * Requirements on the pool state that must hold before an ioctl callback
 * is invoked (see 'pool_check' in the overview comment above).  The values
 * are distinct bits so that checks can be or-ed together.
 */
typedef enum {
	POOL_CHECK_NONE		= 1 << 0,	/* no pool-state requirement */
	POOL_CHECK_SUSPENDED	= 1 << 1,	/* fail if pool is suspended */
	POOL_CHECK_READONLY	= 1 << 2,	/* fail if pool is readonly */
} zfs_ioc_poolcheck_t;
237
/*
 * One entry of the ioctl dispatch table.  The fields correspond to the
 * registration arguments documented in the overview comment at the top of
 * this file.
 */
typedef struct zfs_ioc_vec {
	zfs_ioc_legacy_func_t	*zvec_legacy_func;	/* legacy-style handler */
	zfs_ioc_func_t		*zvec_func;		/* new-style handler */
	zfs_secpolicy_func_t	*zvec_secpolicy;	/* permission check */
	zfs_ioc_namecheck_t	zvec_namecheck;		/* expected zc_name kind */
	boolean_t		zvec_allow_log;		/* record in pool history */
	zfs_ioc_poolcheck_t	zvec_pool_check;	/* pool-state requirements */
	boolean_t		zvec_smush_outnvlist;	/* shrink outnvl to fit */
	const char		*zvec_name;		/* name used for logging */
} zfs_ioc_vec_t;
248
/*
 * Delegated-permission names for the user/group space properties.
 * This array is indexed by zfs_userquota_prop_t, so the order of the
 * entries must follow the order of that enum.
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
};
256
/*
 * Forward declarations.  None of these are defined in the portion of the
 * file preceding this point; presumably they are defined further down
 * (zfs_set_prop_nvlist() is the only one with external linkage).
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);

static void zfsdev_close(void *data);

static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
270
271/* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
272void
273__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
274{
275	const char *newfile;
276	char buf[512];
277	va_list adx;
278
279	/*
280	 * Get rid of annoying "../common/" prefix to filename.
281	 */
282	newfile = strrchr(file, '/');
283	if (newfile != NULL) {
284		newfile = newfile + 1; /* Get rid of leading / */
285	} else {
286		newfile = file;
287	}
288
289	va_start(adx, fmt);
290	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
291	va_end(adx);
292
293	/*
294	 * To get this data, use the zfs-dprintf probe as so:
295	 * dtrace -q -n 'zfs-dprintf \
296	 *	/stringof(arg0) == "dbuf.c"/ \
297	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
298	 * arg0 = file name
299	 * arg1 = function name
300	 * arg2 = line number
301	 * arg3 = message
302	 */
303	DTRACE_PROBE4(zfs__dprintf,
304	    char *, newfile, char *, func, int, line, char *, buf);
305}
306
/*
 * Release a history record buffer.  The buffer must have been allocated
 * with a size of HIS_MAX_RECORD_LEN, as history_str_get() does.
 */
static void
history_str_free(char *buf)
{
	kmem_free(buf, HIS_MAX_RECORD_LEN);
}
312
313static char *
314history_str_get(zfs_cmd_t *zc)
315{
316	char *buf;
317
318	if (zc->zc_history == 0)
319		return (NULL);
320
321	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
322	if (copyinstr((void *)(uintptr_t)zc->zc_history,
323	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
324		history_str_free(buf);
325		return (NULL);
326	}
327
328	buf[HIS_MAX_RECORD_LEN -1] = '\0';
329
330	return (buf);
331}
332
333/*
334 * Check to see if the named dataset is currently defined as bootable
335 */
336static boolean_t
337zfs_is_bootfs(const char *name)
338{
339	objset_t *os;
340
341	if (dmu_objset_hold(name, FTAG, &os) == 0) {
342		boolean_t ret;
343		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
344		dmu_objset_rele(os, FTAG);
345		return (ret);
346	}
347	return (B_FALSE);
348}
349
350/*
351 * Return non-zero if the spa version is less than requested version.
352 */
353static int
354zfs_earlier_version(const char *name, int version)
355{
356	spa_t *spa;
357
358	if (spa_open(name, &spa, FTAG) == 0) {
359		if (spa_version(spa) < version) {
360			spa_close(spa, FTAG);
361			return (1);
362		}
363		spa_close(spa, FTAG);
364	}
365	return (0);
366}
367
368/*
369 * Return TRUE if the ZPL version is less than requested version.
370 */
371static boolean_t
372zpl_earlier_version(const char *name, int version)
373{
374	objset_t *os;
375	boolean_t rc = B_TRUE;
376
377	if (dmu_objset_hold(name, FTAG, &os) == 0) {
378		uint64_t zplversion;
379
380		if (dmu_objset_type(os) != DMU_OST_ZFS) {
381			dmu_objset_rele(os, FTAG);
382			return (B_TRUE);
383		}
384		/* XXX reading from non-owned objset */
385		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
386			rc = zplversion < version;
387		dmu_objset_rele(os, FTAG);
388	}
389	return (rc);
390}
391
392static void
393zfs_log_history(zfs_cmd_t *zc)
394{
395	spa_t *spa;
396	char *buf;
397
398	if ((buf = history_str_get(zc)) == NULL)
399		return;
400
401	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
402		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
403			(void) spa_history_log(spa, buf);
404		spa_close(spa, FTAG);
405	}
406	history_str_free(buf);
407}
408
/*
 * Policy for top-level read operations (list pools).  Requires no privileges,
 * and can be used in the local zone, as there is no associated dataset.
 *
 * All arguments are ignored; the function always returns 0 (permitted).
 */
/* ARGSUSED */
static int
zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
	return (0);
}
419
420/*
421 * Policy for dataset read operations (list children, get statistics).  Requires
422 * no privileges, but must be visible in the local zone.
423 */
424/* ARGSUSED */
425static int
426zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
427{
428	if (INGLOBALZONE(curthread) ||
429	    zone_dataset_visible(zc->zc_name, NULL))
430		return (0);
431
432	return (SET_ERROR(ENOENT));
433}
434
435static int
436zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
437{
438	int writable = 1;
439
440	/*
441	 * The dataset must be visible by this zone -- check this first
442	 * so they don't see EPERM on something they shouldn't know about.
443	 */
444	if (!INGLOBALZONE(curthread) &&
445	    !zone_dataset_visible(dataset, &writable))
446		return (SET_ERROR(ENOENT));
447
448	if (INGLOBALZONE(curthread)) {
449		/*
450		 * If the fs is zoned, only root can access it from the
451		 * global zone.
452		 */
453		if (secpolicy_zfs(cr) && zoned)
454			return (SET_ERROR(EPERM));
455	} else {
456		/*
457		 * If we are in a local zone, the 'zoned' property must be set.
458		 */
459		if (!zoned)
460			return (SET_ERROR(EPERM));
461
462		/* must be writable by this zone */
463		if (!writable)
464			return (SET_ERROR(EPERM));
465	}
466	return (0);
467}
468
469static int
470zfs_dozonecheck(const char *dataset, cred_t *cr)
471{
472	uint64_t zoned;
473
474	if (dsl_prop_get_integer(dataset, "jailed", &zoned, NULL))
475		return (SET_ERROR(ENOENT));
476
477	return (zfs_dozonecheck_impl(dataset, zoned, cr));
478}
479
480static int
481zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
482{
483	uint64_t zoned;
484
485	if (dsl_prop_get_int_ds(ds, "jailed", &zoned))
486		return (SET_ERROR(ENOENT));
487
488	return (zfs_dozonecheck_impl(dataset, zoned, cr));
489}
490
491static int
492zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
493    const char *perm, cred_t *cr)
494{
495	int error;
496
497	error = zfs_dozonecheck_ds(name, ds, cr);
498	if (error == 0) {
499		error = secpolicy_zfs(cr);
500		if (error != 0)
501			error = dsl_deleg_access_impl(ds, perm, cr);
502	}
503	return (error);
504}
505
506static int
507zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
508{
509	int error;
510	dsl_dataset_t *ds;
511	dsl_pool_t *dp;
512
513	/*
514	 * First do a quick check for root in the global zone, which
515	 * is allowed to do all write_perms.  This ensures that zfs_ioc_*
516	 * will get to handle nonexistent datasets.
517	 */
518	if (INGLOBALZONE(curthread) && secpolicy_zfs(cr) == 0)
519		return (0);
520
521	error = dsl_pool_hold(name, FTAG, &dp);
522	if (error != 0)
523		return (error);
524
525	error = dsl_dataset_hold(dp, name, FTAG, &ds);
526	if (error != 0) {
527		dsl_pool_rele(dp, FTAG);
528		return (error);
529	}
530
531	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
532
533	dsl_dataset_rele(ds, FTAG);
534	dsl_pool_rele(dp, FTAG);
535	return (error);
536}
537
#ifdef SECLABEL
/*
 * Policy for setting the security label property.
 *
 * name    - dataset whose mlslabel property is being changed
 * strval  - the requested new label (hex string, or the default keyword)
 * cr      - credentials used for the final privilege check
 *
 * Returns 0 for success, non-zero for access and other errors.
 */
static int
zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
{
	char		ds_hexsl[MAXNAMELEN];
	bslabel_t	ds_sl, new_sl;
	boolean_t	new_default = FALSE;
	uint64_t	zoned;
	int		needed_priv = -1;
	int		error;

	/* First get the existing dataset label. */
	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
	if (error != 0)
		return (SET_ERROR(EPERM));

	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
		new_default = TRUE;

	/* The label must be translatable */
	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
		return (SET_ERROR(EINVAL));

	/*
	 * In a non-global zone, disallow attempts to set a label that
	 * doesn't match that of the zone; otherwise no other checks
	 * are needed.
	 */
	if (!INGLOBALZONE(curproc)) {
		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
			return (SET_ERROR(EPERM));
		return (0);
	}

	/*
	 * For global-zone datasets (i.e., those whose zoned property is
	 * "off", verify that the specified new label is valid for the
	 * global zone.
	 */
	if (dsl_prop_get_integer(name,
	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
		return (SET_ERROR(EPERM));
	if (!zoned) {
		if (zfs_check_global_label(name, strval) != 0)
			return (SET_ERROR(EPERM));
	}

	/*
	 * If the existing dataset label is nondefault, check if the
	 * dataset is mounted (label cannot be changed while mounted).
	 * Get the zfsvfs; if there isn't one, then the dataset isn't
	 * mounted (or isn't a dataset, doesn't exist, ...).
	 */
	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
		objset_t *os;
		static char *setsl_tag = "setsl_tag";

		/*
		 * Try to own the dataset; abort if there is any error,
		 * (e.g., already mounted, in use, or other error).
		 */
		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
		    setsl_tag, &os);
		if (error != 0)
			return (SET_ERROR(EPERM));

		dmu_objset_disown(os, setsl_tag);

		if (new_default) {
			needed_priv = PRIV_FILE_DOWNGRADE_SL;
			goto out_check;
		}

		if (hexstr_to_label(strval, &new_sl) != 0)
			return (SET_ERROR(EPERM));

		/*
		 * Translate the existing (nondefault) label as well, so
		 * that the dominance comparison below works on a real
		 * label rather than on uninitialized stack contents.
		 */
		if (hexstr_to_label(ds_hexsl, &ds_sl) != 0)
			return (SET_ERROR(EPERM));

		if (blstrictdom(&ds_sl, &new_sl))
			needed_priv = PRIV_FILE_DOWNGRADE_SL;
		else if (blstrictdom(&new_sl, &ds_sl))
			needed_priv = PRIV_FILE_UPGRADE_SL;
	} else {
		/* dataset currently has a default label */
		if (!new_default)
			needed_priv = PRIV_FILE_UPGRADE_SL;
	}

out_check:
	/* A label change in either direction needs the matching privilege. */
	if (needed_priv != -1)
		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
	return (0);
}
#endif	/* SECLABEL */
636
637static int
638zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
639    cred_t *cr)
640{
641	char *strval;
642
643	/*
644	 * Check permissions for special properties.
645	 */
646	switch (prop) {
647	case ZFS_PROP_ZONED:
648		/*
649		 * Disallow setting of 'zoned' from within a local zone.
650		 */
651		if (!INGLOBALZONE(curthread))
652			return (SET_ERROR(EPERM));
653		break;
654
655	case ZFS_PROP_QUOTA:
656	case ZFS_PROP_FILESYSTEM_LIMIT:
657	case ZFS_PROP_SNAPSHOT_LIMIT:
658		if (!INGLOBALZONE(curthread)) {
659			uint64_t zoned;
660			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
661			/*
662			 * Unprivileged users are allowed to modify the
663			 * limit on things *under* (ie. contained by)
664			 * the thing they own.
665			 */
666			if (dsl_prop_get_integer(dsname, "jailed", &zoned,
667			    setpoint))
668				return (SET_ERROR(EPERM));
669			if (!zoned || strlen(dsname) <= strlen(setpoint))
670				return (SET_ERROR(EPERM));
671		}
672		break;
673
674	case ZFS_PROP_MLSLABEL:
675#ifdef SECLABEL
676		if (!is_system_labeled())
677			return (SET_ERROR(EPERM));
678
679		if (nvpair_value_string(propval, &strval) == 0) {
680			int err;
681
682			err = zfs_set_slabel_policy(dsname, strval, CRED());
683			if (err != 0)
684				return (err);
685		}
686#else
687		return (EOPNOTSUPP);
688#endif
689		break;
690	}
691
692	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
693}
694
695/* ARGSUSED */
696static int
697zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
698{
699	int error;
700
701	error = zfs_dozonecheck(zc->zc_name, cr);
702	if (error != 0)
703		return (error);
704
705	/*
706	 * permission to set permissions will be evaluated later in
707	 * dsl_deleg_can_allow()
708	 */
709	return (0);
710}
711
/*
 * Policy for rollback: the caller needs the "rollback" write permission
 * on the dataset named by zc->zc_name.  'innvl' is unused.
 */
/* ARGSUSED */
static int
zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_ROLLBACK, cr));
}
719
720/* ARGSUSED */
721static int
722zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
723{
724	dsl_pool_t *dp;
725	dsl_dataset_t *ds;
726	char *cp;
727	int error;
728
729	/*
730	 * Generate the current snapshot name from the given objsetid, then
731	 * use that name for the secpolicy/zone checks.
732	 */
733	cp = strchr(zc->zc_name, '@');
734	if (cp == NULL)
735		return (SET_ERROR(EINVAL));
736	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
737	if (error != 0)
738		return (error);
739
740	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
741	if (error != 0) {
742		dsl_pool_rele(dp, FTAG);
743		return (error);
744	}
745
746	dsl_dataset_name(ds, zc->zc_name);
747
748	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
749	    ZFS_DELEG_PERM_SEND, cr);
750	dsl_dataset_rele(ds, FTAG);
751	dsl_pool_rele(dp, FTAG);
752
753	return (error);
754}
755
/*
 * Policy for the new-style send ioctl: the caller needs the "send" write
 * permission on the dataset named by zc->zc_name.  'innvl' is unused.
 */
/* ARGSUSED */
static int
zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_SEND, cr));
}
763
764/* ARGSUSED */
765static int
766zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
767{
768	vnode_t *vp;
769	int error;
770
771	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
772	    NO_FOLLOW, NULL, &vp)) != 0)
773		return (error);
774
775	/* Now make sure mntpnt and dataset are ZFS */
776
777	if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
778	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
779	    zc->zc_name) != 0)) {
780		VN_RELE(vp);
781		return (SET_ERROR(EPERM));
782	}
783
784	VN_RELE(vp);
785	return (dsl_deleg_access(zc->zc_name,
786	    ZFS_DELEG_PERM_SHARE, cr));
787}
788
789int
790zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
791{
792	if (!INGLOBALZONE(curthread))
793		return (SET_ERROR(EPERM));
794
795	if (secpolicy_nfs(cr) == 0) {
796		return (0);
797	} else {
798		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
799	}
800}
801
802int
803zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
804{
805	if (!INGLOBALZONE(curthread))
806		return (SET_ERROR(EPERM));
807
808	if (secpolicy_smb(cr) == 0) {
809		return (0);
810	} else {
811		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
812	}
813}
814
/*
 * Compute the parent of a dataset name.
 *
 * datasetname - name of a snapshot ("fs@snap") or of a child dataset
 *               ("parent/child")
 * parent      - output buffer that receives the parent name
 * parentsize  - size of 'parent' in bytes
 *
 * The name is copied into 'parent' (truncated if it does not fit) and cut
 * at the last '@' if there is one, otherwise at the last '/'.
 *
 * Returns 0 on success, EINVAL if 'parentsize' is not positive, or ENOENT
 * if the name contains neither '@' nor '/' (i.e. it has no parent).
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0)
		return (SET_ERROR(EINVAL));

	/*
	 * strncpy() does not terminate the destination when the source
	 * fills it completely, so terminate explicitly before searching
	 * the buffer.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	parent[parentsize - 1] = '\0';

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp != NULL) {
		cp[0] = '\0';
	} else {
		cp = strrchr(parent, '/');
		if (cp == NULL)
			return (SET_ERROR(ENOENT));
		cp[0] = '\0';
	}

	return (0);
}
836
837int
838zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
839{
840	int error;
841
842	if ((error = zfs_secpolicy_write_perms(name,
843	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
844		return (error);
845
846	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
847}
848
/*
 * Policy for destroying the dataset named by zc->zc_name; see
 * zfs_secpolicy_destroy_perms().  'innvl' is unused.
 */
/* ARGSUSED */
static int
zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
}
855
856/*
857 * Destroying snapshots with delegated permissions requires
858 * descendant mount and destroy permissions.
859 */
860/* ARGSUSED */
861static int
862zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
863{
864	nvlist_t *snaps;
865	nvpair_t *pair, *nextpair;
866	int error = 0;
867
868	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
869		return (SET_ERROR(EINVAL));
870	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
871	    pair = nextpair) {
872		nextpair = nvlist_next_nvpair(snaps, pair);
873		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
874		if (error == ENOENT) {
875			/*
876			 * Ignore any snapshots that don't exist (we consider
877			 * them "already destroyed").  Remove the name from the
878			 * nvl here in case the snapshot is created between
879			 * now and when we try to destroy it (in which case
880			 * we don't want to destroy it since we haven't
881			 * checked for permission).
882			 */
883			fnvlist_remove_nvpair(snaps, pair);
884			error = 0;
885		}
886		if (error != 0)
887			break;
888	}
889
890	return (error);
891}
892
893int
894zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
895{
896	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
897	int	error;
898
899	if ((error = zfs_secpolicy_write_perms(from,
900	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
901		return (error);
902
903	if ((error = zfs_secpolicy_write_perms(from,
904	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
905		return (error);
906
907	if ((error = zfs_get_parent(to, parentname,
908	    sizeof (parentname))) != 0)
909		return (error);
910
911	if ((error = zfs_secpolicy_write_perms(parentname,
912	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
913		return (error);
914
915	if ((error = zfs_secpolicy_write_perms(parentname,
916	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
917		return (error);
918
919	return (error);
920}
921
922/* ARGSUSED */
923static int
924zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
925{
926	char *at = NULL;
927	char *pound;
928	int error;
929
930	if ((pound = strchr(zc->zc_name, '#')) != NULL) {
931		*pound = '\0';
932		error = zfs_secpolicy_write_perms(zc->zc_name,
933		    ZFS_DELEG_PERM_RENAME, cr);
934		if (error == 0) {
935			error = zfs_secpolicy_write_perms(zc->zc_name,
936			    ZFS_DELEG_PERM_BOOKMARK, cr);
937		}
938		*pound = '#';
939		return (error);
940	}
941
942	if ((zc->zc_cookie & 1) != 0) {
943		/*
944		 * This is recursive rename, so the starting snapshot might
945		 * not exist. Check file system or volume permission instead.
946		 */
947		at = strchr(zc->zc_name, '@');
948		if (at == NULL)
949			return (EINVAL);
950		*at = '\0';
951	}
952
953	error = zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr);
954
955	if (at != NULL)
956		*at = '@';
957
958	return (error);
959}
960
961/* ARGSUSED */
962static int
963zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
964{
965	dsl_pool_t *dp;
966	dsl_dataset_t *clone;
967	int error;
968
969	error = zfs_secpolicy_write_perms(zc->zc_name,
970	    ZFS_DELEG_PERM_PROMOTE, cr);
971	if (error != 0)
972		return (error);
973
974	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
975	if (error != 0)
976		return (error);
977
978	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
979
980	if (error == 0) {
981		char parentname[ZFS_MAX_DATASET_NAME_LEN];
982		dsl_dataset_t *origin = NULL;
983		dsl_dir_t *dd;
984		dd = clone->ds_dir;
985
986		error = dsl_dataset_hold_obj(dd->dd_pool,
987		    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
988		if (error != 0) {
989			dsl_dataset_rele(clone, FTAG);
990			dsl_pool_rele(dp, FTAG);
991			return (error);
992		}
993
994		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
995		    ZFS_DELEG_PERM_MOUNT, cr);
996
997		dsl_dataset_name(origin, parentname);
998		if (error == 0) {
999			error = zfs_secpolicy_write_perms_ds(parentname, origin,
1000			    ZFS_DELEG_PERM_PROMOTE, cr);
1001		}
1002		dsl_dataset_rele(clone, FTAG);
1003		dsl_dataset_rele(origin, FTAG);
1004	}
1005	dsl_pool_rele(dp, FTAG);
1006	return (error);
1007}
1008
1009/* ARGSUSED */
1010static int
1011zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1012{
1013	int error;
1014
1015	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1016	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
1017		return (error);
1018
1019	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1020	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
1021		return (error);
1022
1023	return (zfs_secpolicy_write_perms(zc->zc_name,
1024	    ZFS_DELEG_PERM_CREATE, cr));
1025}
1026
/*
 * Permission check for taking a snapshot: the caller needs the "snapshot"
 * write permission on 'name'.
 */
int
zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(name,
	    ZFS_DELEG_PERM_SNAPSHOT, cr));
}
1033
1034/*
1035 * Check for permission to create each snapshot in the nvlist.
1036 */
1037/* ARGSUSED */
1038static int
1039zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1040{
1041	nvlist_t *snaps;
1042	int error;
1043	nvpair_t *pair;
1044
1045	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
1046		return (SET_ERROR(EINVAL));
1047	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1048	    pair = nvlist_next_nvpair(snaps, pair)) {
1049		char *name = nvpair_name(pair);
1050		char *atp = strchr(name, '@');
1051
1052		if (atp == NULL) {
1053			error = SET_ERROR(EINVAL);
1054			break;
1055		}
1056		*atp = '\0';
1057		error = zfs_secpolicy_snapshot_perms(name, cr);
1058		*atp = '@';
1059		if (error != 0)
1060			break;
1061	}
1062	return (error);
1063}
1064
1065/*
1066 * Check for permission to create each snapshot in the nvlist.
1067 */
1068/* ARGSUSED */
1069static int
1070zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1071{
1072	int error = 0;
1073
1074	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1075	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1076		char *name = nvpair_name(pair);
1077		char *hashp = strchr(name, '#');
1078
1079		if (hashp == NULL) {
1080			error = SET_ERROR(EINVAL);
1081			break;
1082		}
1083		*hashp = '\0';
1084		error = zfs_secpolicy_write_perms(name,
1085		    ZFS_DELEG_PERM_BOOKMARK, cr);
1086		*hashp = '#';
1087		if (error != 0)
1088			break;
1089	}
1090	return (error);
1091}
1092
/*
 * Policy for remap: the caller needs the "remap" write permission on the
 * dataset named by zc->zc_name.  'innvl' is unused.
 */
/* ARGSUSED */
static int
zfs_secpolicy_remap(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_REMAP, cr));
}
1100
1101/* ARGSUSED */
1102static int
1103zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1104{
1105	nvpair_t *pair, *nextpair;
1106	int error = 0;
1107
1108	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1109	    pair = nextpair) {
1110		char *name = nvpair_name(pair);
1111		char *hashp = strchr(name, '#');
1112		nextpair = nvlist_next_nvpair(innvl, pair);
1113
1114		if (hashp == NULL) {
1115			error = SET_ERROR(EINVAL);
1116			break;
1117		}
1118
1119		*hashp = '\0';
1120		error = zfs_secpolicy_write_perms(name,
1121		    ZFS_DELEG_PERM_DESTROY, cr);
1122		*hashp = '#';
1123		if (error == ENOENT) {
1124			/*
1125			 * Ignore any filesystems that don't exist (we consider
1126			 * their bookmarks "already destroyed").  Remove
1127			 * the name from the nvl here in case the filesystem
1128			 * is created between now and when we try to destroy
1129			 * the bookmark (in which case we don't want to
1130			 * destroy it since we haven't checked for permission).
1131			 */
1132			fnvlist_remove_nvpair(innvl, pair);
1133			error = 0;
1134		}
1135		if (error != 0)
1136			break;
1137	}
1138
1139	return (error);
1140}
1141
1142/* ARGSUSED */
1143static int
1144zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1145{
1146	/*
1147	 * Even root must have a proper TSD so that we know what pool
1148	 * to log to.
1149	 */
1150	if (tsd_get(zfs_allow_log_key) == NULL)
1151		return (SET_ERROR(EPERM));
1152	return (0);
1153}
1154
1155static int
1156zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1157{
1158	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
1159	int	error;
1160	char	*origin;
1161
1162	if ((error = zfs_get_parent(zc->zc_name, parentname,
1163	    sizeof (parentname))) != 0)
1164		return (error);
1165
1166	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1167	    (error = zfs_secpolicy_write_perms(origin,
1168	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1169		return (error);
1170
1171	if ((error = zfs_secpolicy_write_perms(parentname,
1172	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1173		return (error);
1174
1175	return (zfs_secpolicy_write_perms(parentname,
1176	    ZFS_DELEG_PERM_MOUNT, cr));
1177}
1178
1179/*
1180 * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1181 * SYS_CONFIG privilege, which is not available in a local zone.
1182 */
1183/* ARGSUSED */
1184static int
1185zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1186{
1187	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1188		return (SET_ERROR(EPERM));
1189
1190	return (0);
1191}
1192
1193/*
1194 * Policy for object to name lookups.
1195 */
1196/* ARGSUSED */
1197static int
1198zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1199{
1200	int error;
1201
1202	if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1203		return (0);
1204
1205	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1206	return (error);
1207}
1208
1209/*
1210 * Policy for fault injection.  Requires all privileges.
1211 */
1212/* ARGSUSED */
1213static int
1214zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1215{
1216	return (secpolicy_zinject(cr));
1217}
1218
1219/* ARGSUSED */
1220static int
1221zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1222{
1223	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1224
1225	if (prop == ZPROP_INVAL) {
1226		if (!zfs_prop_user(zc->zc_value))
1227			return (SET_ERROR(EINVAL));
1228		return (zfs_secpolicy_write_perms(zc->zc_name,
1229		    ZFS_DELEG_PERM_USERPROP, cr));
1230	} else {
1231		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1232		    NULL, cr));
1233	}
1234}
1235
1236static int
1237zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1238{
1239	int err = zfs_secpolicy_read(zc, innvl, cr);
1240	if (err)
1241		return (err);
1242
1243	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1244		return (SET_ERROR(EINVAL));
1245
1246	if (zc->zc_value[0] == 0) {
1247		/*
1248		 * They are asking about a posix uid/gid.  If it's
1249		 * themself, allow it.
1250		 */
1251		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1252		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
1253			if (zc->zc_guid == crgetuid(cr))
1254				return (0);
1255		} else {
1256			if (groupmember(zc->zc_guid, cr))
1257				return (0);
1258		}
1259	}
1260
1261	return (zfs_secpolicy_write_perms(zc->zc_name,
1262	    userquota_perms[zc->zc_objset_type], cr));
1263}
1264
1265static int
1266zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1267{
1268	int err = zfs_secpolicy_read(zc, innvl, cr);
1269	if (err)
1270		return (err);
1271
1272	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1273		return (SET_ERROR(EINVAL));
1274
1275	return (zfs_secpolicy_write_perms(zc->zc_name,
1276	    userquota_perms[zc->zc_objset_type], cr));
1277}
1278
1279/* ARGSUSED */
1280static int
1281zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1282{
1283	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1284	    NULL, cr));
1285}
1286
1287/* ARGSUSED */
1288static int
1289zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1290{
1291	nvpair_t *pair;
1292	nvlist_t *holds;
1293	int error;
1294
1295	error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1296	if (error != 0)
1297		return (SET_ERROR(EINVAL));
1298
1299	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1300	    pair = nvlist_next_nvpair(holds, pair)) {
1301		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1302		error = dmu_fsname(nvpair_name(pair), fsname);
1303		if (error != 0)
1304			return (error);
1305		error = zfs_secpolicy_write_perms(fsname,
1306		    ZFS_DELEG_PERM_HOLD, cr);
1307		if (error != 0)
1308			return (error);
1309	}
1310	return (0);
1311}
1312
1313/* ARGSUSED */
1314static int
1315zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1316{
1317	nvpair_t *pair;
1318	int error;
1319
1320	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1321	    pair = nvlist_next_nvpair(innvl, pair)) {
1322		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1323		error = dmu_fsname(nvpair_name(pair), fsname);
1324		if (error != 0)
1325			return (error);
1326		error = zfs_secpolicy_write_perms(fsname,
1327		    ZFS_DELEG_PERM_RELEASE, cr);
1328		if (error != 0)
1329			return (error);
1330	}
1331	return (0);
1332}
1333
1334/*
1335 * Policy for allowing temporary snapshots to be taken or released
1336 */
1337static int
1338zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1339{
1340	/*
1341	 * A temporary snapshot is the same as a snapshot,
1342	 * hold, destroy and release all rolled into one.
1343	 * Delegated diff alone is sufficient that we allow this.
1344	 */
1345	int error;
1346
1347	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1348	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
1349		return (0);
1350
1351	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1352	if (error == 0)
1353		error = zfs_secpolicy_hold(zc, innvl, cr);
1354	if (error == 0)
1355		error = zfs_secpolicy_release(zc, innvl, cr);
1356	if (error == 0)
1357		error = zfs_secpolicy_destroy(zc, innvl, cr);
1358	return (error);
1359}
1360
1361/*
1362 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1363 */
1364static int
1365get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1366{
1367	char *packed;
1368	int error;
1369	nvlist_t *list = NULL;
1370
1371	/*
1372	 * Read in and unpack the user-supplied nvlist.
1373	 */
1374	if (size == 0)
1375		return (SET_ERROR(EINVAL));
1376
1377	packed = kmem_alloc(size, KM_SLEEP);
1378
1379	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1380	    iflag)) != 0) {
1381		kmem_free(packed, size);
1382		return (SET_ERROR(EFAULT));
1383	}
1384
1385	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1386		kmem_free(packed, size);
1387		return (error);
1388	}
1389
1390	kmem_free(packed, size);
1391
1392	*nvp = list;
1393	return (0);
1394}
1395
1396/*
1397 * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1398 * Entries will be removed from the end of the nvlist, and one int32 entry
1399 * named "N_MORE_ERRORS" will be added indicating how many entries were
1400 * removed.
1401 */
1402static int
1403nvlist_smush(nvlist_t *errors, size_t max)
1404{
1405	size_t size;
1406
1407	size = fnvlist_size(errors);
1408
1409	if (size > max) {
1410		nvpair_t *more_errors;
1411		int n = 0;
1412
1413		if (max < 1024)
1414			return (SET_ERROR(ENOMEM));
1415
1416		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1417		more_errors = nvlist_prev_nvpair(errors, NULL);
1418
1419		do {
1420			nvpair_t *pair = nvlist_prev_nvpair(errors,
1421			    more_errors);
1422			fnvlist_remove_nvpair(errors, pair);
1423			n++;
1424			size = fnvlist_size(errors);
1425		} while (size > max);
1426
1427		fnvlist_remove_nvpair(errors, more_errors);
1428		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1429		ASSERT3U(fnvlist_size(errors), <=, max);
1430	}
1431
1432	return (0);
1433}
1434
1435static int
1436put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1437{
1438	char *packed = NULL;
1439	int error = 0;
1440	size_t size;
1441
1442	size = fnvlist_size(nvl);
1443
1444	if (size > zc->zc_nvlist_dst_size) {
1445		/*
1446		 * Solaris returns ENOMEM here, because even if an error is
1447		 * returned from an ioctl(2), new zc_nvlist_dst_size will be
1448		 * passed to the userland. This is not the case for FreeBSD.
1449		 * We need to return 0, so the kernel will copy the
1450		 * zc_nvlist_dst_size back and the userland can discover that a
1451		 * bigger buffer is needed.
1452		 */
1453		error = 0;
1454	} else {
1455		packed = fnvlist_pack(nvl, &size);
1456		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1457		    size, zc->zc_iflags) != 0)
1458			error = SET_ERROR(EFAULT);
1459		fnvlist_pack_free(packed, size);
1460	}
1461
1462	zc->zc_nvlist_dst_size = size;
1463	zc->zc_nvlist_dst_filled = B_TRUE;
1464	return (error);
1465}
1466
1467int
1468getzfsvfs_impl(objset_t *os, vfs_t **vfsp)
1469{
1470	zfsvfs_t *zfvp;
1471	int error = 0;
1472
1473	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1474		return (SET_ERROR(EINVAL));
1475	}
1476
1477	mutex_enter(&os->os_user_ptr_lock);
1478	zfvp = dmu_objset_get_user(os);
1479	if (zfvp) {
1480		*vfsp = zfvp->z_vfs;
1481		vfs_ref(zfvp->z_vfs);
1482	} else {
1483		error = SET_ERROR(ESRCH);
1484	}
1485	mutex_exit(&os->os_user_ptr_lock);
1486	return (error);
1487}
1488
1489int
1490getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1491{
1492	objset_t *os;
1493	vfs_t *vfsp;
1494	int error;
1495
1496	error = dmu_objset_hold(dsname, FTAG, &os);
1497	if (error != 0)
1498		return (error);
1499	error = getzfsvfs_impl(os, &vfsp);
1500	dmu_objset_rele(os, FTAG);
1501	if (error != 0)
1502		return (error);
1503
1504	error = vfs_busy(vfsp, 0);
1505	vfs_rel(vfsp);
1506	if (error != 0) {
1507		*zfvp = NULL;
1508		error = SET_ERROR(ESRCH);
1509	} else {
1510		*zfvp = vfsp->vfs_data;
1511	}
1512	return (error);
1513}
1514
1515/*
1516 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1517 * case its z_vfs will be NULL, and it will be opened as the owner.
1518 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1519 * which prevents all vnode ops from running.
1520 */
1521static int
1522zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1523{
1524	int error = 0;
1525
1526	if (getzfsvfs(name, zfvp) != 0)
1527		error = zfsvfs_create(name, zfvp);
1528	if (error == 0) {
1529		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1530		    RW_READER, tag);
1531#ifdef illumos
1532		if ((*zfvp)->z_unmounted) {
1533			/*
1534			 * XXX we could probably try again, since the unmounting
1535			 * thread should be just about to disassociate the
1536			 * objset from the zfsvfs.
1537			 */
1538			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1539			return (SET_ERROR(EBUSY));
1540		}
1541#else
1542		/*
1543		 * vfs_busy() ensures that the filesystem is not and
1544		 * can not be unmounted.
1545		 */
1546		ASSERT(!(*zfvp)->z_unmounted);
1547#endif
1548	}
1549	return (error);
1550}
1551
1552static void
1553zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1554{
1555	rrm_exit(&zfsvfs->z_teardown_lock, tag);
1556
1557	if (zfsvfs->z_vfs) {
1558#ifdef illumos
1559		VFS_RELE(zfsvfs->z_vfs);
1560#else
1561		vfs_unbusy(zfsvfs->z_vfs);
1562#endif
1563	} else {
1564		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1565		zfsvfs_free(zfsvfs);
1566	}
1567}
1568
1569static int
1570zfs_ioc_pool_create(zfs_cmd_t *zc)
1571{
1572	int error;
1573	nvlist_t *config, *props = NULL;
1574	nvlist_t *rootprops = NULL;
1575	nvlist_t *zplprops = NULL;
1576	char *spa_name = zc->zc_name;
1577
1578	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1579	    zc->zc_iflags, &config))
1580		return (error);
1581
1582	if (zc->zc_nvlist_src_size != 0 && (error =
1583	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1584	    zc->zc_iflags, &props))) {
1585		nvlist_free(config);
1586		return (error);
1587	}
1588
1589	if (props) {
1590		nvlist_t *nvl = NULL;
1591		uint64_t version = SPA_VERSION;
1592		char *tname;
1593
1594		(void) nvlist_lookup_uint64(props,
1595		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1596		if (!SPA_VERSION_IS_SUPPORTED(version)) {
1597			error = SET_ERROR(EINVAL);
1598			goto pool_props_bad;
1599		}
1600		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1601		if (nvl) {
1602			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1603			if (error != 0) {
1604				nvlist_free(config);
1605				nvlist_free(props);
1606				return (error);
1607			}
1608			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1609		}
1610		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1611		error = zfs_fill_zplprops_root(version, rootprops,
1612		    zplprops, NULL);
1613		if (error != 0)
1614			goto pool_props_bad;
1615
1616		if (nvlist_lookup_string(props,
1617		    zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
1618			spa_name = tname;
1619	}
1620
1621	error = spa_create(zc->zc_name, config, props, zplprops);
1622
1623	/*
1624	 * Set the remaining root properties
1625	 */
1626	if (!error && (error = zfs_set_prop_nvlist(spa_name,
1627	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1628		(void) spa_destroy(spa_name);
1629
1630pool_props_bad:
1631	nvlist_free(rootprops);
1632	nvlist_free(zplprops);
1633	nvlist_free(config);
1634	nvlist_free(props);
1635
1636	return (error);
1637}
1638
1639static int
1640zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1641{
1642	int error;
1643	zfs_log_history(zc);
1644	error = spa_destroy(zc->zc_name);
1645	if (error == 0)
1646		zvol_remove_minors(zc->zc_name);
1647	return (error);
1648}
1649
1650static int
1651zfs_ioc_pool_import(zfs_cmd_t *zc)
1652{
1653	nvlist_t *config, *props = NULL;
1654	uint64_t guid;
1655	int error;
1656
1657	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1658	    zc->zc_iflags, &config)) != 0)
1659		return (error);
1660
1661	if (zc->zc_nvlist_src_size != 0 && (error =
1662	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1663	    zc->zc_iflags, &props))) {
1664		nvlist_free(config);
1665		return (error);
1666	}
1667
1668	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1669	    guid != zc->zc_guid)
1670		error = SET_ERROR(EINVAL);
1671	else
1672		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1673
1674	if (zc->zc_nvlist_dst != 0) {
1675		int err;
1676
1677		if ((err = put_nvlist(zc, config)) != 0)
1678			error = err;
1679	}
1680
1681	nvlist_free(config);
1682
1683	nvlist_free(props);
1684
1685	return (error);
1686}
1687
1688static int
1689zfs_ioc_pool_export(zfs_cmd_t *zc)
1690{
1691	int error;
1692	boolean_t force = (boolean_t)zc->zc_cookie;
1693	boolean_t hardforce = (boolean_t)zc->zc_guid;
1694
1695	zfs_log_history(zc);
1696	error = spa_export(zc->zc_name, NULL, force, hardforce);
1697	if (error == 0)
1698		zvol_remove_minors(zc->zc_name);
1699	return (error);
1700}
1701
1702static int
1703zfs_ioc_pool_configs(zfs_cmd_t *zc)
1704{
1705	nvlist_t *configs;
1706	int error;
1707
1708	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1709		return (SET_ERROR(EEXIST));
1710
1711	error = put_nvlist(zc, configs);
1712
1713	nvlist_free(configs);
1714
1715	return (error);
1716}
1717
1718/*
1719 * inputs:
1720 * zc_name		name of the pool
1721 *
1722 * outputs:
1723 * zc_cookie		real errno
1724 * zc_nvlist_dst	config nvlist
1725 * zc_nvlist_dst_size	size of config nvlist
1726 */
1727static int
1728zfs_ioc_pool_stats(zfs_cmd_t *zc)
1729{
1730	nvlist_t *config;
1731	int error;
1732	int ret = 0;
1733
1734	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1735	    sizeof (zc->zc_value));
1736
1737	if (config != NULL) {
1738		ret = put_nvlist(zc, config);
1739		nvlist_free(config);
1740
1741		/*
1742		 * The config may be present even if 'error' is non-zero.
1743		 * In this case we return success, and preserve the real errno
1744		 * in 'zc_cookie'.
1745		 */
1746		zc->zc_cookie = error;
1747	} else {
1748		ret = error;
1749	}
1750
1751	return (ret);
1752}
1753
1754/*
1755 * Try to import the given pool, returning pool stats as appropriate so that
1756 * user land knows which devices are available and overall pool health.
1757 */
1758static int
1759zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1760{
1761	nvlist_t *tryconfig, *config;
1762	int error;
1763
1764	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1765	    zc->zc_iflags, &tryconfig)) != 0)
1766		return (error);
1767
1768	config = spa_tryimport(tryconfig);
1769
1770	nvlist_free(tryconfig);
1771
1772	if (config == NULL)
1773		return (SET_ERROR(EINVAL));
1774
1775	error = put_nvlist(zc, config);
1776	nvlist_free(config);
1777
1778	return (error);
1779}
1780
1781/*
1782 * inputs:
1783 * zc_name              name of the pool
1784 * zc_cookie            scan func (pool_scan_func_t)
1785 * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
1786 */
1787static int
1788zfs_ioc_pool_scan(zfs_cmd_t *zc)
1789{
1790	spa_t *spa;
1791	int error;
1792
1793	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1794		return (error);
1795
1796	if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1797		return (SET_ERROR(EINVAL));
1798
1799	if (zc->zc_flags == POOL_SCRUB_PAUSE)
1800		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1801	else if (zc->zc_cookie == POOL_SCAN_NONE)
1802		error = spa_scan_stop(spa);
1803	else
1804		error = spa_scan(spa, zc->zc_cookie);
1805
1806	spa_close(spa, FTAG);
1807
1808	return (error);
1809}
1810
1811static int
1812zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1813{
1814	spa_t *spa;
1815	int error;
1816
1817	error = spa_open(zc->zc_name, &spa, FTAG);
1818	if (error == 0) {
1819		spa_freeze(spa);
1820		spa_close(spa, FTAG);
1821	}
1822	return (error);
1823}
1824
1825static int
1826zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1827{
1828	spa_t *spa;
1829	int error;
1830
1831	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1832		return (error);
1833
1834	if (zc->zc_cookie < spa_version(spa) ||
1835	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1836		spa_close(spa, FTAG);
1837		return (SET_ERROR(EINVAL));
1838	}
1839
1840	spa_upgrade(spa, zc->zc_cookie);
1841	spa_close(spa, FTAG);
1842
1843	return (error);
1844}
1845
1846static int
1847zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1848{
1849	spa_t *spa;
1850	char *hist_buf;
1851	uint64_t size;
1852	int error;
1853
1854	if ((size = zc->zc_history_len) == 0)
1855		return (SET_ERROR(EINVAL));
1856
1857	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1858		return (error);
1859
1860	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1861		spa_close(spa, FTAG);
1862		return (SET_ERROR(ENOTSUP));
1863	}
1864
1865	hist_buf = kmem_alloc(size, KM_SLEEP);
1866	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1867	    &zc->zc_history_len, hist_buf)) == 0) {
1868		error = ddi_copyout(hist_buf,
1869		    (void *)(uintptr_t)zc->zc_history,
1870		    zc->zc_history_len, zc->zc_iflags);
1871	}
1872
1873	spa_close(spa, FTAG);
1874	kmem_free(hist_buf, size);
1875	return (error);
1876}
1877
1878static int
1879zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1880{
1881	spa_t *spa;
1882	int error;
1883
1884	error = spa_open(zc->zc_name, &spa, FTAG);
1885	if (error == 0) {
1886		error = spa_change_guid(spa);
1887		spa_close(spa, FTAG);
1888	}
1889	return (error);
1890}
1891
1892static int
1893zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1894{
1895	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1896}
1897
1898/*
1899 * inputs:
1900 * zc_name		name of filesystem
1901 * zc_obj		object to find
1902 *
1903 * outputs:
1904 * zc_value		name of object
1905 */
1906static int
1907zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1908{
1909	objset_t *os;
1910	int error;
1911
1912	/* XXX reading from objset not owned */
1913	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1914		return (error);
1915	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1916		dmu_objset_rele(os, FTAG);
1917		return (SET_ERROR(EINVAL));
1918	}
1919	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1920	    sizeof (zc->zc_value));
1921	dmu_objset_rele(os, FTAG);
1922
1923	return (error);
1924}
1925
1926/*
1927 * inputs:
1928 * zc_name		name of filesystem
1929 * zc_obj		object to find
1930 *
1931 * outputs:
1932 * zc_stat		stats on object
1933 * zc_value		path to object
1934 */
1935static int
1936zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1937{
1938	objset_t *os;
1939	int error;
1940
1941	/* XXX reading from objset not owned */
1942	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1943		return (error);
1944	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1945		dmu_objset_rele(os, FTAG);
1946		return (SET_ERROR(EINVAL));
1947	}
1948	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1949	    sizeof (zc->zc_value));
1950	dmu_objset_rele(os, FTAG);
1951
1952	return (error);
1953}
1954
1955static int
1956zfs_ioc_vdev_add(zfs_cmd_t *zc)
1957{
1958	spa_t *spa;
1959	int error;
1960	nvlist_t *config, **l2cache, **spares;
1961	uint_t nl2cache = 0, nspares = 0;
1962
1963	error = spa_open(zc->zc_name, &spa, FTAG);
1964	if (error != 0)
1965		return (error);
1966
1967	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1968	    zc->zc_iflags, &config);
1969	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1970	    &l2cache, &nl2cache);
1971
1972	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1973	    &spares, &nspares);
1974
1975#ifdef illumos
1976	/*
1977	 * A root pool with concatenated devices is not supported.
1978	 * Thus, can not add a device to a root pool.
1979	 *
1980	 * Intent log device can not be added to a rootpool because
1981	 * during mountroot, zil is replayed, a seperated log device
1982	 * can not be accessed during the mountroot time.
1983	 *
1984	 * l2cache and spare devices are ok to be added to a rootpool.
1985	 */
1986	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1987		nvlist_free(config);
1988		spa_close(spa, FTAG);
1989		return (SET_ERROR(EDOM));
1990	}
1991#endif /* illumos */
1992
1993	if (error == 0) {
1994		error = spa_vdev_add(spa, config);
1995		nvlist_free(config);
1996	}
1997	spa_close(spa, FTAG);
1998	return (error);
1999}
2000
2001/*
2002 * inputs:
2003 * zc_name		name of the pool
2004 * zc_guid		guid of vdev to remove
2005 * zc_cookie		cancel removal
2006 */
2007static int
2008zfs_ioc_vdev_remove(zfs_cmd_t *zc)
2009{
2010	spa_t *spa;
2011	int error;
2012
2013	error = spa_open(zc->zc_name, &spa, FTAG);
2014	if (error != 0)
2015		return (error);
2016	if (zc->zc_cookie != 0) {
2017		error = spa_vdev_remove_cancel(spa);
2018	} else {
2019		error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
2020	}
2021	spa_close(spa, FTAG);
2022	return (error);
2023}
2024
2025static int
2026zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
2027{
2028	spa_t *spa;
2029	int error;
2030	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
2031
2032	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2033		return (error);
2034	switch (zc->zc_cookie) {
2035	case VDEV_STATE_ONLINE:
2036		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
2037		break;
2038
2039	case VDEV_STATE_OFFLINE:
2040		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
2041		break;
2042
2043	case VDEV_STATE_FAULTED:
2044		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2045		    zc->zc_obj != VDEV_AUX_EXTERNAL)
2046			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2047
2048		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
2049		break;
2050
2051	case VDEV_STATE_DEGRADED:
2052		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2053		    zc->zc_obj != VDEV_AUX_EXTERNAL)
2054			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2055
2056		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
2057		break;
2058
2059	default:
2060		error = SET_ERROR(EINVAL);
2061	}
2062	zc->zc_cookie = newstate;
2063	spa_close(spa, FTAG);
2064	return (error);
2065}
2066
2067static int
2068zfs_ioc_vdev_attach(zfs_cmd_t *zc)
2069{
2070	spa_t *spa;
2071	int replacing = zc->zc_cookie;
2072	nvlist_t *config;
2073	int error;
2074
2075	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2076		return (error);
2077
2078	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2079	    zc->zc_iflags, &config)) == 0) {
2080		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
2081		nvlist_free(config);
2082	}
2083
2084	spa_close(spa, FTAG);
2085	return (error);
2086}
2087
2088static int
2089zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2090{
2091	spa_t *spa;
2092	int error;
2093
2094	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2095		return (error);
2096
2097	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2098
2099	spa_close(spa, FTAG);
2100	return (error);
2101}
2102
2103static int
2104zfs_ioc_vdev_split(zfs_cmd_t *zc)
2105{
2106	spa_t *spa;
2107	nvlist_t *config, *props = NULL;
2108	int error;
2109	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2110
2111	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2112		return (error);
2113
2114	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2115	    zc->zc_iflags, &config)) {
2116		spa_close(spa, FTAG);
2117		return (error);
2118	}
2119
2120	if (zc->zc_nvlist_src_size != 0 && (error =
2121	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2122	    zc->zc_iflags, &props))) {
2123		spa_close(spa, FTAG);
2124		nvlist_free(config);
2125		return (error);
2126	}
2127
2128	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2129
2130	spa_close(spa, FTAG);
2131
2132	nvlist_free(config);
2133	nvlist_free(props);
2134
2135	return (error);
2136}
2137
2138static int
2139zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2140{
2141	spa_t *spa;
2142	char *path = zc->zc_value;
2143	uint64_t guid = zc->zc_guid;
2144	int error;
2145
2146	error = spa_open(zc->zc_name, &spa, FTAG);
2147	if (error != 0)
2148		return (error);
2149
2150	error = spa_vdev_setpath(spa, guid, path);
2151	spa_close(spa, FTAG);
2152	return (error);
2153}
2154
2155static int
2156zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2157{
2158	spa_t *spa;
2159	char *fru = zc->zc_value;
2160	uint64_t guid = zc->zc_guid;
2161	int error;
2162
2163	error = spa_open(zc->zc_name, &spa, FTAG);
2164	if (error != 0)
2165		return (error);
2166
2167	error = spa_vdev_setfru(spa, guid, fru);
2168	spa_close(spa, FTAG);
2169	return (error);
2170}
2171
2172static int
2173zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2174{
2175	int error = 0;
2176	nvlist_t *nv;
2177
2178	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2179
2180	if (zc->zc_nvlist_dst != 0 &&
2181	    (error = dsl_prop_get_all(os, &nv)) == 0) {
2182		dmu_objset_stats(os, nv);
2183		/*
2184		 * NB: zvol_get_stats() will read the objset contents,
2185		 * which we aren't supposed to do with a
2186		 * DS_MODE_USER hold, because it could be
2187		 * inconsistent.  So this is a bit of a workaround...
2188		 * XXX reading with out owning
2189		 */
2190		if (!zc->zc_objset_stats.dds_inconsistent &&
2191		    dmu_objset_type(os) == DMU_OST_ZVOL) {
2192			error = zvol_get_stats(os, nv);
2193			if (error == EIO)
2194				return (error);
2195			VERIFY0(error);
2196		}
2197		error = put_nvlist(zc, nv);
2198		nvlist_free(nv);
2199	}
2200
2201	return (error);
2202}
2203
2204/*
2205 * inputs:
2206 * zc_name		name of filesystem
2207 * zc_nvlist_dst_size	size of buffer for property nvlist
2208 *
2209 * outputs:
2210 * zc_objset_stats	stats
2211 * zc_nvlist_dst	property nvlist
2212 * zc_nvlist_dst_size	size of property nvlist
2213 */
2214static int
2215zfs_ioc_objset_stats(zfs_cmd_t *zc)
2216{
2217	objset_t *os;
2218	int error;
2219
2220	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2221	if (error == 0) {
2222		error = zfs_ioc_objset_stats_impl(zc, os);
2223		dmu_objset_rele(os, FTAG);
2224	}
2225
2226	if (error == ENOMEM)
2227		error = 0;
2228	return (error);
2229}
2230
2231/*
2232 * inputs:
2233 * zc_name		name of filesystem
2234 * zc_nvlist_dst_size	size of buffer for property nvlist
2235 *
2236 * outputs:
2237 * zc_nvlist_dst	received property nvlist
2238 * zc_nvlist_dst_size	size of received property nvlist
2239 *
2240 * Gets received properties (distinct from local properties on or after
2241 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2242 * local property values.
2243 */
2244static int
2245zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2246{
2247	int error = 0;
2248	nvlist_t *nv;
2249
2250	/*
2251	 * Without this check, we would return local property values if the
2252	 * caller has not already received properties on or after
2253	 * SPA_VERSION_RECVD_PROPS.
2254	 */
2255	if (!dsl_prop_get_hasrecvd(zc->zc_name))
2256		return (SET_ERROR(ENOTSUP));
2257
2258	if (zc->zc_nvlist_dst != 0 &&
2259	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2260		error = put_nvlist(zc, nv);
2261		nvlist_free(nv);
2262	}
2263
2264	return (error);
2265}
2266
2267static int
2268nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2269{
2270	uint64_t value;
2271	int error;
2272
2273	/*
2274	 * zfs_get_zplprop() will either find a value or give us
2275	 * the default value (if there is one).
2276	 */
2277	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2278		return (error);
2279	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2280	return (0);
2281}
2282
2283/*
2284 * inputs:
2285 * zc_name		name of filesystem
2286 * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2287 *
2288 * outputs:
2289 * zc_nvlist_dst	zpl property nvlist
2290 * zc_nvlist_dst_size	size of zpl property nvlist
2291 */
2292static int
2293zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2294{
2295	objset_t *os;
2296	int err;
2297
2298	/* XXX reading without owning */
2299	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2300		return (err);
2301
2302	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2303
2304	/*
2305	 * NB: nvl_add_zplprop() will read the objset contents,
2306	 * which we aren't supposed to do with a DS_MODE_USER
2307	 * hold, because it could be inconsistent.
2308	 */
2309	if (zc->zc_nvlist_dst != 0 &&
2310	    !zc->zc_objset_stats.dds_inconsistent &&
2311	    dmu_objset_type(os) == DMU_OST_ZFS) {
2312		nvlist_t *nv;
2313
2314		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2315		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2316		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2317		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2318		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2319			err = put_nvlist(zc, nv);
2320		nvlist_free(nv);
2321	} else {
2322		err = SET_ERROR(ENOENT);
2323	}
2324	dmu_objset_rele(os, FTAG);
2325	return (err);
2326}
2327
2328boolean_t
2329dataset_name_hidden(const char *name)
2330{
2331	/*
2332	 * Skip over datasets that are not visible in this zone,
2333	 * internal datasets (which have a $ in their name), and
2334	 * temporary datasets (which have a % in their name).
2335	 */
2336	if (strchr(name, '$') != NULL)
2337		return (B_TRUE);
2338	if (strchr(name, '%') != NULL)
2339		return (B_TRUE);
2340	if (!INGLOBALZONE(curthread) && !zone_dataset_visible(name, NULL))
2341		return (B_TRUE);
2342	return (B_FALSE);
2343}
2344
2345/*
2346 * inputs:
2347 * zc_name		name of filesystem
2348 * zc_cookie		zap cursor
2349 * zc_nvlist_dst_size	size of buffer for property nvlist
2350 *
2351 * outputs:
2352 * zc_name		name of next filesystem
2353 * zc_cookie		zap cursor
2354 * zc_objset_stats	stats
2355 * zc_nvlist_dst	property nvlist
2356 * zc_nvlist_dst_size	size of property nvlist
2357 */
2358static int
2359zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2360{
2361	objset_t *os;
2362	int error;
2363	char *p;
2364	size_t orig_len = strlen(zc->zc_name);
2365
2366top:
2367	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2368		if (error == ENOENT)
2369			error = SET_ERROR(ESRCH);
2370		return (error);
2371	}
2372
2373	p = strrchr(zc->zc_name, '/');
2374	if (p == NULL || p[1] != '\0')
2375		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2376	p = zc->zc_name + strlen(zc->zc_name);
2377
2378	do {
2379		error = dmu_dir_list_next(os,
2380		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2381		    NULL, &zc->zc_cookie);
2382		if (error == ENOENT)
2383			error = SET_ERROR(ESRCH);
2384	} while (error == 0 && dataset_name_hidden(zc->zc_name));
2385	dmu_objset_rele(os, FTAG);
2386
2387	/*
2388	 * If it's an internal dataset (ie. with a '$' in its name),
2389	 * don't try to get stats for it, otherwise we'll return ENOENT.
2390	 */
2391	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2392		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2393		if (error == ENOENT) {
2394			/* We lost a race with destroy, get the next one. */
2395			zc->zc_name[orig_len] = '\0';
2396			goto top;
2397		}
2398	}
2399	return (error);
2400}
2401
2402/*
2403 * inputs:
2404 * zc_name		name of filesystem
2405 * zc_cookie		zap cursor
2406 * zc_nvlist_dst_size	size of buffer for property nvlist
2407 * zc_simple		when set, only name is requested
2408 *
2409 * outputs:
2410 * zc_name		name of next snapshot
2411 * zc_objset_stats	stats
2412 * zc_nvlist_dst	property nvlist
2413 * zc_nvlist_dst_size	size of property nvlist
2414 */
2415static int
2416zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2417{
2418	objset_t *os;
2419	int error;
2420
2421	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2422	if (error != 0) {
2423		return (error == ENOENT ? ESRCH : error);
2424	}
2425
2426	/*
2427	 * A dataset name of maximum length cannot have any snapshots,
2428	 * so exit immediately.
2429	 */
2430	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2431	    ZFS_MAX_DATASET_NAME_LEN) {
2432		dmu_objset_rele(os, FTAG);
2433		return (SET_ERROR(ESRCH));
2434	}
2435
2436	error = dmu_snapshot_list_next(os,
2437	    sizeof (zc->zc_name) - strlen(zc->zc_name),
2438	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2439	    NULL);
2440
2441	if (error == 0 && !zc->zc_simple) {
2442		dsl_dataset_t *ds;
2443		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2444
2445		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2446		if (error == 0) {
2447			objset_t *ossnap;
2448
2449			error = dmu_objset_from_ds(ds, &ossnap);
2450			if (error == 0)
2451				error = zfs_ioc_objset_stats_impl(zc, ossnap);
2452			dsl_dataset_rele(ds, FTAG);
2453		}
2454	} else if (error == ENOENT) {
2455		error = SET_ERROR(ESRCH);
2456	}
2457
2458	dmu_objset_rele(os, FTAG);
2459	/* if we failed, undo the @ that we tacked on to zc_name */
2460	if (error != 0)
2461		*strchr(zc->zc_name, '@') = '\0';
2462	return (error);
2463}
2464
2465static int
2466zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2467{
2468	const char *propname = nvpair_name(pair);
2469	uint64_t *valary;
2470	unsigned int vallen;
2471	const char *domain;
2472	char *dash;
2473	zfs_userquota_prop_t type;
2474	uint64_t rid;
2475	uint64_t quota;
2476	zfsvfs_t *zfsvfs;
2477	int err;
2478
2479	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2480		nvlist_t *attrs;
2481		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2482		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2483		    &pair) != 0)
2484			return (SET_ERROR(EINVAL));
2485	}
2486
2487	/*
2488	 * A correctly constructed propname is encoded as
2489	 * userquota@<rid>-<domain>.
2490	 */
2491	if ((dash = strchr(propname, '-')) == NULL ||
2492	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2493	    vallen != 3)
2494		return (SET_ERROR(EINVAL));
2495
2496	domain = dash + 1;
2497	type = valary[0];
2498	rid = valary[1];
2499	quota = valary[2];
2500
2501	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2502	if (err == 0) {
2503		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2504		zfsvfs_rele(zfsvfs, FTAG);
2505	}
2506
2507	return (err);
2508}
2509
2510/*
2511 * If the named property is one that has a special function to set its value,
2512 * return 0 on success and a positive error code on failure; otherwise if it is
2513 * not one of the special properties handled by this function, return -1.
2514 *
2515 * XXX: It would be better for callers of the property interface if we handled
2516 * these special cases in dsl_prop.c (in the dsl layer).
2517 */
2518static int
2519zfs_prop_set_special(const char *dsname, zprop_source_t source,
2520    nvpair_t *pair)
2521{
2522	const char *propname = nvpair_name(pair);
2523	zfs_prop_t prop = zfs_name_to_prop(propname);
2524	uint64_t intval;
2525	int err = -1;
2526
2527	if (prop == ZPROP_INVAL) {
2528		if (zfs_prop_userquota(propname))
2529			return (zfs_prop_set_userquota(dsname, pair));
2530		return (-1);
2531	}
2532
2533	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2534		nvlist_t *attrs;
2535		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2536		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2537		    &pair) == 0);
2538	}
2539
2540	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2541		return (-1);
2542
2543	VERIFY(0 == nvpair_value_uint64(pair, &intval));
2544
2545	switch (prop) {
2546	case ZFS_PROP_QUOTA:
2547		err = dsl_dir_set_quota(dsname, source, intval);
2548		break;
2549	case ZFS_PROP_REFQUOTA:
2550		err = dsl_dataset_set_refquota(dsname, source, intval);
2551		break;
2552	case ZFS_PROP_FILESYSTEM_LIMIT:
2553	case ZFS_PROP_SNAPSHOT_LIMIT:
2554		if (intval == UINT64_MAX) {
2555			/* clearing the limit, just do it */
2556			err = 0;
2557		} else {
2558			err = dsl_dir_activate_fs_ss_limit(dsname);
2559		}
2560		/*
2561		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2562		 * default path to set the value in the nvlist.
2563		 */
2564		if (err == 0)
2565			err = -1;
2566		break;
2567	case ZFS_PROP_RESERVATION:
2568		err = dsl_dir_set_reservation(dsname, source, intval);
2569		break;
2570	case ZFS_PROP_REFRESERVATION:
2571		err = dsl_dataset_set_refreservation(dsname, source, intval);
2572		break;
2573	case ZFS_PROP_VOLSIZE:
2574		err = zvol_set_volsize(dsname, intval);
2575		break;
2576	case ZFS_PROP_VERSION:
2577	{
2578		zfsvfs_t *zfsvfs;
2579
2580		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2581			break;
2582
2583		err = zfs_set_version(zfsvfs, intval);
2584		zfsvfs_rele(zfsvfs, FTAG);
2585
2586		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2587			zfs_cmd_t *zc;
2588
2589			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2590			(void) strcpy(zc->zc_name, dsname);
2591			(void) zfs_ioc_userspace_upgrade(zc);
2592			kmem_free(zc, sizeof (zfs_cmd_t));
2593		}
2594		break;
2595	}
2596	default:
2597		err = -1;
2598	}
2599
2600	return (err);
2601}
2602
2603/*
2604 * This function is best effort. If it fails to set any of the given properties,
2605 * it continues to set as many as it can and returns the last error
2606 * encountered. If the caller provides a non-NULL errlist, it will be filled in
2607 * with the list of names of all the properties that failed along with the
2608 * corresponding error numbers.
2609 *
2610 * If every property is set successfully, zero is returned and errlist is not
2611 * modified.
2612 */
2613int
2614zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2615    nvlist_t *errlist)
2616{
2617	nvpair_t *pair;
2618	nvpair_t *propval;
2619	int rv = 0;
2620	uint64_t intval;
2621	char *strval;
2622	nvlist_t *genericnvl = fnvlist_alloc();
2623	nvlist_t *retrynvl = fnvlist_alloc();
2624
2625retry:
2626	pair = NULL;
2627	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2628		const char *propname = nvpair_name(pair);
2629		zfs_prop_t prop = zfs_name_to_prop(propname);
2630		int err = 0;
2631
2632		/* decode the property value */
2633		propval = pair;
2634		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2635			nvlist_t *attrs;
2636			attrs = fnvpair_value_nvlist(pair);
2637			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2638			    &propval) != 0)
2639				err = SET_ERROR(EINVAL);
2640		}
2641
2642		/* Validate value type */
2643		if (err == 0 && prop == ZPROP_INVAL) {
2644			if (zfs_prop_user(propname)) {
2645				if (nvpair_type(propval) != DATA_TYPE_STRING)
2646					err = SET_ERROR(EINVAL);
2647			} else if (zfs_prop_userquota(propname)) {
2648				if (nvpair_type(propval) !=
2649				    DATA_TYPE_UINT64_ARRAY)
2650					err = SET_ERROR(EINVAL);
2651			} else {
2652				err = SET_ERROR(EINVAL);
2653			}
2654		} else if (err == 0) {
2655			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2656				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2657					err = SET_ERROR(EINVAL);
2658			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2659				const char *unused;
2660
2661				intval = fnvpair_value_uint64(propval);
2662
2663				switch (zfs_prop_get_type(prop)) {
2664				case PROP_TYPE_NUMBER:
2665					break;
2666				case PROP_TYPE_STRING:
2667					err = SET_ERROR(EINVAL);
2668					break;
2669				case PROP_TYPE_INDEX:
2670					if (zfs_prop_index_to_string(prop,
2671					    intval, &unused) != 0)
2672						err = SET_ERROR(EINVAL);
2673					break;
2674				default:
2675					cmn_err(CE_PANIC,
2676					    "unknown property type");
2677				}
2678			} else {
2679				err = SET_ERROR(EINVAL);
2680			}
2681		}
2682
2683		/* Validate permissions */
2684		if (err == 0)
2685			err = zfs_check_settable(dsname, pair, CRED());
2686
2687		if (err == 0) {
2688			err = zfs_prop_set_special(dsname, source, pair);
2689			if (err == -1) {
2690				/*
2691				 * For better performance we build up a list of
2692				 * properties to set in a single transaction.
2693				 */
2694				err = nvlist_add_nvpair(genericnvl, pair);
2695			} else if (err != 0 && nvl != retrynvl) {
2696				/*
2697				 * This may be a spurious error caused by
2698				 * receiving quota and reservation out of order.
2699				 * Try again in a second pass.
2700				 */
2701				err = nvlist_add_nvpair(retrynvl, pair);
2702			}
2703		}
2704
2705		if (err != 0) {
2706			if (errlist != NULL)
2707				fnvlist_add_int32(errlist, propname, err);
2708			rv = err;
2709		}
2710	}
2711
2712	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2713		nvl = retrynvl;
2714		goto retry;
2715	}
2716
2717	if (!nvlist_empty(genericnvl) &&
2718	    dsl_props_set(dsname, source, genericnvl) != 0) {
2719		/*
2720		 * If this fails, we still want to set as many properties as we
2721		 * can, so try setting them individually.
2722		 */
2723		pair = NULL;
2724		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2725			const char *propname = nvpair_name(pair);
2726			int err = 0;
2727
2728			propval = pair;
2729			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2730				nvlist_t *attrs;
2731				attrs = fnvpair_value_nvlist(pair);
2732				propval = fnvlist_lookup_nvpair(attrs,
2733				    ZPROP_VALUE);
2734			}
2735
2736			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2737				strval = fnvpair_value_string(propval);
2738				err = dsl_prop_set_string(dsname, propname,
2739				    source, strval);
2740			} else {
2741				intval = fnvpair_value_uint64(propval);
2742				err = dsl_prop_set_int(dsname, propname, source,
2743				    intval);
2744			}
2745
2746			if (err != 0) {
2747				if (errlist != NULL) {
2748					fnvlist_add_int32(errlist, propname,
2749					    err);
2750				}
2751				rv = err;
2752			}
2753		}
2754	}
2755	nvlist_free(genericnvl);
2756	nvlist_free(retrynvl);
2757
2758	return (rv);
2759}
2760
2761/*
2762 * Check that all the properties are valid user properties.
2763 */
2764static int
2765zfs_check_userprops(nvlist_t *nvl)
2766{
2767	nvpair_t *pair = NULL;
2768
2769	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2770		const char *propname = nvpair_name(pair);
2771
2772		if (!zfs_prop_user(propname) ||
2773		    nvpair_type(pair) != DATA_TYPE_STRING)
2774			return (SET_ERROR(EINVAL));
2775
2776		if (strlen(propname) >= ZAP_MAXNAMELEN)
2777			return (SET_ERROR(ENAMETOOLONG));
2778
2779		if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2780			return (E2BIG);
2781	}
2782	return (0);
2783}
2784
2785static void
2786props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2787{
2788	nvpair_t *pair;
2789
2790	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2791
2792	pair = NULL;
2793	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2794		if (nvlist_exists(skipped, nvpair_name(pair)))
2795			continue;
2796
2797		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2798	}
2799}
2800
2801static int
2802clear_received_props(const char *dsname, nvlist_t *props,
2803    nvlist_t *skipped)
2804{
2805	int err = 0;
2806	nvlist_t *cleared_props = NULL;
2807	props_skip(props, skipped, &cleared_props);
2808	if (!nvlist_empty(cleared_props)) {
2809		/*
2810		 * Acts on local properties until the dataset has received
2811		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2812		 */
2813		zprop_source_t flags = (ZPROP_SRC_NONE |
2814		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2815		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2816	}
2817	nvlist_free(cleared_props);
2818	return (err);
2819}
2820
2821/*
2822 * inputs:
2823 * zc_name		name of filesystem
2824 * zc_value		name of property to set
2825 * zc_nvlist_src{_size}	nvlist of properties to apply
2826 * zc_cookie		received properties flag
2827 *
2828 * outputs:
2829 * zc_nvlist_dst{_size} error for each unapplied received property
2830 */
2831static int
2832zfs_ioc_set_prop(zfs_cmd_t *zc)
2833{
2834	nvlist_t *nvl;
2835	boolean_t received = zc->zc_cookie;
2836	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2837	    ZPROP_SRC_LOCAL);
2838	nvlist_t *errors;
2839	int error;
2840
2841	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2842	    zc->zc_iflags, &nvl)) != 0)
2843		return (error);
2844
2845	if (received) {
2846		nvlist_t *origprops;
2847
2848		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2849			(void) clear_received_props(zc->zc_name,
2850			    origprops, nvl);
2851			nvlist_free(origprops);
2852		}
2853
2854		error = dsl_prop_set_hasrecvd(zc->zc_name);
2855	}
2856
2857	errors = fnvlist_alloc();
2858	if (error == 0)
2859		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2860
2861	if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2862		(void) put_nvlist(zc, errors);
2863	}
2864
2865	nvlist_free(errors);
2866	nvlist_free(nvl);
2867	return (error);
2868}
2869
2870/*
2871 * inputs:
2872 * zc_name		name of filesystem
2873 * zc_value		name of property to inherit
2874 * zc_cookie		revert to received value if TRUE
2875 *
2876 * outputs:		none
2877 */
2878static int
2879zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2880{
2881	const char *propname = zc->zc_value;
2882	zfs_prop_t prop = zfs_name_to_prop(propname);
2883	boolean_t received = zc->zc_cookie;
2884	zprop_source_t source = (received
2885	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2886	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2887
2888	if (received) {
2889		nvlist_t *dummy;
2890		nvpair_t *pair;
2891		zprop_type_t type;
2892		int err;
2893
2894		/*
2895		 * zfs_prop_set_special() expects properties in the form of an
2896		 * nvpair with type info.
2897		 */
2898		if (prop == ZPROP_INVAL) {
2899			if (!zfs_prop_user(propname))
2900				return (SET_ERROR(EINVAL));
2901
2902			type = PROP_TYPE_STRING;
2903		} else if (prop == ZFS_PROP_VOLSIZE ||
2904		    prop == ZFS_PROP_VERSION) {
2905			return (SET_ERROR(EINVAL));
2906		} else {
2907			type = zfs_prop_get_type(prop);
2908		}
2909
2910		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2911
2912		switch (type) {
2913		case PROP_TYPE_STRING:
2914			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2915			break;
2916		case PROP_TYPE_NUMBER:
2917		case PROP_TYPE_INDEX:
2918			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2919			break;
2920		default:
2921			nvlist_free(dummy);
2922			return (SET_ERROR(EINVAL));
2923		}
2924
2925		pair = nvlist_next_nvpair(dummy, NULL);
2926		err = zfs_prop_set_special(zc->zc_name, source, pair);
2927		nvlist_free(dummy);
2928		if (err != -1)
2929			return (err); /* special property already handled */
2930	} else {
2931		/*
2932		 * Only check this in the non-received case. We want to allow
2933		 * 'inherit -S' to revert non-inheritable properties like quota
2934		 * and reservation to the received or default values even though
2935		 * they are not considered inheritable.
2936		 */
2937		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2938			return (SET_ERROR(EINVAL));
2939	}
2940
2941	/* property name has been validated by zfs_secpolicy_inherit_prop() */
2942	return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2943}
2944
2945static int
2946zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2947{
2948	nvlist_t *props;
2949	spa_t *spa;
2950	int error;
2951	nvpair_t *pair;
2952
2953	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2954	    zc->zc_iflags, &props))
2955		return (error);
2956
2957	/*
2958	 * If the only property is the configfile, then just do a spa_lookup()
2959	 * to handle the faulted case.
2960	 */
2961	pair = nvlist_next_nvpair(props, NULL);
2962	if (pair != NULL && strcmp(nvpair_name(pair),
2963	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2964	    nvlist_next_nvpair(props, pair) == NULL) {
2965		mutex_enter(&spa_namespace_lock);
2966		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2967			spa_configfile_set(spa, props, B_FALSE);
2968			spa_write_cachefile(spa, B_FALSE, B_TRUE);
2969		}
2970		mutex_exit(&spa_namespace_lock);
2971		if (spa != NULL) {
2972			nvlist_free(props);
2973			return (0);
2974		}
2975	}
2976
2977	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2978		nvlist_free(props);
2979		return (error);
2980	}
2981
2982	error = spa_prop_set(spa, props);
2983
2984	nvlist_free(props);
2985	spa_close(spa, FTAG);
2986
2987	return (error);
2988}
2989
2990static int
2991zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2992{
2993	spa_t *spa;
2994	int error;
2995	nvlist_t *nvp = NULL;
2996
2997	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2998		/*
2999		 * If the pool is faulted, there may be properties we can still
3000		 * get (such as altroot and cachefile), so attempt to get them
3001		 * anyway.
3002		 */
3003		mutex_enter(&spa_namespace_lock);
3004		if ((spa = spa_lookup(zc->zc_name)) != NULL)
3005			error = spa_prop_get(spa, &nvp);
3006		mutex_exit(&spa_namespace_lock);
3007	} else {
3008		error = spa_prop_get(spa, &nvp);
3009		spa_close(spa, FTAG);
3010	}
3011
3012	if (error == 0 && zc->zc_nvlist_dst != 0)
3013		error = put_nvlist(zc, nvp);
3014	else
3015		error = SET_ERROR(EFAULT);
3016
3017	nvlist_free(nvp);
3018	return (error);
3019}
3020
3021/*
3022 * inputs:
3023 * zc_name		name of filesystem
3024 * zc_nvlist_src{_size}	nvlist of delegated permissions
3025 * zc_perm_action	allow/unallow flag
3026 *
3027 * outputs:		none
3028 */
3029static int
3030zfs_ioc_set_fsacl(zfs_cmd_t *zc)
3031{
3032	int error;
3033	nvlist_t *fsaclnv = NULL;
3034
3035	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3036	    zc->zc_iflags, &fsaclnv)) != 0)
3037		return (error);
3038
3039	/*
3040	 * Verify nvlist is constructed correctly
3041	 */
3042	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
3043		nvlist_free(fsaclnv);
3044		return (SET_ERROR(EINVAL));
3045	}
3046
3047	/*
3048	 * If we don't have PRIV_SYS_MOUNT, then validate
3049	 * that user is allowed to hand out each permission in
3050	 * the nvlist(s)
3051	 */
3052
3053	error = secpolicy_zfs(CRED());
3054	if (error != 0) {
3055		if (zc->zc_perm_action == B_FALSE) {
3056			error = dsl_deleg_can_allow(zc->zc_name,
3057			    fsaclnv, CRED());
3058		} else {
3059			error = dsl_deleg_can_unallow(zc->zc_name,
3060			    fsaclnv, CRED());
3061		}
3062	}
3063
3064	if (error == 0)
3065		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3066
3067	nvlist_free(fsaclnv);
3068	return (error);
3069}
3070
3071/*
3072 * inputs:
3073 * zc_name		name of filesystem
3074 *
3075 * outputs:
3076 * zc_nvlist_src{_size}	nvlist of delegated permissions
3077 */
3078static int
3079zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3080{
3081	nvlist_t *nvp;
3082	int error;
3083
3084	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3085		error = put_nvlist(zc, nvp);
3086		nvlist_free(nvp);
3087	}
3088
3089	return (error);
3090}
3091
3092/* ARGSUSED */
3093static void
3094zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3095{
3096	zfs_creat_t *zct = arg;
3097
3098	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3099}
3100
3101#define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
3102
3103/*
3104 * inputs:
3105 * os			parent objset pointer (NULL if root fs)
3106 * fuids_ok		fuids allowed in this version of the spa?
3107 * sa_ok		SAs allowed in this version of the spa?
3108 * createprops		list of properties requested by creator
3109 *
3110 * outputs:
3111 * zplprops	values for the zplprops we attach to the master node object
3112 * is_ci	true if requested file system will be purely case-insensitive
3113 *
3114 * Determine the settings for utf8only, normalization and
3115 * casesensitivity.  Specific values may have been requested by the
3116 * creator and/or we can inherit values from the parent dataset.  If
3117 * the file system is of too early a vintage, a creator can not
3118 * request settings for these properties, even if the requested
3119 * setting is the default value.  We don't actually want to create dsl
3120 * properties for these, so remove them from the source nvlist after
3121 * processing.
3122 */
3123static int
3124zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3125    boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3126    nvlist_t *zplprops, boolean_t *is_ci)
3127{
3128	uint64_t sense = ZFS_PROP_UNDEFINED;
3129	uint64_t norm = ZFS_PROP_UNDEFINED;
3130	uint64_t u8 = ZFS_PROP_UNDEFINED;
3131
3132	ASSERT(zplprops != NULL);
3133
3134	/* parent dataset must be a filesystem */
3135	if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3136		return (SET_ERROR(ZFS_ERR_WRONG_PARENT));
3137
3138	/*
3139	 * Pull out creator prop choices, if any.
3140	 */
3141	if (createprops) {
3142		(void) nvlist_lookup_uint64(createprops,
3143		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3144		(void) nvlist_lookup_uint64(createprops,
3145		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3146		(void) nvlist_remove_all(createprops,
3147		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3148		(void) nvlist_lookup_uint64(createprops,
3149		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3150		(void) nvlist_remove_all(createprops,
3151		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3152		(void) nvlist_lookup_uint64(createprops,
3153		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3154		(void) nvlist_remove_all(createprops,
3155		    zfs_prop_to_name(ZFS_PROP_CASE));
3156	}
3157
3158	/*
3159	 * If the zpl version requested is whacky or the file system
3160	 * or pool is version is too "young" to support normalization
3161	 * and the creator tried to set a value for one of the props,
3162	 * error out.
3163	 */
3164	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3165	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3166	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3167	    (zplver < ZPL_VERSION_NORMALIZATION &&
3168	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3169	    sense != ZFS_PROP_UNDEFINED)))
3170		return (SET_ERROR(ENOTSUP));
3171
3172	/*
3173	 * Put the version in the zplprops
3174	 */
3175	VERIFY(nvlist_add_uint64(zplprops,
3176	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3177
3178	if (norm == ZFS_PROP_UNDEFINED)
3179		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3180	VERIFY(nvlist_add_uint64(zplprops,
3181	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3182
3183	/*
3184	 * If we're normalizing, names must always be valid UTF-8 strings.
3185	 */
3186	if (norm)
3187		u8 = 1;
3188	if (u8 == ZFS_PROP_UNDEFINED)
3189		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3190	VERIFY(nvlist_add_uint64(zplprops,
3191	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3192
3193	if (sense == ZFS_PROP_UNDEFINED)
3194		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3195	VERIFY(nvlist_add_uint64(zplprops,
3196	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3197
3198	if (is_ci)
3199		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3200
3201	return (0);
3202}
3203
3204static int
3205zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3206    nvlist_t *zplprops, boolean_t *is_ci)
3207{
3208	boolean_t fuids_ok, sa_ok;
3209	uint64_t zplver = ZPL_VERSION;
3210	objset_t *os = NULL;
3211	char parentname[ZFS_MAX_DATASET_NAME_LEN];
3212	spa_t *spa;
3213	uint64_t spa_vers;
3214	int error;
3215
3216	zfs_get_parent(dataset, parentname, sizeof (parentname));
3217
3218	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3219		return (error);
3220
3221	spa_vers = spa_version(spa);
3222	spa_close(spa, FTAG);
3223
3224	zplver = zfs_zpl_version_map(spa_vers);
3225	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3226	sa_ok = (zplver >= ZPL_VERSION_SA);
3227
3228	/*
3229	 * Open parent object set so we can inherit zplprop values.
3230	 */
3231	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3232		return (error);
3233
3234	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3235	    zplprops, is_ci);
3236	dmu_objset_rele(os, FTAG);
3237	return (error);
3238}
3239
3240static int
3241zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3242    nvlist_t *zplprops, boolean_t *is_ci)
3243{
3244	boolean_t fuids_ok;
3245	boolean_t sa_ok;
3246	uint64_t zplver = ZPL_VERSION;
3247	int error;
3248
3249	zplver = zfs_zpl_version_map(spa_vers);
3250	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3251	sa_ok = (zplver >= ZPL_VERSION_SA);
3252
3253	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3254	    createprops, zplprops, is_ci);
3255	return (error);
3256}
3257
3258/*
3259 * innvl: {
3260 *     "type" -> dmu_objset_type_t (int32)
3261 *     (optional) "props" -> { prop -> value }
3262 * }
3263 *
3264 * outnvl: propname -> error code (int32)
3265 */
3266static int
3267zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3268{
3269	int error = 0;
3270	zfs_creat_t zct = { 0 };
3271	nvlist_t *nvprops = NULL;
3272	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3273	int32_t type32;
3274	dmu_objset_type_t type;
3275	boolean_t is_insensitive = B_FALSE;
3276
3277	if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3278		return (SET_ERROR(EINVAL));
3279	type = type32;
3280	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3281
3282	switch (type) {
3283	case DMU_OST_ZFS:
3284		cbfunc = zfs_create_cb;
3285		break;
3286
3287	case DMU_OST_ZVOL:
3288		cbfunc = zvol_create_cb;
3289		break;
3290
3291	default:
3292		cbfunc = NULL;
3293		break;
3294	}
3295	if (strchr(fsname, '@') ||
3296	    strchr(fsname, '%'))
3297		return (SET_ERROR(EINVAL));
3298
3299	zct.zct_props = nvprops;
3300
3301	if (cbfunc == NULL)
3302		return (SET_ERROR(EINVAL));
3303
3304	if (type == DMU_OST_ZVOL) {
3305		uint64_t volsize, volblocksize;
3306
3307		if (nvprops == NULL)
3308			return (SET_ERROR(EINVAL));
3309		if (nvlist_lookup_uint64(nvprops,
3310		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3311			return (SET_ERROR(EINVAL));
3312
3313		if ((error = nvlist_lookup_uint64(nvprops,
3314		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3315		    &volblocksize)) != 0 && error != ENOENT)
3316			return (SET_ERROR(EINVAL));
3317
3318		if (error != 0)
3319			volblocksize = zfs_prop_default_numeric(
3320			    ZFS_PROP_VOLBLOCKSIZE);
3321
3322		if ((error = zvol_check_volblocksize(
3323		    volblocksize)) != 0 ||
3324		    (error = zvol_check_volsize(volsize,
3325		    volblocksize)) != 0)
3326			return (error);
3327	} else if (type == DMU_OST_ZFS) {
3328		int error;
3329
3330		/*
3331		 * We have to have normalization and
3332		 * case-folding flags correct when we do the
3333		 * file system creation, so go figure them out
3334		 * now.
3335		 */
3336		VERIFY(nvlist_alloc(&zct.zct_zplprops,
3337		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3338		error = zfs_fill_zplprops(fsname, nvprops,
3339		    zct.zct_zplprops, &is_insensitive);
3340		if (error != 0) {
3341			nvlist_free(zct.zct_zplprops);
3342			return (error);
3343		}
3344	}
3345
3346	error = dmu_objset_create(fsname, type,
3347	    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3348	nvlist_free(zct.zct_zplprops);
3349
3350	/*
3351	 * It would be nice to do this atomically.
3352	 */
3353	if (error == 0) {
3354		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3355		    nvprops, outnvl);
3356		if (error != 0)
3357			(void) dsl_destroy_head(fsname);
3358	}
3359#ifdef __FreeBSD__
3360	if (error == 0 && type == DMU_OST_ZVOL)
3361		zvol_create_minors(fsname);
3362#endif
3363	return (error);
3364}
3365
3366/*
3367 * innvl: {
3368 *     "origin" -> name of origin snapshot
3369 *     (optional) "props" -> { prop -> value }
3370 * }
3371 *
3372 * outnvl: propname -> error code (int32)
3373 */
3374static int
3375zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3376{
3377	int error = 0;
3378	nvlist_t *nvprops = NULL;
3379	char *origin_name;
3380
3381	if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3382		return (SET_ERROR(EINVAL));
3383	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3384
3385	if (strchr(fsname, '@') ||
3386	    strchr(fsname, '%'))
3387		return (SET_ERROR(EINVAL));
3388
3389	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3390		return (SET_ERROR(EINVAL));
3391	error = dmu_objset_clone(fsname, origin_name);
3392	if (error != 0)
3393		return (error);
3394
3395	/*
3396	 * It would be nice to do this atomically.
3397	 */
3398	if (error == 0) {
3399		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3400		    nvprops, outnvl);
3401		if (error != 0)
3402			(void) dsl_destroy_head(fsname);
3403	}
3404#ifdef __FreeBSD__
3405	if (error == 0)
3406		zvol_create_minors(fsname);
3407#endif
3408	return (error);
3409}
3410
3411/* ARGSUSED */
3412static int
3413zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3414{
3415	if (strchr(fsname, '@') ||
3416	    strchr(fsname, '%'))
3417		return (SET_ERROR(EINVAL));
3418
3419	return (dmu_objset_remap_indirects(fsname));
3420}
3421
3422/*
3423 * innvl: {
3424 *     "snaps" -> { snapshot1, snapshot2 }
3425 *     (optional) "props" -> { prop -> value (string) }
3426 * }
3427 *
3428 * outnvl: snapshot -> error code (int32)
3429 */
3430static int
3431zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3432{
3433	nvlist_t *snaps;
3434	nvlist_t *props = NULL;
3435	int error, poollen;
3436	nvpair_t *pair;
3437
3438	(void) nvlist_lookup_nvlist(innvl, "props", &props);
3439	if (!nvlist_empty(props) &&
3440	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3441		return (SET_ERROR(ENOTSUP));
3442	if ((error = zfs_check_userprops(props)) != 0)
3443		return (error);
3444
3445	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3446		return (SET_ERROR(EINVAL));
3447	poollen = strlen(poolname);
3448	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3449	    pair = nvlist_next_nvpair(snaps, pair)) {
3450		const char *name = nvpair_name(pair);
3451		char *cp = strchr(name, '@');
3452
3453		/*
3454		 * The snap name must contain an @, and the part after it must
3455		 * contain only valid characters.
3456		 */
3457		if (cp == NULL ||
3458		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3459			return (SET_ERROR(EINVAL));
3460
3461		/*
3462		 * The snap must be in the specified pool.
3463		 */
3464		if (strncmp(name, poolname, poollen) != 0 ||
3465		    (name[poollen] != '/' && name[poollen] != '@'))
3466			return (SET_ERROR(EXDEV));
3467
3468		/*
3469		 * Check for permission to set the properties on the fs.
3470		 */
3471		if (!nvlist_empty(props)) {
3472			*cp = '\0';
3473			error = zfs_secpolicy_write_perms(name,
3474			    ZFS_DELEG_PERM_USERPROP, CRED());
3475			*cp = '@';
3476			if (error != 0)
3477				return (error);
3478		}
3479
3480		/* This must be the only snap of this fs. */
3481		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3482		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3483			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3484			    == 0) {
3485				return (SET_ERROR(EXDEV));
3486			}
3487		}
3488	}
3489
3490	error = dsl_dataset_snapshot(snaps, props, outnvl);
3491	return (error);
3492}
3493
3494/*
3495 * innvl: "message" -> string
3496 */
3497/* ARGSUSED */
3498static int
3499zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3500{
3501	char *message;
3502	spa_t *spa;
3503	int error;
3504	char *poolname;
3505
3506	/*
3507	 * The poolname in the ioctl is not set, we get it from the TSD,
3508	 * which was set at the end of the last successful ioctl that allows
3509	 * logging.  The secpolicy func already checked that it is set.
3510	 * Only one log ioctl is allowed after each successful ioctl, so
3511	 * we clear the TSD here.
3512	 */
3513	poolname = tsd_get(zfs_allow_log_key);
3514	(void) tsd_set(zfs_allow_log_key, NULL);
3515	error = spa_open(poolname, &spa, FTAG);
3516	strfree(poolname);
3517	if (error != 0)
3518		return (error);
3519
3520	if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
3521		spa_close(spa, FTAG);
3522		return (SET_ERROR(EINVAL));
3523	}
3524
3525	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3526		spa_close(spa, FTAG);
3527		return (SET_ERROR(ENOTSUP));
3528	}
3529
3530	error = spa_history_log(spa, message);
3531	spa_close(spa, FTAG);
3532	return (error);
3533}
3534
#ifdef __FreeBSD__
/*
 * Write a command string into a vdev label via vdev_label_write_pad2().
 *
 * innvl: {
 *     ZPOOL_CONFIG_POOL_GUID -> pool guid (uint64)
 *     ZPOOL_CONFIG_GUID -> vdev guid (uint64)
 *     "command" -> string
 * }
 *
 * The pool is located by guid rather than by the ioctl's name argument.
 * Returns EINVAL if any input is missing, ENOENT if no pool matches the
 * guids, ENODEV if the vdev cannot be found in the opened pool, otherwise
 * the result of spa_open() / vdev_label_write_pad2().
 */
/* ARGSUSED */
static int
zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
{
	char name[MAXNAMELEN];
	spa_t *spa;
	vdev_t *vd;
	char *command;
	uint64_t pool_guid;
	uint64_t vdev_guid;
	int error;

	if (nvlist_lookup_uint64(innvl,
	    ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		return (SET_ERROR(EINVAL));
	if (nvlist_lookup_uint64(innvl,
	    ZPOOL_CONFIG_GUID, &vdev_guid) != 0)
		return (SET_ERROR(EINVAL));
	if (nvlist_lookup_string(innvl,
	    "command", &command) != 0)
		return (SET_ERROR(EINVAL));

	/*
	 * Resolve the guids to a pool name under the namespace lock, then
	 * open the pool by name.  The copy is bounded by the buffer size so
	 * an over-long name can never overrun the stack buffer.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_by_guid(pool_guid, vdev_guid);
	if (spa != NULL)
		(void) snprintf(name, sizeof (name), "%s", spa_name(spa));
	mutex_exit(&spa_namespace_lock);
	if (spa == NULL)
		return (SET_ERROR(ENOENT));

	if ((error = spa_open(name, &spa, FTAG)) != 0)
		return (error);
	spa_vdev_state_enter(spa, SCL_ALL);
	vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE);
	if (vd == NULL) {
		(void) spa_vdev_state_exit(spa, NULL, ENXIO);
		spa_close(spa, FTAG);
		return (SET_ERROR(ENODEV));
	}
	error = vdev_label_write_pad2(vd, command, strlen(command));
	(void) spa_vdev_state_exit(spa, NULL, 0);
	txg_wait_synced(spa->spa_dsl_pool, 0);
	spa_close(spa, FTAG);
	return (error);
}
#endif
3581
/*
 * Force-unmount the snapshot named by snapname, if it is currently mounted.
 *
 * The dp_config_rwlock must not be held when calling this, because the
 * unmount may need to write out data.
 *
 * This function is best-effort.  Callers must deal gracefully if it
 * remains mounted (or is remounted after this call).
 *
 * Nothing is returned.  The function does nothing if the argument is not
 * a snapshot name (it contains no '@') or if getzfsvfs() fails for it;
 * otherwise it calls dounmount() and discards the result.
 */
void
zfs_unmount_snap(const char *snapname)
{
	vfs_t *vfsp = NULL;
	zfsvfs_t *zfsvfs = NULL;

	/* Only snapshot names ("fs@snap") are of interest here. */
	if (strchr(snapname, '@') == NULL)
		return;

	/* A getzfsvfs() failure is treated as "nothing to unmount". */
	int err = getzfsvfs(snapname, &zfsvfs);
	if (err != 0) {
		ASSERT3P(zfsvfs, ==, NULL);
		return;
	}
	vfsp = zfsvfs->z_vfs;

	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));

#ifdef illumos
	/*
	 * Lock the covered vnode for the unmount and release the vfs
	 * (NOTE(review): presumably the hold taken by getzfsvfs() --
	 * confirm).  Give up quietly if the lock cannot be taken.
	 */
	err = vn_vfswlock(vfsp->vfs_vnodecovered);
	VFS_RELE(vfsp);
	if (err != 0)
		return;
#endif

	/*
	 * Always force the unmount for snapshots.
	 */
#ifdef illumos
	(void) dounmount(vfsp, MS_FORCE, kcred);
#else
	/*
	 * Take a reference before unbusying the vfs.
	 * NOTE(review): presumably getzfsvfs() returned it busied and
	 * dounmount() consumes the reference -- confirm.
	 */
	vfs_ref(vfsp);
	vfs_unbusy(vfsp);
	(void) dounmount(vfsp, MS_FORCE, curthread);
#endif
}
3628
/*
 * Callback-shaped wrapper around zfs_unmount_snap().  The arg pointer is
 * ignored and 0 is always returned, since the unmount is best-effort.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	int rv = 0;

	zfs_unmount_snap(snapname);

	return (rv);
}
3636
3637/*
3638 * When a clone is destroyed, its origin may also need to be destroyed,
3639 * in which case it must be unmounted.  This routine will do that unmount
3640 * if necessary.
3641 */
3642void
3643zfs_destroy_unmount_origin(const char *fsname)
3644{
3645	int error;
3646	objset_t *os;
3647	dsl_dataset_t *ds;
3648
3649	error = dmu_objset_hold(fsname, FTAG, &os);
3650	if (error != 0)
3651		return;
3652	ds = dmu_objset_ds(os);
3653	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3654		char originname[ZFS_MAX_DATASET_NAME_LEN];
3655		dsl_dataset_name(ds->ds_prev, originname);
3656		dmu_objset_rele(os, FTAG);
3657		zfs_unmount_snap(originname);
3658	} else {
3659		dmu_objset_rele(os, FTAG);
3660	}
3661}
3662
3663/*
3664 * innvl: {
3665 *     "snaps" -> { snapshot1, snapshot2 }
3666 *     (optional boolean) "defer"
3667 * }
3668 *
3669 * outnvl: snapshot -> error code (int32)
3670 *
3671 */
3672/* ARGSUSED */
3673static int
3674zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3675{
3676	int error, poollen;
3677	nvlist_t *snaps;
3678	nvpair_t *pair;
3679	boolean_t defer;
3680
3681	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3682		return (SET_ERROR(EINVAL));
3683	defer = nvlist_exists(innvl, "defer");
3684
3685	poollen = strlen(poolname);
3686	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3687	    pair = nvlist_next_nvpair(snaps, pair)) {
3688		const char *name = nvpair_name(pair);
3689
3690		/*
3691		 * The snap must be in the specified pool to prevent the
3692		 * invalid removal of zvol minors below.
3693		 */
3694		if (strncmp(name, poolname, poollen) != 0 ||
3695		    (name[poollen] != '/' && name[poollen] != '@'))
3696			return (SET_ERROR(EXDEV));
3697
3698		zfs_unmount_snap(nvpair_name(pair));
3699#if defined(__FreeBSD__)
3700		zvol_remove_minors(name);
3701#endif
3702	}
3703
3704	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3705}
3706
3707/*
3708 * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
3709 * All bookmarks must be in the same pool.
3710 *
3711 * innvl: {
3712 *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
3713 * }
3714 *
3715 * outnvl: bookmark -> error code (int32)
3716 *
3717 */
3718/* ARGSUSED */
3719static int
3720zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3721{
3722	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3723	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3724		char *snap_name;
3725
3726		/*
3727		 * Verify the snapshot argument.
3728		 */
3729		if (nvpair_value_string(pair, &snap_name) != 0)
3730			return (SET_ERROR(EINVAL));
3731
3732
3733		/* Verify that the keys (bookmarks) are unique */
3734		for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
3735		    pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
3736			if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
3737				return (SET_ERROR(EINVAL));
3738		}
3739	}
3740
3741	return (dsl_bookmark_create(innvl, outnvl));
3742}
3743
3744/*
3745 * innvl: {
3746 *     property 1, property 2, ...
3747 * }
3748 *
3749 * outnvl: {
3750 *     bookmark name 1 -> { property 1, property 2, ... },
3751 *     bookmark name 2 -> { property 1, property 2, ... }
3752 * }
3753 *
3754 */
3755static int
3756zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3757{
3758	return (dsl_get_bookmarks(fsname, innvl, outnvl));
3759}
3760
3761/*
3762 * innvl: {
3763 *     bookmark name 1, bookmark name 2
3764 * }
3765 *
3766 * outnvl: bookmark -> error code (int32)
3767 *
3768 */
3769static int
3770zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3771    nvlist_t *outnvl)
3772{
3773	int error, poollen;
3774
3775	poollen = strlen(poolname);
3776	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3777	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3778		const char *name = nvpair_name(pair);
3779		const char *cp = strchr(name, '#');
3780
3781		/*
3782		 * The bookmark name must contain an #, and the part after it
3783		 * must contain only valid characters.
3784		 */
3785		if (cp == NULL ||
3786		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3787			return (SET_ERROR(EINVAL));
3788
3789		/*
3790		 * The bookmark must be in the specified pool.
3791		 */
3792		if (strncmp(name, poolname, poollen) != 0 ||
3793		    (name[poollen] != '/' && name[poollen] != '#'))
3794			return (SET_ERROR(EXDEV));
3795	}
3796
3797	error = dsl_bookmark_destroy(innvl, outnvl);
3798	return (error);
3799}
3800
3801static int
3802zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
3803    nvlist_t *outnvl)
3804{
3805	char *program;
3806	uint64_t instrlimit, memlimit;
3807	boolean_t sync_flag;
3808	nvpair_t *nvarg = NULL;
3809
3810	if (0 != nvlist_lookup_string(innvl, ZCP_ARG_PROGRAM, &program)) {
3811		return (EINVAL);
3812	}
3813	if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
3814		sync_flag = B_TRUE;
3815	}
3816	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
3817		instrlimit = ZCP_DEFAULT_INSTRLIMIT;
3818	}
3819	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
3820		memlimit = ZCP_DEFAULT_MEMLIMIT;
3821	}
3822	if (0 != nvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST, &nvarg)) {
3823		return (EINVAL);
3824	}
3825
3826	if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
3827		return (EINVAL);
3828	if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
3829		return (EINVAL);
3830
3831	return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
3832	    nvarg, outnvl));
3833}
3834
3835/*
3836 * innvl: unused
3837 * outnvl: empty
3838 */
3839/* ARGSUSED */
3840static int
3841zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3842{
3843	return (spa_checkpoint(poolname));
3844}
3845
3846/*
3847 * innvl: unused
3848 * outnvl: empty
3849 */
3850/* ARGSUSED */
3851static int
3852zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
3853    nvlist_t *outnvl)
3854{
3855	return (spa_checkpoint_discard(poolname));
3856}
3857
3858/*
3859 * inputs:
3860 * zc_name		name of dataset to destroy
3861 * zc_defer_destroy	mark for deferred destroy
3862 *
3863 * outputs:		none
3864 */
3865static int
3866zfs_ioc_destroy(zfs_cmd_t *zc)
3867{
3868	objset_t *os;
3869	dmu_objset_type_t ost;
3870	int err;
3871
3872	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
3873	if (err != 0)
3874		return (err);
3875	ost = dmu_objset_type(os);
3876	dmu_objset_rele(os, FTAG);
3877
3878	if (ost == DMU_OST_ZFS)
3879		zfs_unmount_snap(zc->zc_name);
3880
3881	if (strchr(zc->zc_name, '@'))
3882		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3883	else
3884		err = dsl_destroy_head(zc->zc_name);
3885	if (ost == DMU_OST_ZVOL && err == 0)
3886#ifdef __FreeBSD__
3887		zvol_remove_minors(zc->zc_name);
3888#else
3889		(void) zvol_remove_minor(zc->zc_name);
3890#endif
3891	return (err);
3892}
3893
3894/*
3895 * innvl: {
3896 *     vdevs: {
3897 *         guid 1, guid 2, ...
3898 *     },
3899 *     func: POOL_INITIALIZE_{CANCEL|DO|SUSPEND}
3900 * }
3901 *
3902 * outnvl: {
3903 *     [func: EINVAL (if provided command type didn't make sense)],
3904 *     [vdevs: {
3905 *         guid1: errno, (see function body for possible errnos)
3906 *         ...
3907 *     }]
3908 * }
3909 *
3910 */
3911static int
3912zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3913{
3914	spa_t *spa;
3915	int error;
3916
3917	error = spa_open(poolname, &spa, FTAG);
3918	if (error != 0)
3919		return (error);
3920
3921	uint64_t cmd_type;
3922	if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
3923	    &cmd_type) != 0) {
3924		spa_close(spa, FTAG);
3925		return (SET_ERROR(EINVAL));
3926	}
3927	if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
3928	    cmd_type == POOL_INITIALIZE_DO ||
3929	    cmd_type == POOL_INITIALIZE_SUSPEND)) {
3930		spa_close(spa, FTAG);
3931		return (SET_ERROR(EINVAL));
3932	}
3933
3934	nvlist_t *vdev_guids;
3935	if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
3936	    &vdev_guids) != 0) {
3937		spa_close(spa, FTAG);
3938		return (SET_ERROR(EINVAL));
3939	}
3940
3941	nvlist_t *vdev_errlist = fnvlist_alloc();
3942	int total_errors = 0;
3943
3944	for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
3945	    pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
3946		uint64_t vdev_guid = fnvpair_value_uint64(pair);
3947
3948		error = spa_vdev_initialize(spa, vdev_guid, cmd_type);
3949		if (error != 0) {
3950			char guid_as_str[MAXNAMELEN];
3951
3952			(void) snprintf(guid_as_str, sizeof (guid_as_str),
3953			    "%llu", (unsigned long long)vdev_guid);
3954			fnvlist_add_int64(vdev_errlist, guid_as_str, error);
3955			total_errors++;
3956		}
3957	}
3958	if (fnvlist_size(vdev_errlist) > 0) {
3959		fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
3960		    vdev_errlist);
3961	}
3962	fnvlist_free(vdev_errlist);
3963
3964	spa_close(spa, FTAG);
3965	return (total_errors > 0 ? EINVAL : 0);
3966}
3967
3968/*
3969 * fsname is name of dataset to rollback (to most recent snapshot)
3970 *
3971 * innvl may contain name of expected target snapshot
3972 *
3973 * outnvl: "target" -> name of most recent snapshot
3974 * }
3975 */
3976/* ARGSUSED */
3977static int
3978zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3979{
3980	zfsvfs_t *zfsvfs;
3981	char *target = NULL;
3982	int error;
3983
3984	(void) nvlist_lookup_string(innvl, "target", &target);
3985	if (target != NULL) {
3986		const char *cp = strchr(target, '@');
3987
3988		/*
3989		 * The snap name must contain an @, and the part after it must
3990		 * contain only valid characters.
3991		 */
3992		if (cp == NULL ||
3993		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3994			return (SET_ERROR(EINVAL));
3995	}
3996
3997	if (getzfsvfs(fsname, &zfsvfs) == 0) {
3998		dsl_dataset_t *ds;
3999
4000		ds = dmu_objset_ds(zfsvfs->z_os);
4001		error = zfs_suspend_fs(zfsvfs);
4002		if (error == 0) {
4003			int resume_err;
4004
4005			error = dsl_dataset_rollback(fsname, target, zfsvfs,
4006			    outnvl);
4007			resume_err = zfs_resume_fs(zfsvfs, ds);
4008			error = error ? error : resume_err;
4009		}
4010#ifdef illumos
4011		VFS_RELE(zfsvfs->z_vfs);
4012#else
4013		vfs_unbusy(zfsvfs->z_vfs);
4014#endif
4015	} else {
4016		error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
4017	}
4018	return (error);
4019}
4020
4021static int
4022recursive_unmount(const char *fsname, void *arg)
4023{
4024	const char *snapname = arg;
4025	char fullname[ZFS_MAX_DATASET_NAME_LEN];
4026
4027	(void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
4028	zfs_unmount_snap(fullname);
4029
4030	return (0);
4031}
4032
/*
 * Rename a dataset, snapshot or bookmark.
 *
 * inputs:
 * zc_name	old name of dataset or bookmark
 * zc_value	new name of dataset or bookmark
 * zc_cookie	bit 0: recursive flag (only valid for snapshots)
 *		bit 1: (FreeBSD only) allow renaming while mounted
 *
 * outputs:	none
 *
 * Returns EINVAL for malformed or mismatched names, ENOTSUP for a
 * recursive bookmark rename, EXDEV when a snapshot or bookmark would move
 * to a different filesystem, otherwise the result of the underlying
 * rename routine.
 */
static int
zfs_ioc_rename(zfs_cmd_t *zc)
{
	objset_t *os;
	dmu_objset_type_t ost;
	boolean_t recursive = zc->zc_cookie & 1;
	char *pos, *pos2;
	/* Stays B_TRUE unless built for FreeBSD, where bit 1 decides. */
	boolean_t allow_mounted = B_TRUE;
	int err;

#ifdef __FreeBSD__
	allow_mounted = (zc->zc_cookie & 2) != 0;
#endif

	/* Make sure both names are NUL-terminated before parsing them. */
	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';

	pos = strchr(zc->zc_name, '#');
	if (pos != NULL) {
		/* Bookmarks must be in same fs. */
		pos2 = strchr(zc->zc_value, '#');
		if (pos2 == NULL)
			return (SET_ERROR(EINVAL));

		/* Recursive flag is not supported yet. */
		if (recursive)
			return (SET_ERROR(ENOTSUP));

		/*
		 * Temporarily split both names at the '#' so the filesystem
		 * parts can be compared; the separators are restored below.
		 */
		*pos = '\0';
		*pos2 = '\0';
		if (strcmp(zc->zc_name, zc->zc_value) == 0) {
			err = dsl_bookmark_rename(zc->zc_name,
			    pos + 1, pos2 + 1);
		} else {
			err = SET_ERROR(EXDEV);
		}
		*pos = '#';
		*pos2 = '#';
		return (err);
	}

	/* "zfs rename" from and to ...%recv datasets should both fail */
	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
	    dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
	    strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
		return (SET_ERROR(EINVAL));

	/* Hold the source objset just long enough to learn its type. */
	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
	if (err != 0)
		return (err);
	ost = dmu_objset_type(os);
	dmu_objset_rele(os, FTAG);

	pos = strchr(zc->zc_name, '@');
	if (pos != NULL) {
		/* Snapshots must be in same fs. */
		pos2 = strchr(zc->zc_value, '@');
		if (pos2 == NULL)
			return (SET_ERROR(EINVAL));
		/*
		 * Temporarily split both names at the '@'; the separators
		 * are restored before returning.
		 */
		*pos = '\0';
		*pos2 = '\0';
		if (strcmp(zc->zc_name, zc->zc_value) != 0) {
			err = SET_ERROR(EXDEV);
		} else {
			/*
			 * err is still 0 here from the successful hold
			 * above, so the rename proceeds when no unmount
			 * pass is needed.
			 */
			if (ost == DMU_OST_ZFS && !allow_mounted) {
				err = dmu_objset_find(zc->zc_name,
				    recursive_unmount, pos + 1,
				    recursive ? DS_FIND_CHILDREN : 0);
			}
			if (err == 0) {
				err = dsl_dataset_rename_snapshot(zc->zc_name,
				    pos + 1, pos2 + 1, recursive);
			}
		}
		*pos = '@';
		*pos2 = '@';
		return (err);
	} else {
		/* Plain dataset (filesystem or volume) rename. */
#ifdef illumos
		if (ost == DMU_OST_ZVOL)
			(void) zvol_remove_minor(zc->zc_name);
#endif
		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
	}
}
4126
4127static int
4128zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
4129{
4130	const char *propname = nvpair_name(pair);
4131	boolean_t issnap = (strchr(dsname, '@') != NULL);
4132	zfs_prop_t prop = zfs_name_to_prop(propname);
4133	uint64_t intval;
4134	int err;
4135
4136	if (prop == ZPROP_INVAL) {
4137		if (zfs_prop_user(propname)) {
4138			if (err = zfs_secpolicy_write_perms(dsname,
4139			    ZFS_DELEG_PERM_USERPROP, cr))
4140				return (err);
4141			return (0);
4142		}
4143
4144		if (!issnap && zfs_prop_userquota(propname)) {
4145			const char *perm = NULL;
4146			const char *uq_prefix =
4147			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
4148			const char *gq_prefix =
4149			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
4150
4151			if (strncmp(propname, uq_prefix,
4152			    strlen(uq_prefix)) == 0) {
4153				perm = ZFS_DELEG_PERM_USERQUOTA;
4154			} else if (strncmp(propname, gq_prefix,
4155			    strlen(gq_prefix)) == 0) {
4156				perm = ZFS_DELEG_PERM_GROUPQUOTA;
4157			} else {
4158				/* USERUSED and GROUPUSED are read-only */
4159				return (SET_ERROR(EINVAL));
4160			}
4161
4162			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
4163				return (err);
4164			return (0);
4165		}
4166
4167		return (SET_ERROR(EINVAL));
4168	}
4169
4170	if (issnap)
4171		return (SET_ERROR(EINVAL));
4172
4173	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
4174		/*
4175		 * dsl_prop_get_all_impl() returns properties in this
4176		 * format.
4177		 */
4178		nvlist_t *attrs;
4179		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
4180		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4181		    &pair) == 0);
4182	}
4183
4184	/*
4185	 * Check that this value is valid for this pool version
4186	 */
4187	switch (prop) {
4188	case ZFS_PROP_COMPRESSION:
4189		/*
4190		 * If the user specified gzip compression, make sure
4191		 * the SPA supports it. We ignore any errors here since
4192		 * we'll catch them later.
4193		 */
4194		if (nvpair_value_uint64(pair, &intval) == 0) {
4195			if (intval >= ZIO_COMPRESS_GZIP_1 &&
4196			    intval <= ZIO_COMPRESS_GZIP_9 &&
4197			    zfs_earlier_version(dsname,
4198			    SPA_VERSION_GZIP_COMPRESSION)) {
4199				return (SET_ERROR(ENOTSUP));
4200			}
4201
4202			if (intval == ZIO_COMPRESS_ZLE &&
4203			    zfs_earlier_version(dsname,
4204			    SPA_VERSION_ZLE_COMPRESSION))
4205				return (SET_ERROR(ENOTSUP));
4206
4207			if (intval == ZIO_COMPRESS_LZ4) {
4208				spa_t *spa;
4209
4210				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4211					return (err);
4212
4213				if (!spa_feature_is_enabled(spa,
4214				    SPA_FEATURE_LZ4_COMPRESS)) {
4215					spa_close(spa, FTAG);
4216					return (SET_ERROR(ENOTSUP));
4217				}
4218				spa_close(spa, FTAG);
4219			}
4220
4221			/*
4222			 * If this is a bootable dataset then
4223			 * verify that the compression algorithm
4224			 * is supported for booting. We must return
4225			 * something other than ENOTSUP since it
4226			 * implies a downrev pool version.
4227			 */
4228			if (zfs_is_bootfs(dsname) &&
4229			    !BOOTFS_COMPRESS_VALID(intval)) {
4230				return (SET_ERROR(ERANGE));
4231			}
4232		}
4233		break;
4234
4235	case ZFS_PROP_COPIES:
4236		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4237			return (SET_ERROR(ENOTSUP));
4238		break;
4239
4240	case ZFS_PROP_RECORDSIZE:
4241		/* Record sizes above 128k need the feature to be enabled */
4242		if (nvpair_value_uint64(pair, &intval) == 0 &&
4243		    intval > SPA_OLD_MAXBLOCKSIZE) {
4244			spa_t *spa;
4245
4246			/*
4247			 * We don't allow setting the property above 1MB,
4248			 * unless the tunable has been changed.
4249			 */
4250			if (intval > zfs_max_recordsize ||
4251			    intval > SPA_MAXBLOCKSIZE)
4252				return (SET_ERROR(ERANGE));
4253
4254			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4255				return (err);
4256
4257			if (!spa_feature_is_enabled(spa,
4258			    SPA_FEATURE_LARGE_BLOCKS)) {
4259				spa_close(spa, FTAG);
4260				return (SET_ERROR(ENOTSUP));
4261			}
4262			spa_close(spa, FTAG);
4263		}
4264		break;
4265
4266	case ZFS_PROP_SHARESMB:
4267		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4268			return (SET_ERROR(ENOTSUP));
4269		break;
4270
4271	case ZFS_PROP_ACLINHERIT:
4272		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4273		    nvpair_value_uint64(pair, &intval) == 0) {
4274			if (intval == ZFS_ACL_PASSTHROUGH_X &&
4275			    zfs_earlier_version(dsname,
4276			    SPA_VERSION_PASSTHROUGH_X))
4277				return (SET_ERROR(ENOTSUP));
4278		}
4279		break;
4280
4281	case ZFS_PROP_CHECKSUM:
4282	case ZFS_PROP_DEDUP:
4283	{
4284		spa_feature_t feature;
4285		spa_t *spa;
4286
4287		/* dedup feature version checks */
4288		if (prop == ZFS_PROP_DEDUP &&
4289		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4290			return (SET_ERROR(ENOTSUP));
4291
4292		if (nvpair_value_uint64(pair, &intval) != 0)
4293			return (SET_ERROR(EINVAL));
4294
4295		/* check prop value is enabled in features */
4296		feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
4297		if (feature == SPA_FEATURE_NONE)
4298			break;
4299
4300		if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4301			return (err);
4302
4303		if (!spa_feature_is_enabled(spa, feature)) {
4304			spa_close(spa, FTAG);
4305			return (SET_ERROR(ENOTSUP));
4306		}
4307		spa_close(spa, FTAG);
4308		break;
4309	}
4310	}
4311
4312	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4313}
4314
4315/*
4316 * Checks for a race condition to make sure we don't increment a feature flag
4317 * multiple times.
4318 */
4319static int
4320zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
4321{
4322	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4323	spa_feature_t *featurep = arg;
4324
4325	if (!spa_feature_is_active(spa, *featurep))
4326		return (0);
4327	else
4328		return (SET_ERROR(EBUSY));
4329}
4330
4331/*
4332 * The callback invoked on feature activation in the sync task caused by
4333 * zfs_prop_activate_feature.
4334 */
4335static void
4336zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
4337{
4338	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4339	spa_feature_t *featurep = arg;
4340
4341	spa_feature_incr(spa, *featurep, tx);
4342}
4343
4344/*
4345 * Activates a feature on a pool in response to a property setting. This
4346 * creates a new sync task which modifies the pool to reflect the feature
4347 * as being active.
4348 */
4349static int
4350zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
4351{
4352	int err;
4353
4354	/* EBUSY here indicates that the feature is already active */
4355	err = dsl_sync_task(spa_name(spa),
4356	    zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
4357	    &feature, 2, ZFS_SPACE_CHECK_RESERVED);
4358
4359	if (err != 0 && err != EBUSY)
4360		return (err);
4361	else
4362		return (0);
4363}
4364
4365/*
4366 * Removes properties from the given props list that fail permission checks
4367 * needed to clear them and to restore them in case of a receive error. For each
4368 * property, make sure we have both set and inherit permissions.
4369 *
4370 * Returns the first error encountered if any permission checks fail. If the
4371 * caller provides a non-NULL errlist, it also gives the complete list of names
4372 * of all the properties that failed a permission check along with the
4373 * corresponding error numbers. The caller is responsible for freeing the
4374 * returned errlist.
4375 *
4376 * If every property checks out successfully, zero is returned and the list
4377 * pointed at by errlist is NULL.
4378 */
4379static int
4380zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
4381{
4382	zfs_cmd_t *zc;
4383	nvpair_t *pair, *next_pair;
4384	nvlist_t *errors;
4385	int err, rv = 0;
4386
4387	if (props == NULL)
4388		return (0);
4389
4390	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4391
4392	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4393	(void) strcpy(zc->zc_name, dataset);
4394	pair = nvlist_next_nvpair(props, NULL);
4395	while (pair != NULL) {
4396		next_pair = nvlist_next_nvpair(props, pair);
4397
4398		(void) strcpy(zc->zc_value, nvpair_name(pair));
4399		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4400		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4401			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4402			VERIFY(nvlist_add_int32(errors,
4403			    zc->zc_value, err) == 0);
4404		}
4405		pair = next_pair;
4406	}
4407	kmem_free(zc, sizeof (zfs_cmd_t));
4408
4409	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4410		nvlist_free(errors);
4411		errors = NULL;
4412	} else {
4413		VERIFY(nvpair_value_int32(pair, &rv) == 0);
4414	}
4415
4416	if (errlist == NULL)
4417		nvlist_free(errors);
4418	else
4419		*errlist = errors;
4420
4421	return (rv);
4422}
4423
4424static boolean_t
4425propval_equals(nvpair_t *p1, nvpair_t *p2)
4426{
4427	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4428		/* dsl_prop_get_all_impl() format */
4429		nvlist_t *attrs;
4430		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4431		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4432		    &p1) == 0);
4433	}
4434
4435	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4436		nvlist_t *attrs;
4437		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4438		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4439		    &p2) == 0);
4440	}
4441
4442	if (nvpair_type(p1) != nvpair_type(p2))
4443		return (B_FALSE);
4444
4445	if (nvpair_type(p1) == DATA_TYPE_STRING) {
4446		char *valstr1, *valstr2;
4447
4448		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4449		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4450		return (strcmp(valstr1, valstr2) == 0);
4451	} else {
4452		uint64_t intval1, intval2;
4453
4454		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4455		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4456		return (intval1 == intval2);
4457	}
4458}
4459
4460/*
4461 * Remove properties from props if they are not going to change (as determined
4462 * by comparison with origprops). Remove them from origprops as well, since we
4463 * do not need to clear or restore properties that won't change.
4464 */
4465static void
4466props_reduce(nvlist_t *props, nvlist_t *origprops)
4467{
4468	nvpair_t *pair, *next_pair;
4469
4470	if (origprops == NULL)
4471		return; /* all props need to be received */
4472
4473	pair = nvlist_next_nvpair(props, NULL);
4474	while (pair != NULL) {
4475		const char *propname = nvpair_name(pair);
4476		nvpair_t *match;
4477
4478		next_pair = nvlist_next_nvpair(props, pair);
4479
4480		if ((nvlist_lookup_nvpair(origprops, propname,
4481		    &match) != 0) || !propval_equals(pair, match))
4482			goto next; /* need to set received value */
4483
4484		/* don't clear the existing received value */
4485		(void) nvlist_remove_nvpair(origprops, match);
4486		/* don't bother receiving the property */
4487		(void) nvlist_remove_nvpair(props, pair);
4488next:
4489		pair = next_pair;
4490	}
4491}
4492
4493/*
4494 * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4495 * For example, refquota cannot be set until after the receipt of a dataset,
4496 * because in replication streams, an older/earlier snapshot may exceed the
4497 * refquota.  We want to receive the older/earlier snapshot, but setting
4498 * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4499 * the older/earlier snapshot from being received (with EDQUOT).
4500 *
4501 * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4502 *
4503 * libzfs will need to be judicious handling errors encountered by props
4504 * extracted by this function.
4505 */
4506static nvlist_t *
4507extract_delay_props(nvlist_t *props)
4508{
4509	nvlist_t *delayprops;
4510	nvpair_t *nvp, *tmp;
4511	static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
4512	int i;
4513
4514	VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4515
4516	for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4517	    nvp = nvlist_next_nvpair(props, nvp)) {
4518		/*
4519		 * strcmp() is safe because zfs_prop_to_name() always returns
4520		 * a bounded string.
4521		 */
4522		for (i = 0; delayable[i] != 0; i++) {
4523			if (strcmp(zfs_prop_to_name(delayable[i]),
4524			    nvpair_name(nvp)) == 0) {
4525				break;
4526			}
4527		}
4528		if (delayable[i] != 0) {
4529			tmp = nvlist_prev_nvpair(props, nvp);
4530			VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4531			VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4532			nvp = tmp;
4533		}
4534	}
4535
4536	if (nvlist_empty(delayprops)) {
4537		nvlist_free(delayprops);
4538		delayprops = NULL;
4539	}
4540	return (delayprops);
4541}
4542
#ifdef	DEBUG
/*
 * Debug-build-only flag.
 * NOTE(review): presumably consulted on the receive path to inject an
 * error for testing -- confirm against its users.
 */
static boolean_t zfs_ioc_recv_inject_err;
#endif
4546
/*
 * Receive a send stream from a file descriptor into a new snapshot and
 * apply the supplied properties as received properties.
 *
 * inputs:
 * zc_name		name of containing filesystem
 * zc_nvlist_src{_size}	nvlist of properties to apply
 * zc_value		name of snapshot to create
 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
 * zc_cookie		file descriptor to recv from
 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
 * zc_guid		force flag
 * zc_cleanup_fd	cleanup-on-exit file descriptor
 * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
 * zc_resumable		if data is incomplete assume sender will resume
 *
 * outputs:
 * zc_cookie		number of bytes read
 * zc_nvlist_dst{_size} error for each unapplied received property
 * zc_obj		zprop_errflags_t
 * zc_action_handle	handle for this guid/ds mapping
 *
 * Returns the receive error if there was one, otherwise the error (if any)
 * from setting up or reporting the received properties.
 */
static int
zfs_ioc_recv(zfs_cmd_t *zc)
{
	file_t *fp;
	dmu_recv_cookie_t drc;
	boolean_t force = (boolean_t)zc->zc_guid;
	int fd;
	int error = 0;
	int props_error = 0;
	nvlist_t *errors;
	offset_t off;
	nvlist_t *props = NULL; /* sent properties */
	nvlist_t *origprops = NULL; /* existing properties */
	nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
	char *origin = NULL;
	char *tosnap;
	char tofs[ZFS_MAX_DATASET_NAME_LEN];
	cap_rights_t rights;
	boolean_t first_recvd_props = B_FALSE;

	/* zc_value must be a valid "fs@snap" name that contains no '%'. */
	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
	    strchr(zc->zc_value, '@') == NULL ||
	    strchr(zc->zc_value, '%'))
		return (SET_ERROR(EINVAL));

	/*
	 * Split the name at '@': tofs keeps the filesystem part and tosnap
	 * points at the snapshot part inside the same buffer.
	 * NOTE(review): the unbounded strcpy() presumably relies on
	 * dataset_namecheck() having bounded the length -- confirm.
	 */
	(void) strcpy(tofs, zc->zc_value);
	tosnap = strchr(tofs, '@');
	*tosnap++ = '\0';

	if (zc->zc_nvlist_src != 0 &&
	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
	    zc->zc_iflags, &props)) != 0)
		return (error);

	fd = zc->zc_cookie;
#ifdef illumos
	fp = getf(fd);
#else
	/*
	 * NOTE(review): the return value is ignored and fp is not
	 * initialized beforehand; this assumes fget_read() stores NULL in fp
	 * on failure -- confirm.
	 */
	fget_read(curthread, fd, cap_rights_init(&rights, CAP_PREAD), &fp);
#endif
	if (fp == NULL) {
		nvlist_free(props);
		return (SET_ERROR(EBADF));
	}

	/* Collects per-property failures that are reported to the caller. */
	errors = fnvlist_alloc();

	if (zc->zc_string[0])
		origin = zc->zc_string;

	error = dmu_recv_begin(tofs, tosnap,
	    &zc->zc_begin_record, force, zc->zc_resumable, origin, &drc);
	/* Nothing has been modified yet, so skip straight to the cleanup. */
	if (error != 0)
		goto out;

	/*
	 * Set properties before we receive the stream so that they are applied
	 * to the new data. Note that we must call dmu_recv_stream() if
	 * dmu_recv_begin() succeeds.
	 */
	if (props != NULL && !drc.drc_newfs) {
		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
		    SPA_VERSION_RECVD_PROPS &&
		    !dsl_prop_get_hasrecvd(tofs))
			first_recvd_props = B_TRUE;

		/*
		 * If new received properties are supplied, they are to
		 * completely replace the existing received properties, so stash
		 * away the existing ones.
		 */
		if (dsl_prop_get_received(tofs, &origprops) == 0) {
			nvlist_t *errlist = NULL;
			/*
			 * Don't bother writing a property if its value won't
			 * change (and avoid the unnecessary security checks).
			 *
			 * The first receive after SPA_VERSION_RECVD_PROPS is a
			 * special case where we blow away all local properties
			 * regardless.
			 */
			if (!first_recvd_props)
				props_reduce(props, origprops);
			if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
				(void) nvlist_merge(errors, errlist, 0);
			nvlist_free(errlist);

			if (clear_received_props(tofs, origprops,
			    first_recvd_props ? NULL : props) != 0)
				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
		} else {
			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
		}
	}

	if (props != NULL) {
		props_error = dsl_prop_set_hasrecvd(tofs);

		if (props_error == 0) {
			/*
			 * Pull the delayable properties out of props; they
			 * are set only after the receive has completed.
			 */
			delayprops = extract_delay_props(props);
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
			    props, errors);
		}
	}

	/* Work on a private copy of the offset; written back further down. */
	off = fp->f_offset;
	error = dmu_recv_stream(&drc, fp, &off, zc->zc_cleanup_fd,
	    &zc->zc_action_handle);

	if (error == 0) {
		zfsvfs_t *zfsvfs = NULL;

		if (getzfsvfs(tofs, &zfsvfs) == 0) {
			/* online recv */
			dsl_dataset_t *ds;
			int end_err;

			ds = dmu_objset_ds(zfsvfs->z_os);
			error = zfs_suspend_fs(zfsvfs);
			/*
			 * If the suspend fails, then the recv_end will
			 * likely also fail, and clean up after itself.
			 */
			end_err = dmu_recv_end(&drc, zfsvfs);
			if (error == 0)
				error = zfs_resume_fs(zfsvfs, ds);
			/* A suspend/resume error wins over the end error. */
			error = error ? error : end_err;
#ifdef illumos
			VFS_RELE(zfsvfs->z_vfs);
#else
			vfs_unbusy(zfsvfs->z_vfs);
#endif
		} else {
			error = dmu_recv_end(&drc, NULL);
		}

		/* Set delayed properties now, after we're done receiving. */
		if (delayprops != NULL && error == 0) {
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
			    delayprops, errors);
		}
	}

	if (delayprops != NULL) {
		/*
		 * Merge delayed props back in with initial props, in case
		 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
		 * we have to make sure clear_received_props() includes
		 * the delayed properties).
		 *
		 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
		 * using ASSERT() will be just like a VERIFY.
		 */
		ASSERT(nvlist_merge(props, delayprops, 0) == 0);
		nvlist_free(delayprops);
	}

	/*
	 * Now that all props, initial and delayed, are set, report the prop
	 * errors to the caller.
	 */
	if (zc->zc_nvlist_dst_size != 0 &&
	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
	    put_nvlist(zc, errors) != 0)) {
		/*
		 * Caller made zc->zc_nvlist_dst less than the minimum expected
		 * size or supplied an invalid address.
		 */
		props_error = SET_ERROR(EINVAL);
	}

	/* Report how far the stream advanced, then publish the new offset. */
	zc->zc_cookie = off - fp->f_offset;
	if (off >= 0 && off <= MAXOFFSET_T)
		fp->f_offset = off;

#ifdef	DEBUG
	/* One-shot fault injection: pretend the receive failed. */
	if (zfs_ioc_recv_inject_err) {
		zfs_ioc_recv_inject_err = B_FALSE;
		error = 1;
	}
#endif

#ifdef __FreeBSD__
	if (error == 0)
		zvol_create_minors(tofs);
#endif

	/*
	 * On error, restore the original props.
	 */
	if (error != 0 && props != NULL && !drc.drc_newfs) {
		if (clear_received_props(tofs, props, NULL) != 0) {
			/*
			 * We failed to clear the received properties.
			 * Since we may have left a $recvd value on the
			 * system, we can't clear the $hasrecvd flag.
			 */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		} else if (first_recvd_props) {
			dsl_prop_unset_hasrecvd(tofs);
		}

		if (origprops == NULL && !drc.drc_newfs) {
			/* We failed to stash the original properties. */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}

		/*
		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in the
		 * first new-style receive.
		 */
		if (origprops != NULL &&
		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
		    origprops, NULL) != 0) {
			/*
			 * We stashed the original properties but failed to
			 * restore them.
			 */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}
	}
out:
	/* Common cleanup, also reached directly when dmu_recv_begin() fails. */
	nvlist_free(props);
	nvlist_free(origprops);
	nvlist_free(errors);
	releasef(fd);

	/* A receive error takes precedence over a property error. */
	if (error == 0)
		error = props_error;

	return (error);
}
4801
4802/*
4803 * inputs:
4804 * zc_name	name of snapshot to send
4805 * zc_cookie	file descriptor to send stream to
4806 * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
4807 * zc_sendobj	objsetid of snapshot to send
4808 * zc_fromobj	objsetid of incremental fromsnap (may be zero)
4809 * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
4810 *		output size in zc_objset_type.
4811 * zc_flags	lzc_send_flags
4812 *
4813 * outputs:
4814 * zc_objset_type	estimated size, if zc_guid is set
4815 *
4816 * NOTE: This is no longer the preferred interface, any new functionality
4817 *	  should be added to zfs_ioc_send_new() instead.
4818 */
4819static int
4820zfs_ioc_send(zfs_cmd_t *zc)
4821{
4822	int error;
4823	offset_t off;
4824	boolean_t estimate = (zc->zc_guid != 0);
4825	boolean_t embedok = (zc->zc_flags & 0x1);
4826	boolean_t large_block_ok = (zc->zc_flags & 0x2);
4827	boolean_t compressok = (zc->zc_flags & 0x4);
4828
4829	if (zc->zc_obj != 0) {
4830		dsl_pool_t *dp;
4831		dsl_dataset_t *tosnap;
4832
4833		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4834		if (error != 0)
4835			return (error);
4836
4837		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4838		if (error != 0) {
4839			dsl_pool_rele(dp, FTAG);
4840			return (error);
4841		}
4842
4843		if (dsl_dir_is_clone(tosnap->ds_dir))
4844			zc->zc_fromobj =
4845			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
4846		dsl_dataset_rele(tosnap, FTAG);
4847		dsl_pool_rele(dp, FTAG);
4848	}
4849
4850	if (estimate) {
4851		dsl_pool_t *dp;
4852		dsl_dataset_t *tosnap;
4853		dsl_dataset_t *fromsnap = NULL;
4854
4855		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4856		if (error != 0)
4857			return (error);
4858
4859		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4860		if (error != 0) {
4861			dsl_pool_rele(dp, FTAG);
4862			return (error);
4863		}
4864
4865		if (zc->zc_fromobj != 0) {
4866			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
4867			    FTAG, &fromsnap);
4868			if (error != 0) {
4869				dsl_dataset_rele(tosnap, FTAG);
4870				dsl_pool_rele(dp, FTAG);
4871				return (error);
4872			}
4873		}
4874
4875		error = dmu_send_estimate(tosnap, fromsnap, compressok,
4876		    &zc->zc_objset_type);
4877
4878		if (fromsnap != NULL)
4879			dsl_dataset_rele(fromsnap, FTAG);
4880		dsl_dataset_rele(tosnap, FTAG);
4881		dsl_pool_rele(dp, FTAG);
4882	} else {
4883		file_t *fp;
4884		cap_rights_t rights;
4885
4886#ifdef illumos
4887		fp = getf(zc->zc_cookie);
4888#else
4889		fget_write(curthread, zc->zc_cookie,
4890		    cap_rights_init(&rights, CAP_WRITE), &fp);
4891#endif
4892		if (fp == NULL)
4893			return (SET_ERROR(EBADF));
4894
4895		off = fp->f_offset;
4896		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
4897		    zc->zc_fromobj, embedok, large_block_ok, compressok,
4898#ifdef illumos
4899		    zc->zc_cookie, fp->f_vnode, &off);
4900#else
4901		    zc->zc_cookie, fp, &off);
4902#endif
4903
4904		if (off >= 0 && off <= MAXOFFSET_T)
4905			fp->f_offset = off;
4906		releasef(zc->zc_cookie);
4907	}
4908	return (error);
4909}
4910
4911/*
4912 * inputs:
4913 * zc_name	name of snapshot on which to report progress
4914 * zc_cookie	file descriptor of send stream
4915 *
4916 * outputs:
4917 * zc_cookie	number of bytes written in send stream thus far
4918 */
4919static int
4920zfs_ioc_send_progress(zfs_cmd_t *zc)
4921{
4922	dsl_pool_t *dp;
4923	dsl_dataset_t *ds;
4924	dmu_sendarg_t *dsp = NULL;
4925	int error;
4926
4927	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4928	if (error != 0)
4929		return (error);
4930
4931	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4932	if (error != 0) {
4933		dsl_pool_rele(dp, FTAG);
4934		return (error);
4935	}
4936
4937	mutex_enter(&ds->ds_sendstream_lock);
4938
4939	/*
4940	 * Iterate over all the send streams currently active on this dataset.
4941	 * If there's one which matches the specified file descriptor _and_ the
4942	 * stream was started by the current process, return the progress of
4943	 * that stream.
4944	 */
4945	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
4946	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
4947		if (dsp->dsa_outfd == zc->zc_cookie &&
4948		    dsp->dsa_proc == curproc)
4949			break;
4950	}
4951
4952	if (dsp != NULL)
4953		zc->zc_cookie = *(dsp->dsa_off);
4954	else
4955		error = SET_ERROR(ENOENT);
4956
4957	mutex_exit(&ds->ds_sendstream_lock);
4958	dsl_dataset_rele(ds, FTAG);
4959	dsl_pool_rele(dp, FTAG);
4960	return (error);
4961}
4962
4963static int
4964zfs_ioc_inject_fault(zfs_cmd_t *zc)
4965{
4966	int id, error;
4967
4968	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
4969	    &zc->zc_inject_record);
4970
4971	if (error == 0)
4972		zc->zc_guid = (uint64_t)id;
4973
4974	return (error);
4975}
4976
4977static int
4978zfs_ioc_clear_fault(zfs_cmd_t *zc)
4979{
4980	return (zio_clear_fault((int)zc->zc_guid));
4981}
4982
4983static int
4984zfs_ioc_inject_list_next(zfs_cmd_t *zc)
4985{
4986	int id = (int)zc->zc_guid;
4987	int error;
4988
4989	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
4990	    &zc->zc_inject_record);
4991
4992	zc->zc_guid = id;
4993
4994	return (error);
4995}
4996
4997static int
4998zfs_ioc_error_log(zfs_cmd_t *zc)
4999{
5000	spa_t *spa;
5001	int error;
5002	size_t count = (size_t)zc->zc_nvlist_dst_size;
5003
5004	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
5005		return (error);
5006
5007	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
5008	    &count);
5009	if (error == 0)
5010		zc->zc_nvlist_dst_size = count;
5011	else
5012		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
5013
5014	spa_close(spa, FTAG);
5015
5016	return (error);
5017}
5018
5019static int
5020zfs_ioc_clear(zfs_cmd_t *zc)
5021{
5022	spa_t *spa;
5023	vdev_t *vd;
5024	int error;
5025
5026	/*
5027	 * On zpool clear we also fix up missing slogs
5028	 */
5029	mutex_enter(&spa_namespace_lock);
5030	spa = spa_lookup(zc->zc_name);
5031	if (spa == NULL) {
5032		mutex_exit(&spa_namespace_lock);
5033		return (SET_ERROR(EIO));
5034	}
5035	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
5036		/* we need to let spa_open/spa_load clear the chains */
5037		spa_set_log_state(spa, SPA_LOG_CLEAR);
5038	}
5039	spa->spa_last_open_failed = 0;
5040	mutex_exit(&spa_namespace_lock);
5041
5042	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
5043		error = spa_open(zc->zc_name, &spa, FTAG);
5044	} else {
5045		nvlist_t *policy;
5046		nvlist_t *config = NULL;
5047
5048		if (zc->zc_nvlist_src == 0)
5049			return (SET_ERROR(EINVAL));
5050
5051		if ((error = get_nvlist(zc->zc_nvlist_src,
5052		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
5053			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
5054			    policy, &config);
5055			if (config != NULL) {
5056				int err;
5057
5058				if ((err = put_nvlist(zc, config)) != 0)
5059					error = err;
5060				nvlist_free(config);
5061			}
5062			nvlist_free(policy);
5063		}
5064	}
5065
5066	if (error != 0)
5067		return (error);
5068
5069	spa_vdev_state_enter(spa, SCL_NONE);
5070
5071	if (zc->zc_guid == 0) {
5072		vd = NULL;
5073	} else {
5074		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
5075		if (vd == NULL) {
5076			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
5077			spa_close(spa, FTAG);
5078			return (SET_ERROR(ENODEV));
5079		}
5080	}
5081
5082	vdev_clear(spa, vd);
5083
5084	(void) spa_vdev_state_exit(spa, NULL, 0);
5085
5086	/*
5087	 * Resume any suspended I/Os.
5088	 */
5089	if (zio_resume(spa) != 0)
5090		error = SET_ERROR(EIO);
5091
5092	spa_close(spa, FTAG);
5093
5094	return (error);
5095}
5096
5097static int
5098zfs_ioc_pool_reopen(zfs_cmd_t *zc)
5099{
5100	spa_t *spa;
5101	int error;
5102
5103	error = spa_open(zc->zc_name, &spa, FTAG);
5104	if (error != 0)
5105		return (error);
5106
5107	spa_vdev_state_enter(spa, SCL_NONE);
5108
5109	/*
5110	 * If a resilver is already in progress then set the
5111	 * spa_scrub_reopen flag to B_TRUE so that we don't restart
5112	 * the scan as a side effect of the reopen. Otherwise, let
5113	 * vdev_open() decided if a resilver is required.
5114	 */
5115	spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
5116	vdev_reopen(spa->spa_root_vdev);
5117	spa->spa_scrub_reopen = B_FALSE;
5118
5119	(void) spa_vdev_state_exit(spa, NULL, 0);
5120	spa_close(spa, FTAG);
5121	return (0);
5122}
5123/*
5124 * inputs:
5125 * zc_name	name of filesystem
5126 *
5127 * outputs:
5128 * zc_string	name of conflicting snapshot, if there is one
5129 */
5130static int
5131zfs_ioc_promote(zfs_cmd_t *zc)
5132{
5133	dsl_pool_t *dp;
5134	dsl_dataset_t *ds, *ods;
5135	char origin[ZFS_MAX_DATASET_NAME_LEN];
5136	char *cp;
5137	int error;
5138
5139	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5140	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
5141	    strchr(zc->zc_name, '%'))
5142		return (SET_ERROR(EINVAL));
5143
5144	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5145	if (error != 0)
5146		return (error);
5147
5148	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5149	if (error != 0) {
5150		dsl_pool_rele(dp, FTAG);
5151		return (error);
5152	}
5153
5154	if (!dsl_dir_is_clone(ds->ds_dir)) {
5155		dsl_dataset_rele(ds, FTAG);
5156		dsl_pool_rele(dp, FTAG);
5157		return (SET_ERROR(EINVAL));
5158	}
5159
5160	error = dsl_dataset_hold_obj(dp,
5161	    dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
5162	if (error != 0) {
5163		dsl_dataset_rele(ds, FTAG);
5164		dsl_pool_rele(dp, FTAG);
5165		return (error);
5166	}
5167
5168	dsl_dataset_name(ods, origin);
5169	dsl_dataset_rele(ods, FTAG);
5170	dsl_dataset_rele(ds, FTAG);
5171	dsl_pool_rele(dp, FTAG);
5172
5173	/*
5174	 * We don't need to unmount *all* the origin fs's snapshots, but
5175	 * it's easier.
5176	 */
5177	cp = strchr(origin, '@');
5178	if (cp)
5179		*cp = '\0';
5180	(void) dmu_objset_find(origin,
5181	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
5182	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
5183}
5184
5185/*
5186 * Retrieve a single {user|group}{used|quota}@... property.
5187 *
5188 * inputs:
5189 * zc_name	name of filesystem
5190 * zc_objset_type zfs_userquota_prop_t
5191 * zc_value	domain name (eg. "S-1-234-567-89")
5192 * zc_guid	RID/UID/GID
5193 *
5194 * outputs:
5195 * zc_cookie	property value
5196 */
5197static int
5198zfs_ioc_userspace_one(zfs_cmd_t *zc)
5199{
5200	zfsvfs_t *zfsvfs;
5201	int error;
5202
5203	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
5204		return (SET_ERROR(EINVAL));
5205
5206	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5207	if (error != 0)
5208		return (error);
5209
5210	error = zfs_userspace_one(zfsvfs,
5211	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
5212	zfsvfs_rele(zfsvfs, FTAG);
5213
5214	return (error);
5215}
5216
5217/*
5218 * inputs:
5219 * zc_name		name of filesystem
5220 * zc_cookie		zap cursor
5221 * zc_objset_type	zfs_userquota_prop_t
5222 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
5223 *
5224 * outputs:
5225 * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
5226 * zc_cookie	zap cursor
5227 */
5228static int
5229zfs_ioc_userspace_many(zfs_cmd_t *zc)
5230{
5231	zfsvfs_t *zfsvfs;
5232	int bufsize = zc->zc_nvlist_dst_size;
5233
5234	if (bufsize <= 0)
5235		return (SET_ERROR(ENOMEM));
5236
5237	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5238	if (error != 0)
5239		return (error);
5240
5241	void *buf = kmem_alloc(bufsize, KM_SLEEP);
5242
5243	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
5244	    buf, &zc->zc_nvlist_dst_size);
5245
5246	if (error == 0) {
5247		error = ddi_copyout(buf,
5248		    (void *)(uintptr_t)zc->zc_nvlist_dst,
5249		    zc->zc_nvlist_dst_size, zc->zc_iflags);
5250	}
5251	kmem_free(buf, bufsize);
5252	zfsvfs_rele(zfsvfs, FTAG);
5253
5254	return (error);
5255}
5256
5257/*
5258 * inputs:
5259 * zc_name		name of filesystem
5260 *
5261 * outputs:
5262 * none
5263 */
5264static int
5265zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
5266{
5267	objset_t *os;
5268	int error = 0;
5269	zfsvfs_t *zfsvfs;
5270
5271	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
5272		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
5273			/*
5274			 * If userused is not enabled, it may be because the
5275			 * objset needs to be closed & reopened (to grow the
5276			 * objset_phys_t).  Suspend/resume the fs will do that.
5277			 */
5278			dsl_dataset_t *ds, *newds;
5279
5280			ds = dmu_objset_ds(zfsvfs->z_os);
5281			error = zfs_suspend_fs(zfsvfs);
5282			if (error == 0) {
5283				dmu_objset_refresh_ownership(ds, &newds,
5284				    zfsvfs);
5285				error = zfs_resume_fs(zfsvfs, newds);
5286			}
5287		}
5288		if (error == 0)
5289			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
5290#ifdef illumos
5291		VFS_RELE(zfsvfs->z_vfs);
5292#else
5293		vfs_unbusy(zfsvfs->z_vfs);
5294#endif
5295	} else {
5296		/* XXX kind of reading contents without owning */
5297		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5298		if (error != 0)
5299			return (error);
5300
5301		error = dmu_objset_userspace_upgrade(os);
5302		dmu_objset_rele(os, FTAG);
5303	}
5304
5305	return (error);
5306}
5307
#ifdef illumos
/*
 * We don't want to have a hard dependency against some special symbols in
 * sharefs, nfs, and smbsrv.  They are looked up when the first file system
 * is shared.  None of sharefs, nfs, or smbsrv are unloadable modules.
 */
/* Entry points resolved with ddi_modsym(); NULL until first use. */
int (*znfsexport_fs)(void *arg);
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/* Set to 1 once the NFS / SMB entry points have been resolved. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Module handles returned by ddi_modopen(). */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
#endif	/* illumos */
/* Held while the share module handles and entry points are set up. */
kmutex_t zfs_share_lock;
5328
#ifdef illumos
/*
 * Open the sharefs module and resolve its "sharefs_impl" entry point into
 * zshare_fs, unless that has already been done.  The caller must hold
 * zfs_share_lock.
 *
 * Returns 0 on success, or ENOSYS if the module or the symbol cannot be
 * found.
 */
static int
zfs_init_sharefs(void)
{
	int error;

	ASSERT(MUTEX_HELD(&zfs_share_lock));
	/* Both NFS and SMB shares also require sharetab support. */
	if (sharefs_mod == NULL && ((sharefs_mod =
	    ddi_modopen("fs/sharefs",
	    KRTLD_MODE_FIRST, &error)) == NULL)) {
		return (SET_ERROR(ENOSYS));
	}
	if (zshare_fs == NULL && ((zshare_fs =
	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
		return (SET_ERROR(ENOSYS));
	}
	return (0);
}
#endif	/* illumos */
5350
/*
 * Share or unshare a file system over NFS or SMB, as selected by
 * zc_share.z_sharetype, and then add it to or remove it from the sharetab.
 * On illumos the required module entry points are resolved on first use;
 * on other platforms the call is not supported and returns ENOSYS.
 */
static int
zfs_ioc_share(zfs_cmd_t *zc)
{
#ifdef illumos
	int error;
	int opcode;

	/*
	 * First pass: make sure the entry points for the requested protocol
	 * have been resolved.
	 * NOTE(review): the *_inited flags are tested before zfs_share_lock
	 * is taken; the NULL checks under the lock keep a racing second
	 * caller from reopening the modules -- confirm this is sufficient.
	 */
	switch (zc->zc_share.z_sharetype) {
	case ZFS_SHARE_NFS:
	case ZFS_UNSHARE_NFS:
		if (zfs_nfsshare_inited == 0) {
			mutex_enter(&zfs_share_lock);
			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
			    KRTLD_MODE_FIRST, &error)) == NULL)) {
				mutex_exit(&zfs_share_lock);
				return (SET_ERROR(ENOSYS));
			}
			if (znfsexport_fs == NULL &&
			    ((znfsexport_fs = (int (*)(void *))
			    ddi_modsym(nfs_mod,
			    "nfs_export", &error)) == NULL)) {
				mutex_exit(&zfs_share_lock);
				return (SET_ERROR(ENOSYS));
			}
			error = zfs_init_sharefs();
			if (error != 0) {
				mutex_exit(&zfs_share_lock);
				return (SET_ERROR(ENOSYS));
			}
			zfs_nfsshare_inited = 1;
			mutex_exit(&zfs_share_lock);
		}
		break;
	case ZFS_SHARE_SMB:
	case ZFS_UNSHARE_SMB:
		if (zfs_smbshare_inited == 0) {
			mutex_enter(&zfs_share_lock);
			if (smbsrv_mod == NULL && ((smbsrv_mod =
			    ddi_modopen("drv/smbsrv",
			    KRTLD_MODE_FIRST, &error)) == NULL)) {
				mutex_exit(&zfs_share_lock);
				return (SET_ERROR(ENOSYS));
			}
			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
			    "smb_server_share", &error)) == NULL)) {
				mutex_exit(&zfs_share_lock);
				return (SET_ERROR(ENOSYS));
			}
			error = zfs_init_sharefs();
			if (error != 0) {
				mutex_exit(&zfs_share_lock);
				return (SET_ERROR(ENOSYS));
			}
			zfs_smbshare_inited = 1;
			mutex_exit(&zfs_share_lock);
		}
		break;
	default:
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Second pass: hand the export data to the protocol module.  The
	 * assignments inside the conditions are intentional; a non-zero
	 * result is returned as is.
	 */
	switch (zc->zc_share.z_sharetype) {
	case ZFS_SHARE_NFS:
	case ZFS_UNSHARE_NFS:
		if (error =
		    znfsexport_fs((void *)
		    (uintptr_t)zc->zc_share.z_exportdata))
			return (error);
		break;
	case ZFS_SHARE_SMB:
	case ZFS_UNSHARE_SMB:
		if (error = zsmbexport_fs((void *)
		    (uintptr_t)zc->zc_share.z_exportdata,
		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
		    B_TRUE: B_FALSE)) {
			return (error);
		}
		break;
	}

	/* Either share type means "add"; either unshare type means "remove". */
	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
	    SHAREFS_ADD : SHAREFS_REMOVE;

	/*
	 * Add or remove share from sharetab
	 */
	error = zshare_fs(opcode,
	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
	    zc->zc_share.z_sharemax);

	return (error);

#else	/* !illumos */
	return (ENOSYS);
#endif	/* illumos */
}
5449
/*
 * Single-entry ACL (ACE_ALL_PERMS, ACE_EVERYONE) that zfs_ioc_smb_acl()
 * installs on the share resource files it creates.
 */
ace_t full_access[] = {
	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
5453
5454/*
5455 * inputs:
5456 * zc_name		name of containing filesystem
5457 * zc_obj		object # beyond which we want next in-use object #
5458 *
5459 * outputs:
5460 * zc_obj		next in-use object #
5461 */
5462static int
5463zfs_ioc_next_obj(zfs_cmd_t *zc)
5464{
5465	objset_t *os = NULL;
5466	int error;
5467
5468	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5469	if (error != 0)
5470		return (error);
5471
5472	error = dmu_object_next(os, &zc->zc_obj, B_FALSE, 0);
5473
5474	dmu_objset_rele(os, FTAG);
5475	return (error);
5476}
5477
5478/*
5479 * inputs:
5480 * zc_name		name of filesystem
5481 * zc_value		prefix name for snapshot
5482 * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
5483 *
5484 * outputs:
5485 * zc_value		short name of new snapshot
5486 */
5487static int
5488zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5489{
5490	char *snap_name;
5491	char *hold_name;
5492	int error;
5493	minor_t minor;
5494
5495	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5496	if (error != 0)
5497		return (error);
5498
5499	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5500	    (u_longlong_t)ddi_get_lbolt64());
5501	hold_name = kmem_asprintf("%%%s", zc->zc_value);
5502
5503	error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5504	    hold_name);
5505	if (error == 0)
5506		(void) strcpy(zc->zc_value, snap_name);
5507	strfree(snap_name);
5508	strfree(hold_name);
5509	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5510	return (error);
5511}
5512
5513/*
5514 * inputs:
5515 * zc_name		name of "to" snapshot
5516 * zc_value		name of "from" snapshot
5517 * zc_cookie		file descriptor to write diff data on
5518 *
5519 * outputs:
5520 * dmu_diff_record_t's to the file descriptor
5521 */
5522static int
5523zfs_ioc_diff(zfs_cmd_t *zc)
5524{
5525	file_t *fp;
5526	cap_rights_t rights;
5527	offset_t off;
5528	int error;
5529
5530#ifdef illumos
5531	fp = getf(zc->zc_cookie);
5532#else
5533	fget_write(curthread, zc->zc_cookie,
5534		    cap_rights_init(&rights, CAP_WRITE), &fp);
5535#endif
5536	if (fp == NULL)
5537		return (SET_ERROR(EBADF));
5538
5539	off = fp->f_offset;
5540
5541#ifdef illumos
5542	error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5543#else
5544	error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
5545#endif
5546
5547	if (off >= 0 && off <= MAXOFFSET_T)
5548		fp->f_offset = off;
5549	releasef(zc->zc_cookie);
5550
5551	return (error);
5552}
5553
#ifdef illumos
/*
 * Remove every ACL file found in the shares directory dzp.
 *
 * Returns the error that ended the walk: either a failed VOP_REMOVE() or
 * the non-zero result of zap_cursor_retrieve().
 * NOTE(review): a walk that simply runs out of entries therefore also
 * returns whatever zap_cursor_retrieve() reports at the end -- confirm
 * that callers expect this.
 */
static int
zfs_smb_acl_purge(znode_t *dzp)
{
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zap_cursor_t cursor;
	zap_attribute_t entry;
	int error;

	zap_cursor_init(&cursor, zfsvfs->z_os, dzp->z_id);
	while ((error = zap_cursor_retrieve(&cursor, &entry)) == 0) {
		error = VOP_REMOVE(ZTOV(dzp), entry.za_name, kcred,
		    NULL, 0);
		if (error != 0)
			break;
		zap_cursor_advance(&cursor);
	}
	zap_cursor_fini(&cursor);

	return (error);
}
#endif	/* illumos */
5577
5578static int
5579zfs_ioc_smb_acl(zfs_cmd_t *zc)
5580{
5581#ifdef illumos
5582	vnode_t *vp;
5583	znode_t *dzp;
5584	vnode_t *resourcevp = NULL;
5585	znode_t *sharedir;
5586	zfsvfs_t *zfsvfs;
5587	nvlist_t *nvlist;
5588	char *src, *target;
5589	vattr_t vattr;
5590	vsecattr_t vsec;
5591	int error = 0;
5592
5593	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5594	    NO_FOLLOW, NULL, &vp)) != 0)
5595		return (error);
5596
5597	/* Now make sure mntpnt and dataset are ZFS */
5598
5599	if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
5600	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5601	    zc->zc_name) != 0)) {
5602		VN_RELE(vp);
5603		return (SET_ERROR(EINVAL));
5604	}
5605
5606	dzp = VTOZ(vp);
5607	zfsvfs = dzp->z_zfsvfs;
5608	ZFS_ENTER(zfsvfs);
5609
5610	/*
5611	 * Create share dir if its missing.
5612	 */
5613	mutex_enter(&zfsvfs->z_lock);
5614	if (zfsvfs->z_shares_dir == 0) {
5615		dmu_tx_t *tx;
5616
5617		tx = dmu_tx_create(zfsvfs->z_os);
5618		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5619		    ZFS_SHARES_DIR);
5620		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5621		error = dmu_tx_assign(tx, TXG_WAIT);
5622		if (error != 0) {
5623			dmu_tx_abort(tx);
5624		} else {
5625			error = zfs_create_share_dir(zfsvfs, tx);
5626			dmu_tx_commit(tx);
5627		}
5628		if (error != 0) {
5629			mutex_exit(&zfsvfs->z_lock);
5630			VN_RELE(vp);
5631			ZFS_EXIT(zfsvfs);
5632			return (error);
5633		}
5634	}
5635	mutex_exit(&zfsvfs->z_lock);
5636
5637	ASSERT(zfsvfs->z_shares_dir);
5638	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5639		VN_RELE(vp);
5640		ZFS_EXIT(zfsvfs);
5641		return (error);
5642	}
5643
5644	switch (zc->zc_cookie) {
5645	case ZFS_SMB_ACL_ADD:
5646		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5647		vattr.va_type = VREG;
5648		vattr.va_mode = S_IFREG|0777;
5649		vattr.va_uid = 0;
5650		vattr.va_gid = 0;
5651
5652		vsec.vsa_mask = VSA_ACE;
5653		vsec.vsa_aclentp = &full_access;
5654		vsec.vsa_aclentsz = sizeof (full_access);
5655		vsec.vsa_aclcnt = 1;
5656
5657		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5658		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5659		if (resourcevp)
5660			VN_RELE(resourcevp);
5661		break;
5662
5663	case ZFS_SMB_ACL_REMOVE:
5664		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5665		    NULL, 0);
5666		break;
5667
5668	case ZFS_SMB_ACL_RENAME:
5669		if ((error = get_nvlist(zc->zc_nvlist_src,
5670		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5671			VN_RELE(vp);
5672			VN_RELE(ZTOV(sharedir));
5673			ZFS_EXIT(zfsvfs);
5674			return (error);
5675		}
5676		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5677		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5678		    &target)) {
5679			VN_RELE(vp);
5680			VN_RELE(ZTOV(sharedir));
5681			ZFS_EXIT(zfsvfs);
5682			nvlist_free(nvlist);
5683			return (error);
5684		}
5685		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5686		    kcred, NULL, 0);
5687		nvlist_free(nvlist);
5688		break;
5689
5690	case ZFS_SMB_ACL_PURGE:
5691		error = zfs_smb_acl_purge(sharedir);
5692		break;
5693
5694	default:
5695		error = SET_ERROR(EINVAL);
5696		break;
5697	}
5698
5699	VN_RELE(vp);
5700	VN_RELE(ZTOV(sharedir));
5701
5702	ZFS_EXIT(zfsvfs);
5703
5704	return (error);
5705#else	/* !illumos */
5706	return (EOPNOTSUPP);
5707#endif	/* illumos */
5708}
5709
5710/*
5711 * innvl: {
5712 *     "holds" -> { snapname -> holdname (string), ... }
5713 *     (optional) "cleanup_fd" -> fd (int32)
5714 * }
5715 *
5716 * outnvl: {
5717 *     snapname -> error value (int32)
5718 *     ...
5719 * }
5720 */
5721/* ARGSUSED */
5722static int
5723zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5724{
5725	nvpair_t *pair;
5726	nvlist_t *holds;
5727	int cleanup_fd = -1;
5728	int error;
5729	minor_t minor = 0;
5730
5731	error = nvlist_lookup_nvlist(args, "holds", &holds);
5732	if (error != 0)
5733		return (SET_ERROR(EINVAL));
5734
5735	/* make sure the user didn't pass us any invalid (empty) tags */
5736	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
5737	    pair = nvlist_next_nvpair(holds, pair)) {
5738		char *htag;
5739
5740		error = nvpair_value_string(pair, &htag);
5741		if (error != 0)
5742			return (SET_ERROR(error));
5743
5744		if (strlen(htag) == 0)
5745			return (SET_ERROR(EINVAL));
5746	}
5747
5748	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
5749		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
5750		if (error != 0)
5751			return (error);
5752	}
5753
5754	error = dsl_dataset_user_hold(holds, minor, errlist);
5755	if (minor != 0)
5756		zfs_onexit_fd_rele(cleanup_fd);
5757	return (error);
5758}
5759
5760/*
5761 * innvl is not used.
5762 *
5763 * outnvl: {
5764 *    holdname -> time added (uint64 seconds since epoch)
5765 *    ...
5766 * }
5767 */
5768/* ARGSUSED */
5769static int
5770zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
5771{
5772	return (dsl_dataset_get_holds(snapname, outnvl));
5773}
5774
5775/*
5776 * innvl: {
5777 *     snapname -> { holdname, ... }
5778 *     ...
5779 * }
5780 *
5781 * outnvl: {
5782 *     snapname -> error value (int32)
5783 *     ...
5784 * }
5785 */
5786/* ARGSUSED */
5787static int
5788zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
5789{
5790	return (dsl_dataset_user_release(holds, errlist));
5791}
5792
5793/*
5794 * inputs:
5795 * zc_name		name of new filesystem or snapshot
5796 * zc_value		full name of old snapshot
5797 *
5798 * outputs:
5799 * zc_cookie		space in bytes
5800 * zc_objset_type	compressed space in bytes
5801 * zc_perm_action	uncompressed space in bytes
5802 */
5803static int
5804zfs_ioc_space_written(zfs_cmd_t *zc)
5805{
5806	int error;
5807	dsl_pool_t *dp;
5808	dsl_dataset_t *new, *old;
5809
5810	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5811	if (error != 0)
5812		return (error);
5813	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
5814	if (error != 0) {
5815		dsl_pool_rele(dp, FTAG);
5816		return (error);
5817	}
5818	error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
5819	if (error != 0) {
5820		dsl_dataset_rele(new, FTAG);
5821		dsl_pool_rele(dp, FTAG);
5822		return (error);
5823	}
5824
5825	error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
5826	    &zc->zc_objset_type, &zc->zc_perm_action);
5827	dsl_dataset_rele(old, FTAG);
5828	dsl_dataset_rele(new, FTAG);
5829	dsl_pool_rele(dp, FTAG);
5830	return (error);
5831}
5832
5833/*
5834 * innvl: {
5835 *     "firstsnap" -> snapshot name
5836 * }
5837 *
5838 * outnvl: {
5839 *     "used" -> space in bytes
5840 *     "compressed" -> compressed space in bytes
5841 *     "uncompressed" -> uncompressed space in bytes
5842 * }
5843 */
5844static int
5845zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
5846{
5847	int error;
5848	dsl_pool_t *dp;
5849	dsl_dataset_t *new, *old;
5850	char *firstsnap;
5851	uint64_t used, comp, uncomp;
5852
5853	if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
5854		return (SET_ERROR(EINVAL));
5855
5856	error = dsl_pool_hold(lastsnap, FTAG, &dp);
5857	if (error != 0)
5858		return (error);
5859
5860	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
5861	if (error == 0 && !new->ds_is_snapshot) {
5862		dsl_dataset_rele(new, FTAG);
5863		error = SET_ERROR(EINVAL);
5864	}
5865	if (error != 0) {
5866		dsl_pool_rele(dp, FTAG);
5867		return (error);
5868	}
5869	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
5870	if (error == 0 && !old->ds_is_snapshot) {
5871		dsl_dataset_rele(old, FTAG);
5872		error = SET_ERROR(EINVAL);
5873	}
5874	if (error != 0) {
5875		dsl_dataset_rele(new, FTAG);
5876		dsl_pool_rele(dp, FTAG);
5877		return (error);
5878	}
5879
5880	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
5881	dsl_dataset_rele(old, FTAG);
5882	dsl_dataset_rele(new, FTAG);
5883	dsl_pool_rele(dp, FTAG);
5884	fnvlist_add_uint64(outnvl, "used", used);
5885	fnvlist_add_uint64(outnvl, "compressed", comp);
5886	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
5887	return (error);
5888}
5889
5890static int
5891zfs_ioc_jail(zfs_cmd_t *zc)
5892{
5893
5894	return (zone_dataset_attach(curthread->td_ucred, zc->zc_name,
5895	    (int)zc->zc_jailid));
5896}
5897
5898static int
5899zfs_ioc_unjail(zfs_cmd_t *zc)
5900{
5901
5902	return (zone_dataset_detach(curthread->td_ucred, zc->zc_name,
5903	    (int)zc->zc_jailid));
5904}
5905
5906/*
5907 * innvl: {
5908 *     "fd" -> file descriptor to write stream to (int32)
5909 *     (optional) "fromsnap" -> full snap name to send an incremental from
5910 *     (optional) "largeblockok" -> (value ignored)
5911 *         indicates that blocks > 128KB are permitted
5912 *     (optional) "embedok" -> (value ignored)
5913 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
5914 *     (optional) "compressok" -> (value ignored)
5915 *         presence indicates compressed DRR_WRITE records are permitted
5916 *     (optional) "resume_object" and "resume_offset" -> (uint64)
5917 *         if present, resume send stream from specified object and offset.
5918 * }
5919 *
5920 * outnvl is unused
5921 */
5922/* ARGSUSED */
5923static int
5924zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5925{
5926	cap_rights_t rights;
5927	file_t *fp;
5928	int error;
5929	offset_t off;
5930	char *fromname = NULL;
5931	int fd;
5932	boolean_t largeblockok;
5933	boolean_t embedok;
5934	boolean_t compressok;
5935	uint64_t resumeobj = 0;
5936	uint64_t resumeoff = 0;
5937
5938	error = nvlist_lookup_int32(innvl, "fd", &fd);
5939	if (error != 0)
5940		return (SET_ERROR(EINVAL));
5941
5942	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
5943
5944	largeblockok = nvlist_exists(innvl, "largeblockok");
5945	embedok = nvlist_exists(innvl, "embedok");
5946	compressok = nvlist_exists(innvl, "compressok");
5947
5948	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
5949	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
5950
5951#ifdef illumos
5952	file_t *fp = getf(fd);
5953#else
5954	fget_write(curthread, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
5955#endif
5956	if (fp == NULL)
5957		return (SET_ERROR(EBADF));
5958
5959	off = fp->f_offset;
5960	error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
5961#ifdef illumos
5962	    fd, resumeobj, resumeoff, fp->f_vnode, &off);
5963#else
5964	    fd, resumeobj, resumeoff, fp, &off);
5965#endif
5966
5967#ifdef illumos
5968	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5969		fp->f_offset = off;
5970#else
5971	fp->f_offset = off;
5972#endif
5973
5974	releasef(fd);
5975	return (error);
5976}
5977
5978/*
5979 * Determine approximately how large a zfs send stream will be -- the number
5980 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
5981 *
5982 * innvl: {
5983 *     (optional) "from" -> full snap or bookmark name to send an incremental
5984 *                          from
5985 *     (optional) "largeblockok" -> (value ignored)
5986 *         indicates that blocks > 128KB are permitted
5987 *     (optional) "embedok" -> (value ignored)
5988 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
5989 *     (optional) "compressok" -> (value ignored)
5990 *         presence indicates compressed DRR_WRITE records are permitted
5991 * }
5992 *
5993 * outnvl: {
5994 *     "space" -> bytes of space (uint64)
5995 * }
5996 */
5997static int
5998zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5999{
6000	dsl_pool_t *dp;
6001	dsl_dataset_t *tosnap;
6002	int error;
6003	char *fromname;
6004	boolean_t compressok;
6005	uint64_t space;
6006
6007	error = dsl_pool_hold(snapname, FTAG, &dp);
6008	if (error != 0)
6009		return (error);
6010
6011	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
6012	if (error != 0) {
6013		dsl_pool_rele(dp, FTAG);
6014		return (error);
6015	}
6016
6017	compressok = nvlist_exists(innvl, "compressok");
6018
6019	error = nvlist_lookup_string(innvl, "from", &fromname);
6020	if (error == 0) {
6021		if (strchr(fromname, '@') != NULL) {
6022			/*
6023			 * If from is a snapshot, hold it and use the more
6024			 * efficient dmu_send_estimate to estimate send space
6025			 * size using deadlists.
6026			 */
6027			dsl_dataset_t *fromsnap;
6028			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
6029			if (error != 0)
6030				goto out;
6031			error = dmu_send_estimate(tosnap, fromsnap, compressok,
6032			    &space);
6033			dsl_dataset_rele(fromsnap, FTAG);
6034		} else if (strchr(fromname, '#') != NULL) {
6035			/*
6036			 * If from is a bookmark, fetch the creation TXG of the
6037			 * snapshot it was created from and use that to find
6038			 * blocks that were born after it.
6039			 */
6040			zfs_bookmark_phys_t frombm;
6041
6042			error = dsl_bookmark_lookup(dp, fromname, tosnap,
6043			    &frombm);
6044			if (error != 0)
6045				goto out;
6046			error = dmu_send_estimate_from_txg(tosnap,
6047			    frombm.zbm_creation_txg, compressok, &space);
6048		} else {
6049			/*
6050			 * from is not properly formatted as a snapshot or
6051			 * bookmark
6052			 */
6053			error = SET_ERROR(EINVAL);
6054			goto out;
6055		}
6056	} else {
6057		/*
6058		 * If estimating the size of a full send, use dmu_send_estimate.
6059		 */
6060		error = dmu_send_estimate(tosnap, NULL, compressok, &space);
6061	}
6062
6063	fnvlist_add_uint64(outnvl, "space", space);
6064
6065out:
6066	dsl_dataset_rele(tosnap, FTAG);
6067	dsl_pool_rele(dp, FTAG);
6068	return (error);
6069}
6070
6071static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
6072
6073static void
6074zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6075    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6076    boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
6077{
6078	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6079
6080	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6081	ASSERT3U(ioc, <, ZFS_IOC_LAST);
6082	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6083	ASSERT3P(vec->zvec_func, ==, NULL);
6084
6085	vec->zvec_legacy_func = func;
6086	vec->zvec_secpolicy = secpolicy;
6087	vec->zvec_namecheck = namecheck;
6088	vec->zvec_allow_log = log_history;
6089	vec->zvec_pool_check = pool_check;
6090}
6091
6092/*
6093 * See the block comment at the beginning of this file for details on
6094 * each argument to this function.
6095 */
6096static void
6097zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
6098    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6099    zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
6100    boolean_t allow_log)
6101{
6102	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6103
6104	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6105	ASSERT3U(ioc, <, ZFS_IOC_LAST);
6106	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6107	ASSERT3P(vec->zvec_func, ==, NULL);
6108
6109	/* if we are logging, the name must be valid */
6110	ASSERT(!allow_log || namecheck != NO_NAME);
6111
6112	vec->zvec_name = name;
6113	vec->zvec_func = func;
6114	vec->zvec_secpolicy = secpolicy;
6115	vec->zvec_namecheck = namecheck;
6116	vec->zvec_pool_check = pool_check;
6117	vec->zvec_smush_outnvlist = smush_outnvlist;
6118	vec->zvec_allow_log = allow_log;
6119}
6120
6121static void
6122zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6123    zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
6124    zfs_ioc_poolcheck_t pool_check)
6125{
6126	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6127	    POOL_NAME, log_history, pool_check);
6128}
6129
6130static void
6131zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6132    zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
6133{
6134	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6135	    DATASET_NAME, B_FALSE, pool_check);
6136}
6137
6138static void
6139zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6140{
6141	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
6142	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6143}
6144
6145static void
6146zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6147    zfs_secpolicy_func_t *secpolicy)
6148{
6149	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6150	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
6151}
6152
6153static void
6154zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
6155    zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
6156{
6157	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6158	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
6159}
6160
6161static void
6162zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6163{
6164	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
6165	    zfs_secpolicy_read);
6166}
6167
6168static void
6169zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6170    zfs_secpolicy_func_t *secpolicy)
6171{
6172	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6173	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6174}
6175
6176static void
6177zfs_ioctl_init(void)
6178{
6179	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
6180	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
6181	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6182
6183	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
6184	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
6185	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
6186
6187	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
6188	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
6189	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
6190
6191	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
6192	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
6193	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
6194
6195	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
6196	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
6197	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
6198
6199	zfs_ioctl_register("create", ZFS_IOC_CREATE,
6200	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
6201	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6202
6203	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
6204	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
6205	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6206
6207	zfs_ioctl_register("remap", ZFS_IOC_REMAP,
6208	    zfs_ioc_remap, zfs_secpolicy_remap, DATASET_NAME,
6209	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
6210
6211	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
6212	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
6213	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6214
6215	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
6216	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
6217	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6218	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
6219	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
6220	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6221
6222	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
6223	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
6224	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
6225
6226	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
6227	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
6228	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
6229
6230	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
6231	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
6232	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6233
6234	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
6235	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
6236	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
6237
6238	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
6239	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
6240	    POOL_NAME,
6241	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6242
6243	zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
6244	    zfs_ioc_channel_program, zfs_secpolicy_config,
6245	    POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
6246	    B_TRUE);
6247
6248	zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
6249	    zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
6250	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6251
6252	zfs_ioctl_register("zpool_discard_checkpoint",
6253	    ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
6254	    zfs_secpolicy_config, POOL_NAME,
6255	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6256
6257	zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
6258	    zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
6259	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6260
6261	/* IOCTLS that use the legacy function signature */
6262
6263	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
6264	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
6265
6266	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
6267	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
6268	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
6269	    zfs_ioc_pool_scan);
6270	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
6271	    zfs_ioc_pool_upgrade);
6272	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
6273	    zfs_ioc_vdev_add);
6274	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
6275	    zfs_ioc_vdev_remove);
6276	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
6277	    zfs_ioc_vdev_set_state);
6278	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
6279	    zfs_ioc_vdev_attach);
6280	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
6281	    zfs_ioc_vdev_detach);
6282	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
6283	    zfs_ioc_vdev_setpath);
6284	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
6285	    zfs_ioc_vdev_setfru);
6286	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
6287	    zfs_ioc_pool_set_props);
6288	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
6289	    zfs_ioc_vdev_split);
6290	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
6291	    zfs_ioc_pool_reguid);
6292
6293	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
6294	    zfs_ioc_pool_configs, zfs_secpolicy_none);
6295	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
6296	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
6297	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
6298	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
6299	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
6300	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
6301	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
6302	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);
6303
6304	/*
6305	 * pool destroy, and export don't log the history as part of
6306	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
6307	 * does the logging of those commands.
6308	 */
6309	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
6310	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
6311	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
6312	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
6313
6314	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
6315	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
6316	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
6317	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
6318
6319	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
6320	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_NONE);
6321	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
6322	    zfs_ioc_dsobj_to_dsname,
6323	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_NONE);
6324	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
6325	    zfs_ioc_pool_get_history,
6326	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
6327
6328	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
6329	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
6330
6331	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
6332	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);
6333	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
6334	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
6335
6336	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
6337	    zfs_ioc_space_written);
6338	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
6339	    zfs_ioc_objset_recvd_props);
6340	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
6341	    zfs_ioc_next_obj);
6342	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
6343	    zfs_ioc_get_fsacl);
6344	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
6345	    zfs_ioc_objset_stats);
6346	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
6347	    zfs_ioc_objset_zplprops);
6348	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
6349	    zfs_ioc_dataset_list_next);
6350	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
6351	    zfs_ioc_snapshot_list_next);
6352	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
6353	    zfs_ioc_send_progress);
6354
6355	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
6356	    zfs_ioc_diff, zfs_secpolicy_diff);
6357	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
6358	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
6359	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
6360	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
6361	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
6362	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
6363	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
6364	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
6365	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
6366	    zfs_ioc_send, zfs_secpolicy_send);
6367
6368	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
6369	    zfs_secpolicy_none);
6370	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
6371	    zfs_secpolicy_destroy);
6372	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
6373	    zfs_secpolicy_recv);
6374	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
6375	    zfs_secpolicy_promote);
6376	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
6377	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
6378	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
6379	    zfs_secpolicy_set_fsacl);
6380
6381	/*
6382	 * Not using zfs_ioctl_register_dataset_modify as DATASET_NAME check
6383	 * won't allow a bookmark name.
6384	 */
6385	zfs_ioctl_register_legacy(ZFS_IOC_RENAME, zfs_ioc_rename,
6386	    zfs_secpolicy_rename, ENTITY_NAME, B_TRUE,
6387	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6388
6389	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
6390	    zfs_secpolicy_share, POOL_CHECK_NONE);
6391	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
6392	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
6393	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
6394	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
6395	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6396	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
6397	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
6398	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6399
6400#ifdef __FreeBSD__
6401	zfs_ioctl_register_dataset_nolog(ZFS_IOC_JAIL, zfs_ioc_jail,
6402	    zfs_secpolicy_config, POOL_CHECK_NONE);
6403	zfs_ioctl_register_dataset_nolog(ZFS_IOC_UNJAIL, zfs_ioc_unjail,
6404	    zfs_secpolicy_config, POOL_CHECK_NONE);
6405	zfs_ioctl_register("fbsd_nextboot", ZFS_IOC_NEXTBOOT,
6406	    zfs_ioc_nextboot, zfs_secpolicy_config, NO_NAME,
6407	    POOL_CHECK_NONE, B_FALSE, B_FALSE);
6408#endif
6409}
6410
6411int
6412pool_status_check(const char *name, zfs_ioc_namecheck_t type,
6413    zfs_ioc_poolcheck_t check)
6414{
6415	spa_t *spa;
6416	int error;
6417
6418	ASSERT(type == POOL_NAME || type == DATASET_NAME ||
6419	    type == ENTITY_NAME);
6420
6421	if (check & POOL_CHECK_NONE)
6422		return (0);
6423
6424	error = spa_open(name, &spa, FTAG);
6425	if (error == 0) {
6426		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
6427			error = SET_ERROR(EAGAIN);
6428		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
6429			error = SET_ERROR(EROFS);
6430		spa_close(spa, FTAG);
6431	}
6432	return (error);
6433}
6434
6435/*
6436 * Find a free minor number.
6437 */
6438minor_t
6439zfsdev_minor_alloc(void)
6440{
6441	static minor_t last_minor;
6442	minor_t m;
6443
6444	ASSERT(MUTEX_HELD(&spa_namespace_lock));
6445
6446	for (m = last_minor + 1; m != last_minor; m++) {
6447		if (m > ZFSDEV_MAX_MINOR)
6448			m = 1;
6449		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
6450			last_minor = m;
6451			return (m);
6452		}
6453	}
6454
6455	return (0);
6456}
6457
6458static int
6459zfs_ctldev_init(struct cdev *devp)
6460{
6461	minor_t minor;
6462	zfs_soft_state_t *zs;
6463
6464	ASSERT(MUTEX_HELD(&spa_namespace_lock));
6465
6466	minor = zfsdev_minor_alloc();
6467	if (minor == 0)
6468		return (SET_ERROR(ENXIO));
6469
6470	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
6471		return (SET_ERROR(EAGAIN));
6472
6473	devfs_set_cdevpriv((void *)(uintptr_t)minor, zfsdev_close);
6474
6475	zs = ddi_get_soft_state(zfsdev_state, minor);
6476	zs->zss_type = ZSST_CTLDEV;
6477	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
6478
6479	return (0);
6480}
6481
6482static void
6483zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
6484{
6485	ASSERT(MUTEX_HELD(&spa_namespace_lock));
6486
6487	zfs_onexit_destroy(zo);
6488	ddi_soft_state_free(zfsdev_state, minor);
6489}
6490
6491void *
6492zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
6493{
6494	zfs_soft_state_t *zp;
6495
6496	zp = ddi_get_soft_state(zfsdev_state, minor);
6497	if (zp == NULL || zp->zss_type != which)
6498		return (NULL);
6499
6500	return (zp->zss_data);
6501}
6502
6503static int
6504zfsdev_open(struct cdev *devp, int flag, int mode, struct thread *td)
6505{
6506	int error = 0;
6507
6508#ifdef illumos
6509	if (getminor(*devp) != 0)
6510		return (zvol_open(devp, flag, otyp, cr));
6511#endif
6512
6513	/* This is the control device. Allocate a new minor if requested. */
6514	if (flag & FEXCL) {
6515		mutex_enter(&spa_namespace_lock);
6516		error = zfs_ctldev_init(devp);
6517		mutex_exit(&spa_namespace_lock);
6518	}
6519
6520	return (error);
6521}
6522
6523static void
6524zfsdev_close(void *data)
6525{
6526	zfs_onexit_t *zo;
6527	minor_t minor = (minor_t)(uintptr_t)data;
6528
6529	if (minor == 0)
6530		return;
6531
6532	mutex_enter(&spa_namespace_lock);
6533	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
6534	if (zo == NULL) {
6535		mutex_exit(&spa_namespace_lock);
6536		return;
6537	}
6538	zfs_ctldev_destroy(zo, minor);
6539	mutex_exit(&spa_namespace_lock);
6540}
6541
6542static int
6543zfsdev_ioctl(struct cdev *dev, u_long zcmd, caddr_t arg, int flag,
6544    struct thread *td)
6545{
6546	zfs_cmd_t *zc;
6547	uint_t vecnum;
6548	int error, rc, len;
6549#ifdef illumos
6550	minor_t minor = getminor(dev);
6551#else
6552	zfs_iocparm_t *zc_iocparm;
6553	int cflag, cmd, oldvecnum;
6554	boolean_t newioc, compat;
6555	void *compat_zc = NULL;
6556	cred_t *cr = td->td_ucred;
6557#endif
6558	const zfs_ioc_vec_t *vec;
6559	char *saved_poolname = NULL;
6560	nvlist_t *innvl = NULL;
6561
6562	cflag = ZFS_CMD_COMPAT_NONE;
6563	compat = B_FALSE;
6564	newioc = B_TRUE;	/* "new" style (zfs_iocparm_t) ioctl */
6565
6566	len = IOCPARM_LEN(zcmd);
6567	vecnum = cmd = zcmd & 0xff;
6568
6569	/*
6570	 * Check if we are talking to supported older binaries
6571	 * and translate zfs_cmd if necessary
6572	 */
6573	if (len != sizeof(zfs_iocparm_t)) {
6574		newioc = B_FALSE;
6575		compat = B_TRUE;
6576
6577		vecnum = cmd;
6578
6579		switch (len) {
6580		case sizeof(zfs_cmd_zcmd_t):
6581			cflag = ZFS_CMD_COMPAT_LZC;
6582			break;
6583		case sizeof(zfs_cmd_deadman_t):
6584			cflag = ZFS_CMD_COMPAT_DEADMAN;
6585			break;
6586		case sizeof(zfs_cmd_v28_t):
6587			cflag = ZFS_CMD_COMPAT_V28;
6588			break;
6589		case sizeof(zfs_cmd_v15_t):
6590			if (cmd >= sizeof(zfs_ioctl_v15_to_v28) /
6591			    sizeof(zfs_ioctl_v15_to_v28[0]))
6592				return (EINVAL);
6593
6594			cflag = ZFS_CMD_COMPAT_V15;
6595			vecnum = zfs_ioctl_v15_to_v28[cmd];
6596
6597			/*
6598			 * Return without further handling
6599			 * if the command is blacklisted.
6600			 */
6601			if (vecnum == ZFS_IOC_COMPAT_PASS)
6602				return (0);
6603			else if (vecnum == ZFS_IOC_COMPAT_FAIL)
6604				return (ENOTSUP);
6605			break;
6606		default:
6607			return (EINVAL);
6608		}
6609	}
6610
6611#ifdef illumos
6612	vecnum = cmd - ZFS_IOC_FIRST;
6613	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
6614#endif
6615
6616	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
6617		return (SET_ERROR(EINVAL));
6618	vec = &zfs_ioc_vec[vecnum];
6619
6620	zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);
6621
6622#ifdef illumos
6623	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
6624	if (error != 0) {
6625		error = SET_ERROR(EFAULT);
6626		goto out;
6627	}
6628#else	/* !illumos */
6629	bzero(zc, sizeof(zfs_cmd_t));
6630
6631	if (newioc) {
6632		zc_iocparm = (void *)arg;
6633
6634		switch (zc_iocparm->zfs_ioctl_version) {
6635		case ZFS_IOCVER_CURRENT:
6636			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_t)) {
6637				error = SET_ERROR(EINVAL);
6638				goto out;
6639			}
6640			break;
6641		case ZFS_IOCVER_INLANES:
6642			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_inlanes_t)) {
6643				error = SET_ERROR(EFAULT);
6644				goto out;
6645			}
6646			compat = B_TRUE;
6647			cflag = ZFS_CMD_COMPAT_INLANES;
6648			break;
6649		case ZFS_IOCVER_RESUME:
6650			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_resume_t)) {
6651				error = SET_ERROR(EFAULT);
6652				goto out;
6653			}
6654			compat = B_TRUE;
6655			cflag = ZFS_CMD_COMPAT_RESUME;
6656			break;
6657		case ZFS_IOCVER_EDBP:
6658			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_edbp_t)) {
6659				error = SET_ERROR(EFAULT);
6660				goto out;
6661			}
6662			compat = B_TRUE;
6663			cflag = ZFS_CMD_COMPAT_EDBP;
6664			break;
6665		case ZFS_IOCVER_ZCMD:
6666			if (zc_iocparm->zfs_cmd_size > sizeof(zfs_cmd_t) ||
6667			    zc_iocparm->zfs_cmd_size < sizeof(zfs_cmd_zcmd_t)) {
6668				error = SET_ERROR(EFAULT);
6669				goto out;
6670			}
6671			compat = B_TRUE;
6672			cflag = ZFS_CMD_COMPAT_ZCMD;
6673			break;
6674		default:
6675			error = SET_ERROR(EINVAL);
6676			goto out;
6677			/* NOTREACHED */
6678		}
6679
6680		if (compat) {
6681			ASSERT(sizeof(zfs_cmd_t) >= zc_iocparm->zfs_cmd_size);
6682			compat_zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);
6683			bzero(compat_zc, sizeof(zfs_cmd_t));
6684
6685			error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd,
6686			    compat_zc, zc_iocparm->zfs_cmd_size, flag);
6687			if (error != 0) {
6688				error = SET_ERROR(EFAULT);
6689				goto out;
6690			}
6691		} else {
6692			error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd,
6693			    zc, zc_iocparm->zfs_cmd_size, flag);
6694			if (error != 0) {
6695				error = SET_ERROR(EFAULT);
6696				goto out;
6697			}
6698		}
6699	}
6700
6701	if (compat) {
6702		if (newioc) {
6703			ASSERT(compat_zc != NULL);
6704			zfs_cmd_compat_get(zc, compat_zc, cflag);
6705		} else {
6706			ASSERT(compat_zc == NULL);
6707			zfs_cmd_compat_get(zc, arg, cflag);
6708		}
6709		oldvecnum = vecnum;
6710		error = zfs_ioctl_compat_pre(zc, &vecnum, cflag);
6711		if (error != 0)
6712			goto out;
6713		if (oldvecnum != vecnum)
6714			vec = &zfs_ioc_vec[vecnum];
6715	}
6716#endif	/* !illumos */
6717
6718	zc->zc_iflags = flag & FKIOCTL;
6719	if (zc->zc_nvlist_src_size != 0) {
6720		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
6721		    zc->zc_iflags, &innvl);
6722		if (error != 0)
6723			goto out;
6724	}
6725
6726	/* rewrite innvl for backwards compatibility */
6727	if (compat)
6728		innvl = zfs_ioctl_compat_innvl(zc, innvl, vecnum, cflag);
6729
6730	/*
6731	 * Ensure that all pool/dataset names are valid before we pass down to
6732	 * the lower layers.
6733	 */
6734	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
6735	switch (vec->zvec_namecheck) {
6736	case POOL_NAME:
6737		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
6738			error = SET_ERROR(EINVAL);
6739		else
6740			error = pool_status_check(zc->zc_name,
6741			    vec->zvec_namecheck, vec->zvec_pool_check);
6742		break;
6743
6744	case DATASET_NAME:
6745		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
6746			error = SET_ERROR(EINVAL);
6747		else
6748			error = pool_status_check(zc->zc_name,
6749			    vec->zvec_namecheck, vec->zvec_pool_check);
6750		break;
6751
6752	case ENTITY_NAME:
6753		if (entity_namecheck(zc->zc_name, NULL, NULL) != 0) {
6754			error = SET_ERROR(EINVAL);
6755		} else {
6756			error = pool_status_check(zc->zc_name,
6757			    vec->zvec_namecheck, vec->zvec_pool_check);
6758		}
6759		break;
6760
6761	case NO_NAME:
6762		break;
6763	}
6764
6765	if (error == 0)
6766		error = vec->zvec_secpolicy(zc, innvl, cr);
6767
6768	if (error != 0)
6769		goto out;
6770
6771	/* legacy ioctls can modify zc_name */
6772	len = strcspn(zc->zc_name, "/@#") + 1;
6773	saved_poolname = kmem_alloc(len, KM_SLEEP);
6774	(void) strlcpy(saved_poolname, zc->zc_name, len);
6775
6776	if (vec->zvec_func != NULL) {
6777		nvlist_t *outnvl;
6778		int puterror = 0;
6779		spa_t *spa;
6780		nvlist_t *lognv = NULL;
6781
6782		ASSERT(vec->zvec_legacy_func == NULL);
6783
6784		/*
6785		 * Add the innvl to the lognv before calling the func,
6786		 * in case the func changes the innvl.
6787		 */
6788		if (vec->zvec_allow_log) {
6789			lognv = fnvlist_alloc();
6790			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
6791			    vec->zvec_name);
6792			if (!nvlist_empty(innvl)) {
6793				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
6794				    innvl);
6795			}
6796		}
6797
6798		outnvl = fnvlist_alloc();
6799		error = vec->zvec_func(zc->zc_name, innvl, outnvl);
6800
6801		/*
		 * Some commands can partially execute, modify state, and still
6803		 * return an error.  In these cases, attempt to record what
6804		 * was modified.
6805		 */
6806		if ((error == 0 ||
6807		    (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
6808		    vec->zvec_allow_log &&
6809		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
6810			if (!nvlist_empty(outnvl)) {
6811				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
6812				    outnvl);
6813			}
6814			if (error != 0) {
6815				fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
6816				    error);
6817			}
6818			(void) spa_history_log_nvl(spa, lognv);
6819			spa_close(spa, FTAG);
6820		}
6821		fnvlist_free(lognv);
6822
6823		/* rewrite outnvl for backwards compatibility */
6824		if (compat)
6825			outnvl = zfs_ioctl_compat_outnvl(zc, outnvl, vecnum,
6826			    cflag);
6827
6828		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
6829			int smusherror = 0;
6830			if (vec->zvec_smush_outnvlist) {
6831				smusherror = nvlist_smush(outnvl,
6832				    zc->zc_nvlist_dst_size);
6833			}
6834			if (smusherror == 0)
6835				puterror = put_nvlist(zc, outnvl);
6836		}
6837
6838		if (puterror != 0)
6839			error = puterror;
6840
6841		nvlist_free(outnvl);
6842	} else {
6843		error = vec->zvec_legacy_func(zc);
6844	}
6845
6846out:
6847	nvlist_free(innvl);
6848
6849#ifdef illumos
6850	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
6851	if (error == 0 && rc != 0)
6852		error = SET_ERROR(EFAULT);
6853#else
6854	if (compat) {
6855		zfs_ioctl_compat_post(zc, cmd, cflag);
6856		if (newioc) {
6857			ASSERT(compat_zc != NULL);
6858			ASSERT(sizeof(zfs_cmd_t) >= zc_iocparm->zfs_cmd_size);
6859
6860			zfs_cmd_compat_put(zc, compat_zc, vecnum, cflag);
6861			rc = ddi_copyout(compat_zc,
6862			    (void *)(uintptr_t)zc_iocparm->zfs_cmd,
6863			    zc_iocparm->zfs_cmd_size, flag);
6864			if (error == 0 && rc != 0)
6865				error = SET_ERROR(EFAULT);
6866			kmem_free(compat_zc, sizeof (zfs_cmd_t));
6867		} else {
6868			zfs_cmd_compat_put(zc, arg, vecnum, cflag);
6869		}
6870	} else {
6871		ASSERT(newioc);
6872
6873		rc = ddi_copyout(zc, (void *)(uintptr_t)zc_iocparm->zfs_cmd,
6874		    sizeof (zfs_cmd_t), flag);
6875		if (error == 0 && rc != 0)
6876			error = SET_ERROR(EFAULT);
6877	}
6878#endif
6879	if (error == 0 && vec->zvec_allow_log) {
6880		char *s = tsd_get(zfs_allow_log_key);
6881		if (s != NULL)
6882			strfree(s);
6883		(void) tsd_set(zfs_allow_log_key, saved_poolname);
6884	} else {
6885		if (saved_poolname != NULL)
6886			strfree(saved_poolname);
6887	}
6888
6889	kmem_free(zc, sizeof (zfs_cmd_t));
6890	return (error);
6891}
6892
6893#ifdef illumos
6894static int
6895zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
6896{
6897	if (cmd != DDI_ATTACH)
6898		return (DDI_FAILURE);
6899
6900	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
6901	    DDI_PSEUDO, 0) == DDI_FAILURE)
6902		return (DDI_FAILURE);
6903
6904	zfs_dip = dip;
6905
6906	ddi_report_dev(dip);
6907
6908	return (DDI_SUCCESS);
6909}
6910
6911static int
6912zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6913{
6914	if (spa_busy() || zfs_busy() || zvol_busy())
6915		return (DDI_FAILURE);
6916
6917	if (cmd != DDI_DETACH)
6918		return (DDI_FAILURE);
6919
6920	zfs_dip = NULL;
6921
6922	ddi_prop_remove_all(dip);
6923	ddi_remove_minor_node(dip, NULL);
6924
6925	return (DDI_SUCCESS);
6926}
6927
6928/*ARGSUSED*/
6929static int
6930zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
6931{
6932	switch (infocmd) {
6933	case DDI_INFO_DEVT2DEVINFO:
6934		*result = zfs_dip;
6935		return (DDI_SUCCESS);
6936
6937	case DDI_INFO_DEVT2INSTANCE:
6938		*result = (void *)0;
6939		return (DDI_SUCCESS);
6940	}
6941
6942	return (DDI_FAILURE);
6943}
6944#endif	/* illumos */
6945
6946/*
6947 * OK, so this is a little weird.
6948 *
6949 * /dev/zfs is the control node, i.e. minor 0.
6950 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
6951 *
6952 * /dev/zfs has basically nothing to do except serve up ioctls,
6953 * so most of the standard driver entry points are in zvol.c.
6954 */
6955#ifdef illumos
/*
 * illumos character/block entry points for the driver.  open, close and
 * ioctl are served by the zfsdev_* routines; strategy, dump, read and write
 * are the zvol_* routines.  The remaining slots are filled with the
 * nodev/nochpoll placeholders or NULL.  Entries are positional, so their
 * order must not be changed.
 */
static struct cb_ops zfs_cb_ops = {
	zfsdev_open,	/* open */
	zfsdev_close,	/* close */
	zvol_strategy,	/* strategy */
	nodev,		/* print */
	zvol_dump,	/* dump */
	zvol_read,	/* read */
	zvol_write,	/* write */
	zfsdev_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
	CB_REV,		/* version */
	nodev,		/* async read */
	nodev,		/* async write */
};
6976
/*
 * illumos device operations for the driver: wires up zfs_info, zfs_attach
 * and zfs_detach and points at zfs_cb_ops for the cb_ops entry points.
 * Entries are positional, so their order must not be changed.
 */
static struct dev_ops zfs_dev_ops = {
	DEVO_REV,	/* version */
	0,		/* refcnt */
	zfs_info,	/* info */
	nulldev,	/* identify */
	nulldev,	/* probe */
	zfs_attach,	/* attach */
	zfs_detach,	/* detach */
	nodev,		/* reset */
	&zfs_cb_ops,	/* driver operations */
	NULL,		/* no bus operations */
	NULL,		/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
6991
/*
 * illumos driver linkage record: ties the description string
 * "ZFS storage pool" and zfs_dev_ops to mod_driverops.  Referenced from
 * modlinkage.
 */
static struct modldrv zfs_modldrv = {
	&mod_driverops,
	"ZFS storage pool",
	&zfs_dev_ops
};
6997
/*
 * illumos module linkage passed to mod_install(), mod_remove() and
 * mod_info().  It lists two linkage records, zfs_modlfs and zfs_modldrv,
 * followed by a terminating NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&zfs_modlfs,
	(void *)&zfs_modldrv,
	NULL
};
7004#endif	/* illumos */
7005
7006static struct cdevsw zfs_cdevsw = {
7007	.d_version =	D_VERSION,
7008	.d_open =	zfsdev_open,
7009	.d_ioctl =	zfsdev_ioctl,
7010	.d_name =	ZFS_DEV_NAME
7011};
7012
/*
 * Destructor registered with tsd_create() for zfs_allow_log_key.  The value
 * stored under that key is a pool-name string, which is released here with
 * strfree().
 */
static void
zfs_allow_log_destroy(void *arg)
{
	char *saved_name = arg;

	strfree(saved_name);
}
7019
7020static void
7021zfsdev_init(void)
7022{
7023	zfsdev = make_dev(&zfs_cdevsw, 0x0, UID_ROOT, GID_OPERATOR, 0666,
7024	    ZFS_DEV_NAME);
7025}
7026
7027static void
7028zfsdev_fini(void)
7029{
7030	if (zfsdev != NULL)
7031		destroy_dev(zfsdev);
7032}
7033
/*
 * Token returned by root_mount_hold("ZFS") in zfs__init() and handed back
 * to root_mount_rel() once initialization there has finished.
 */
static struct root_hold_token *zfs_root_token;
/*
 * NOTE(review): not referenced in this part of the file; presumably the
 * process that owns ZFS kernel threads -- confirm against its users.
 */
struct proc *zfsproc;
7036
7037#ifdef illumos
7038int
7039_init(void)
7040{
7041	int error;
7042
7043	spa_init(FREAD | FWRITE);
7044	zfs_init();
7045	zvol_init();
7046	zfs_ioctl_init();
7047
7048	if ((error = mod_install(&modlinkage)) != 0) {
7049		zvol_fini();
7050		zfs_fini();
7051		spa_fini();
7052		return (error);
7053	}
7054
7055	tsd_create(&zfs_fsyncer_key, NULL);
7056	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
7057	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
7058
7059	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
7060	ASSERT(error == 0);
7061	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
7062
7063	return (0);
7064}
7065
7066int
7067_fini(void)
7068{
7069	int error;
7070
7071	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
7072		return (SET_ERROR(EBUSY));
7073
7074	if ((error = mod_remove(&modlinkage)) != 0)
7075		return (error);
7076
7077	zvol_fini();
7078	zfs_fini();
7079	spa_fini();
7080	if (zfs_nfsshare_inited)
7081		(void) ddi_modclose(nfs_mod);
7082	if (zfs_smbshare_inited)
7083		(void) ddi_modclose(smbsrv_mod);
7084	if (zfs_nfsshare_inited || zfs_smbshare_inited)
7085		(void) ddi_modclose(sharefs_mod);
7086
7087	tsd_destroy(&zfs_fsyncer_key);
7088	ldi_ident_release(zfs_li);
7089	zfs_li = NULL;
7090	mutex_destroy(&zfs_share_lock);
7091
7092	return (error);
7093}
7094
7095int
7096_info(struct modinfo *modinfop)
7097{
7098	return (mod_info(&modlinkage, modinfop));
7099}
7100#endif	/* illumos */
7101
/* Forward declarations for the module init/fini/shutdown routines below. */
static int zfs__init(void);
static int zfs__fini(void);
static void zfs_shutdown(void *, int);

/*
 * Tag returned by EVENTHANDLER_REGISTER() for the shutdown_post_sync
 * handler; set on MOD_LOAD and used to deregister on MOD_UNLOAD.
 */
static eventhandler_tag zfs_shutdown_event_tag;

/*
 * zfs__init() prints a notice at load time when the kernel was built with
 * fewer KSTACK_PAGES than this.
 */
#ifdef __FreeBSD__
#define ZFS_MIN_KSTACK_PAGES 4
#endif
7111
7112int
7113zfs__init(void)
7114{
7115
7116#ifdef __FreeBSD__
7117#if KSTACK_PAGES < ZFS_MIN_KSTACK_PAGES
7118	printf("ZFS NOTICE: KSTACK_PAGES is %d which could result in stack "
7119	    "overflow panic!\nPlease consider adding "
7120	    "'options KSTACK_PAGES=%d' to your kernel config\n", KSTACK_PAGES,
7121	    ZFS_MIN_KSTACK_PAGES);
7122#endif
7123#endif
7124	zfs_root_token = root_mount_hold("ZFS");
7125
7126	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
7127
7128	spa_init(FREAD | FWRITE);
7129	zfs_init();
7130	zvol_init();
7131	zfs_ioctl_init();
7132
7133	tsd_create(&zfs_fsyncer_key, NULL);
7134	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
7135	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
7136	tsd_create(&zfs_geom_probe_vdev_key, NULL);
7137
7138	printf("ZFS storage pool version: features support (" SPA_VERSION_STRING ")\n");
7139	root_mount_rel(zfs_root_token);
7140
7141	zfsdev_init();
7142
7143	return (0);
7144}
7145
7146int
7147zfs__fini(void)
7148{
7149	if (spa_busy() || zfs_busy() || zvol_busy() ||
7150	    zio_injection_enabled) {
7151		return (EBUSY);
7152	}
7153
7154	zfsdev_fini();
7155	zvol_fini();
7156	zfs_fini();
7157	spa_fini();
7158
7159	tsd_destroy(&zfs_fsyncer_key);
7160	tsd_destroy(&rrw_tsd_key);
7161	tsd_destroy(&zfs_allow_log_key);
7162
7163	mutex_destroy(&zfs_share_lock);
7164
7165	return (0);
7166}
7167
7168static void
7169zfs_shutdown(void *arg __unused, int howto __unused)
7170{
7171
7172	/*
7173	 * ZFS fini routines can not properly work in a panic-ed system.
7174	 */
7175	if (panicstr == NULL)
7176		(void)zfs__fini();
7177}
7178
7179
7180static int
7181zfs_modevent(module_t mod, int type, void *unused __unused)
7182{
7183	int err;
7184
7185	switch (type) {
7186	case MOD_LOAD:
7187		err = zfs__init();
7188		if (err == 0)
7189			zfs_shutdown_event_tag = EVENTHANDLER_REGISTER(
7190			    shutdown_post_sync, zfs_shutdown, NULL,
7191			    SHUTDOWN_PRI_FIRST);
7192		return (err);
7193	case MOD_UNLOAD:
7194		err = zfs__fini();
7195		if (err == 0 && zfs_shutdown_event_tag != NULL)
7196			EVENTHANDLER_DEREGISTER(shutdown_post_sync,
7197			    zfs_shutdown_event_tag);
7198		return (err);
7199	case MOD_SHUTDOWN:
7200		return (0);
7201	default:
7202		break;
7203	}
7204	return (EOPNOTSUPP);
7205}
7206
/*
 * Module description for "zfsctrl": events are delivered to zfs_modevent()
 * and the extra-data slot is left as 0.
 */
static moduledata_t zfs_mod = {
	"zfsctrl",
	zfs_modevent,
	0
};
/*
 * Register the module at SI_SUB_VFS / SI_ORDER_ANY, give it version 1 and
 * declare its dependencies on the opensolaris, krpc and acl_nfs4 modules.
 */
DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_VFS, SI_ORDER_ANY);
MODULE_VERSION(zfsctrl, 1);
MODULE_DEPEND(zfsctrl, opensolaris, 1, 1, 1);
MODULE_DEPEND(zfsctrl, krpc, 1, 1, 1);
MODULE_DEPEND(zfsctrl, acl_nfs4, 1, 1, 1);
7217