zfs_ioctl.c revision 324010
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
25 * Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
26 * Copyright 2014 Xin Li <delphij@FreeBSD.org>. All rights reserved.
27 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
28 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
29 * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
30 * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
31 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
32 * Copyright (c) 2013 Steven Hartland. All rights reserved.
33 * Copyright (c) 2014 Integros [integros.com]
34 * Copyright 2017 RackTop Systems.
35 * Copyright (c) 2017 Datto Inc.
36 */
37
38/*
39 * ZFS ioctls.
40 *
41 * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
42 * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
43 *
44 * There are two ways that we handle ioctls: the legacy way where almost
45 * all of the logic is in the ioctl callback, and the new way where most
46 * of the marshalling is handled in the common entry point, zfsdev_ioctl().
47 *
48 * Non-legacy ioctls should be registered by calling
49 * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
50 * from userland by lzc_ioctl().
51 *
52 * The registration arguments are as follows:
53 *
54 * const char *name
55 *   The name of the ioctl.  This is used for history logging.  If the
56 *   ioctl returns successfully (the callback returns 0), and allow_log
57 *   is true, then a history log entry will be recorded with the input &
58 *   output nvlists.  The log entry can be printed with "zpool history -i".
59 *
60 * zfs_ioc_t ioc
61 *   The ioctl request number, which userland will pass to ioctl(2).
62 *   The ioctl numbers can change from release to release, because
63 *   the caller (libzfs) must be matched to the kernel.
64 *
65 * zfs_secpolicy_func_t *secpolicy
66 *   This function will be called before the zfs_ioc_func_t, to
67 *   determine if this operation is permitted.  It should return EPERM
68 *   on failure, and 0 on success.  Checks include determining if the
69 *   dataset is visible in this zone, and if the user has either all
70 *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
71 *   to do this operation on this dataset with "zfs allow".
72 *
73 * zfs_ioc_namecheck_t namecheck
74 *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
75 *   name, a dataset name, or nothing.  If the name is not well-formed,
76 *   the ioctl will fail and the callback will not be called.
77 *   Therefore, the callback can assume that the name is well-formed
78 *   (e.g. is null-terminated, doesn't have more than one '@' character,
79 *   doesn't have invalid characters).
80 *
81 * zfs_ioc_poolcheck_t pool_check
82 *   This specifies requirements on the pool state.  If the pool does
83 *   not meet them (is suspended or is readonly), the ioctl will fail
84 *   and the callback will not be called.  If any checks are specified
85 *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
86 *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
87 *   POOL_CHECK_READONLY).
88 *
89 * boolean_t smush_outnvlist
90 *   If smush_outnvlist is true, then the output is presumed to be a
91 *   list of errors, and it will be "smushed" down to fit into the
92 *   caller's buffer, by removing some entries and replacing them with a
93 *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
94 *   nvlist_smush() for details.  If smush_outnvlist is false, and the
95 *   outnvlist does not fit into the userland-provided buffer, then the
96 *   ioctl will fail with ENOMEM.
97 *
98 * zfs_ioc_func_t *func
99 *   The callback function that will perform the operation.
100 *
101 *   The callback should return 0 on success, or an error number on
102 *   failure.  If the function fails, the userland ioctl will return -1,
103 *   and errno will be set to the callback's return value.  The callback
104 *   will be called with the following arguments:
105 *
106 *   const char *name
107 *     The name of the pool or dataset to operate on, from
108 *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
109 *     expected type (pool, dataset, or none).
110 *
111 *   nvlist_t *innvl
112 *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
113 *     NULL if no input nvlist was provided.  Changes to this nvlist are
114 *     ignored.  If the input nvlist could not be deserialized, the
115 *     ioctl will fail and the callback will not be called.
116 *
117 *   nvlist_t *outnvl
118 *     The output nvlist, initially empty.  The callback can fill it in,
119 *     and it will be returned to userland by serializing it into
120 *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
121 *     fails (e.g. because the caller didn't supply a large enough
122 *     buffer), then the overall ioctl will fail.  See the
123 *     'smush_outnvlist' argument above for additional behaviors.
124 *
125 *     There are two typical uses of the output nvlist:
126 *       - To return state, e.g. property values.  In this case,
127 *         smush_outnvlist should be false.  If the buffer was not large
128 *         enough, the caller will reallocate a larger buffer and try
129 *         the ioctl again.
130 *
131 *       - To return multiple errors from an ioctl which makes on-disk
132 *         changes.  In this case, smush_outnvlist should be true.
133 *         Ioctls which make on-disk modifications should generally not
134 *         use the outnvl if they succeed, because the caller can not
135 *         distinguish between the operation failing, and
136 *         deserialization failing.
137 */
138#ifdef __FreeBSD__
139#include "opt_kstack_pages.h"
140#endif
141
142#include <sys/types.h>
143#include <sys/param.h>
144#include <sys/systm.h>
145#include <sys/conf.h>
146#include <sys/kernel.h>
147#include <sys/lock.h>
148#include <sys/malloc.h>
149#include <sys/mutex.h>
150#include <sys/proc.h>
151#include <sys/errno.h>
152#include <sys/uio.h>
153#include <sys/buf.h>
154#include <sys/file.h>
155#include <sys/kmem.h>
156#include <sys/conf.h>
157#include <sys/cmn_err.h>
158#include <sys/stat.h>
159#include <sys/zfs_ioctl.h>
160#include <sys/zfs_vfsops.h>
161#include <sys/zfs_znode.h>
162#include <sys/zap.h>
163#include <sys/spa.h>
164#include <sys/spa_impl.h>
165#include <sys/vdev.h>
166#include <sys/dmu.h>
167#include <sys/dsl_dir.h>
168#include <sys/dsl_dataset.h>
169#include <sys/dsl_prop.h>
170#include <sys/dsl_deleg.h>
171#include <sys/dmu_objset.h>
172#include <sys/dmu_impl.h>
173#include <sys/dmu_tx.h>
174#include <sys/sunddi.h>
175#include <sys/policy.h>
176#include <sys/zone.h>
177#include <sys/nvpair.h>
178#include <sys/mount.h>
179#include <sys/taskqueue.h>
180#include <sys/sdt.h>
181#include <sys/varargs.h>
182#include <sys/fs/zfs.h>
183#include <sys/zfs_ctldir.h>
184#include <sys/zfs_dir.h>
185#include <sys/zfs_onexit.h>
186#include <sys/zvol.h>
187#include <sys/dsl_scan.h>
188#include <sys/dmu_objset.h>
189#include <sys/dmu_send.h>
190#include <sys/dsl_destroy.h>
191#include <sys/dsl_bookmark.h>
192#include <sys/dsl_userhold.h>
193#include <sys/zfeature.h>
194#include <sys/zio_checksum.h>
195
196#include "zfs_namecheck.h"
197#include "zfs_prop.h"
198#include "zfs_deleg.h"
199#include "zfs_comutil.h"
200#include "zfs_ioctl_compat.h"
201
/* Compile-time check that a zfs_cmd_t is smaller than IOCPARM_MAX. */
202CTASSERT(sizeof(zfs_cmd_t) < IOCPARM_MAX);
203
/*
 * Character device handle for this driver.
 * NOTE(review): presumably the /dev/zfs node described in the file header;
 * confirm where it is created.
 */
204static struct cdev *zfsdev;
205
/* Module-wide init/teardown hooks defined outside this file. */
206extern void zfs_init(void);
207extern void zfs_fini(void);
208
/*
 * Thread-specific-data keys.  zfs_allow_log_key is private to this file and
 * is read with tsd_get() by zfs_secpolicy_log_history(); the extern keys are
 * defined elsewhere.
 */
209uint_t zfs_fsyncer_key;
210extern uint_t rrw_tsd_key;
211static uint_t zfs_allow_log_key;
212extern uint_t zfs_geom_probe_vdev_key;
213
/*
 * Function types referenced by the ioctl vector (zfs_ioc_vec_t):
 *
 *   zfs_ioc_legacy_func_t  legacy handler; receives only the raw zfs_cmd_t.
 *   zfs_ioc_func_t         new-style handler; receives the name from
 *                          zc_name, the input nvlist and the output nvlist.
 *   zfs_secpolicy_func_t   permission check; receives the zfs_cmd_t, the
 *                          input nvlist and the caller's credentials.
 */
214typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
215typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
216typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
217
/*
 * What an ioctl expects to find in zfs_cmd_t:zc_name: nothing, a pool name,
 * or a dataset name.
 */
218typedef enum {
219	NO_NAME,
220	POOL_NAME,
221	DATASET_NAME
222} zfs_ioc_namecheck_t;
223
/*
 * Requirements on the pool state for an ioctl.  Each value is a distinct
 * bit so that checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
 * POOL_CHECK_READONLY).  Note that POOL_CHECK_NONE is itself a bit (1 << 0),
 * not zero.
 */
224typedef enum {
225	POOL_CHECK_NONE		= 1 << 0,
226	POOL_CHECK_SUSPENDED	= 1 << 1,
227	POOL_CHECK_READONLY	= 1 << 2,
228} zfs_ioc_poolcheck_t;
229
/*
 * One registered ioctl.
 *
 *   zvec_legacy_func      legacy-style handler (takes the zfs_cmd_t)
 *   zvec_func             new-style handler (takes name, innvl, outnvl)
 *   zvec_secpolicy        permission check for this ioctl
 *   zvec_namecheck        what zc_name is expected to contain
 *   zvec_allow_log        whether a history log entry may be recorded
 *   zvec_pool_check       required pool state (or-ed POOL_CHECK_* bits)
 *   zvec_smush_outnvlist  whether an oversized output nvlist is "smushed"
 *                         to fit the caller's buffer
 *   zvec_name             ioctl name, used for history logging
 *
 * NOTE(review): presumably exactly one of zvec_legacy_func / zvec_func is
 * set for a given entry -- confirm against the registration code.
 */
230typedef struct zfs_ioc_vec {
231	zfs_ioc_legacy_func_t	*zvec_legacy_func;
232	zfs_ioc_func_t		*zvec_func;
233	zfs_secpolicy_func_t	*zvec_secpolicy;
234	zfs_ioc_namecheck_t	zvec_namecheck;
235	boolean_t		zvec_allow_log;
236	zfs_ioc_poolcheck_t	zvec_pool_check;
237	boolean_t		zvec_smush_outnvlist;
238	const char		*zvec_name;
239} zfs_ioc_vec_t;
240
/*
 * Delegated-permission names for the user/group space properties.
 */
241/* This array is indexed by zfs_userquota_prop_t */
242static const char *userquota_perms[] = {
243	ZFS_DELEG_PERM_USERUSED,
244	ZFS_DELEG_PERM_USERQUOTA,
245	ZFS_DELEG_PERM_GROUPUSED,
246	ZFS_DELEG_PERM_GROUPQUOTA,
247};
248
/*
 * Forward declarations.  The static ones are file-local; zfs_set_prop_nvlist()
 * has external linkage.
 */
249static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
250static int zfs_check_settable(const char *name, nvpair_t *property,
251    cred_t *cr);
252static int zfs_check_clearable(char *dataset, nvlist_t *props,
253    nvlist_t **errors);
254static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
255    boolean_t *);
256int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
257static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
258
259static void zfsdev_close(void *data);
260
261static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
262
263/* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
264void
265__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
266{
267	const char *newfile;
268	char buf[512];
269	va_list adx;
270
271	/*
272	 * Get rid of annoying "../common/" prefix to filename.
273	 */
274	newfile = strrchr(file, '/');
275	if (newfile != NULL) {
276		newfile = newfile + 1; /* Get rid of leading / */
277	} else {
278		newfile = file;
279	}
280
281	va_start(adx, fmt);
282	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
283	va_end(adx);
284
285	/*
286	 * To get this data, use the zfs-dprintf probe as so:
287	 * dtrace -q -n 'zfs-dprintf \
288	 *	/stringof(arg0) == "dbuf.c"/ \
289	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
290	 * arg0 = file name
291	 * arg1 = function name
292	 * arg2 = line number
293	 * arg3 = message
294	 */
295	DTRACE_PROBE4(zfs__dprintf,
296	    char *, newfile, char *, func, int, line, char *, buf);
297}
298
299static void
300history_str_free(char *buf)
301{
302	kmem_free(buf, HIS_MAX_RECORD_LEN);
303}
304
305static char *
306history_str_get(zfs_cmd_t *zc)
307{
308	char *buf;
309
310	if (zc->zc_history == 0)
311		return (NULL);
312
313	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
314	if (copyinstr((void *)(uintptr_t)zc->zc_history,
315	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
316		history_str_free(buf);
317		return (NULL);
318	}
319
320	buf[HIS_MAX_RECORD_LEN -1] = '\0';
321
322	return (buf);
323}
324
325/*
326 * Check to see if the named dataset is currently defined as bootable
327 */
328static boolean_t
329zfs_is_bootfs(const char *name)
330{
331	objset_t *os;
332
333	if (dmu_objset_hold(name, FTAG, &os) == 0) {
334		boolean_t ret;
335		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
336		dmu_objset_rele(os, FTAG);
337		return (ret);
338	}
339	return (B_FALSE);
340}
341
342/*
343 * Return non-zero if the spa version is less than requested version.
344 */
345static int
346zfs_earlier_version(const char *name, int version)
347{
348	spa_t *spa;
349
350	if (spa_open(name, &spa, FTAG) == 0) {
351		if (spa_version(spa) < version) {
352			spa_close(spa, FTAG);
353			return (1);
354		}
355		spa_close(spa, FTAG);
356	}
357	return (0);
358}
359
360/*
361 * Return TRUE if the ZPL version is less than requested version.
362 */
363static boolean_t
364zpl_earlier_version(const char *name, int version)
365{
366	objset_t *os;
367	boolean_t rc = B_TRUE;
368
369	if (dmu_objset_hold(name, FTAG, &os) == 0) {
370		uint64_t zplversion;
371
372		if (dmu_objset_type(os) != DMU_OST_ZFS) {
373			dmu_objset_rele(os, FTAG);
374			return (B_TRUE);
375		}
376		/* XXX reading from non-owned objset */
377		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
378			rc = zplversion < version;
379		dmu_objset_rele(os, FTAG);
380	}
381	return (rc);
382}
383
384static void
385zfs_log_history(zfs_cmd_t *zc)
386{
387	spa_t *spa;
388	char *buf;
389
390	if ((buf = history_str_get(zc)) == NULL)
391		return;
392
393	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
394		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
395			(void) spa_history_log(spa, buf);
396		spa_close(spa, FTAG);
397	}
398	history_str_free(buf);
399}
400
401/*
402 * Policy for top-level read operations (list pools).  Requires no privileges,
403 * and can be used in the local zone, as there is no associated dataset.
404 */
405/* ARGSUSED */
406static int
407zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
408{
409	return (0);
410}
411
412/*
413 * Policy for dataset read operations (list children, get statistics).  Requires
414 * no privileges, but must be visible in the local zone.
415 */
416/* ARGSUSED */
417static int
418zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
419{
420	if (INGLOBALZONE(curthread) ||
421	    zone_dataset_visible(zc->zc_name, NULL))
422		return (0);
423
424	return (SET_ERROR(ENOENT));
425}
426
427static int
428zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
429{
430	int writable = 1;
431
432	/*
433	 * The dataset must be visible by this zone -- check this first
434	 * so they don't see EPERM on something they shouldn't know about.
435	 */
436	if (!INGLOBALZONE(curthread) &&
437	    !zone_dataset_visible(dataset, &writable))
438		return (SET_ERROR(ENOENT));
439
440	if (INGLOBALZONE(curthread)) {
441		/*
442		 * If the fs is zoned, only root can access it from the
443		 * global zone.
444		 */
445		if (secpolicy_zfs(cr) && zoned)
446			return (SET_ERROR(EPERM));
447	} else {
448		/*
449		 * If we are in a local zone, the 'zoned' property must be set.
450		 */
451		if (!zoned)
452			return (SET_ERROR(EPERM));
453
454		/* must be writable by this zone */
455		if (!writable)
456			return (SET_ERROR(EPERM));
457	}
458	return (0);
459}
460
461static int
462zfs_dozonecheck(const char *dataset, cred_t *cr)
463{
464	uint64_t zoned;
465
466	if (dsl_prop_get_integer(dataset, "jailed", &zoned, NULL))
467		return (SET_ERROR(ENOENT));
468
469	return (zfs_dozonecheck_impl(dataset, zoned, cr));
470}
471
472static int
473zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
474{
475	uint64_t zoned;
476
477	if (dsl_prop_get_int_ds(ds, "jailed", &zoned))
478		return (SET_ERROR(ENOENT));
479
480	return (zfs_dozonecheck_impl(dataset, zoned, cr));
481}
482
483static int
484zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
485    const char *perm, cred_t *cr)
486{
487	int error;
488
489	error = zfs_dozonecheck_ds(name, ds, cr);
490	if (error == 0) {
491		error = secpolicy_zfs(cr);
492		if (error != 0)
493			error = dsl_deleg_access_impl(ds, perm, cr);
494	}
495	return (error);
496}
497
498static int
499zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
500{
501	int error;
502	dsl_dataset_t *ds;
503	dsl_pool_t *dp;
504
505	/*
506	 * First do a quick check for root in the global zone, which
507	 * is allowed to do all write_perms.  This ensures that zfs_ioc_*
508	 * will get to handle nonexistent datasets.
509	 */
510	if (INGLOBALZONE(curthread) && secpolicy_zfs(cr) == 0)
511		return (0);
512
513	error = dsl_pool_hold(name, FTAG, &dp);
514	if (error != 0)
515		return (error);
516
517	error = dsl_dataset_hold(dp, name, FTAG, &ds);
518	if (error != 0) {
519		dsl_pool_rele(dp, FTAG);
520		return (error);
521	}
522
523	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
524
525	dsl_dataset_rele(ds, FTAG);
526	dsl_pool_rele(dp, FTAG);
527	return (error);
528}
529
#ifdef SECLABEL
/*
 * Policy for setting the security label property.
 *
 * name   - dataset whose mlslabel is being changed
 * strval - requested label, either ZFS_MLSLABEL_DEFAULT or a hex label
 * cr     - credentials checked for the upgrade/downgrade privilege
 *
 * Returns 0 for success, non-zero for access and other errors.
 */
static int
zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
{
	char		ds_hexsl[MAXNAMELEN];
	bslabel_t	ds_sl, new_sl;
	boolean_t	new_default = FALSE;
	uint64_t	zoned;
	int		needed_priv = -1;
	int		error;

	/* First get the existing dataset label. */
	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
	if (error != 0)
		return (SET_ERROR(EPERM));

	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
		new_default = TRUE;

	/* The label must be translatable */
	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
		return (SET_ERROR(EINVAL));

	/*
	 * In a non-global zone, disallow attempts to set a label that
	 * doesn't match that of the zone; otherwise no other checks
	 * are needed.
	 */
	if (!INGLOBALZONE(curproc)) {
		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
			return (SET_ERROR(EPERM));
		return (0);
	}

	/*
	 * For global-zone datasets (i.e., those whose zoned property is
	 * "off"), verify that the specified new label is valid for the
	 * global zone.
	 */
	if (dsl_prop_get_integer(name,
	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
		return (SET_ERROR(EPERM));
	if (!zoned) {
		if (zfs_check_global_label(name, strval) != 0)
			return (SET_ERROR(EPERM));
	}

	/*
	 * If the existing dataset label is nondefault, check if the
	 * dataset is mounted (label cannot be changed while mounted).
	 * Get the zfsvfs; if there isn't one, then the dataset isn't
	 * mounted (or isn't a dataset, doesn't exist, ...).
	 */
	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
		objset_t *os;
		static char *setsl_tag = "setsl_tag";

		/*
		 * Try to own the dataset; abort if there is any error,
		 * (e.g., already mounted, in use, or other error).
		 */
		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
		    setsl_tag, &os);
		if (error != 0)
			return (SET_ERROR(EPERM));

		dmu_objset_disown(os, setsl_tag);

		if (new_default) {
			needed_priv = PRIV_FILE_DOWNGRADE_SL;
			goto out_check;
		}

		if (hexstr_to_label(strval, &new_sl) != 0)
			return (SET_ERROR(EPERM));

		/*
		 * Translate the dataset's current label so that it can be
		 * compared with the new one; without this the dominance
		 * checks below would read an uninitialized ds_sl.
		 */
		if (hexstr_to_label(ds_hexsl, &ds_sl) != 0)
			return (SET_ERROR(EPERM));

		if (blstrictdom(&ds_sl, &new_sl))
			needed_priv = PRIV_FILE_DOWNGRADE_SL;
		else if (blstrictdom(&new_sl, &ds_sl))
			needed_priv = PRIV_FILE_UPGRADE_SL;
	} else {
		/* dataset currently has a default label */
		if (!new_default)
			needed_priv = PRIV_FILE_UPGRADE_SL;
	}

out_check:
	if (needed_priv != -1)
		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
	return (0);
}
#endif	/* SECLABEL */
628
629static int
630zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
631    cred_t *cr)
632{
633	char *strval;
634
635	/*
636	 * Check permissions for special properties.
637	 */
638	switch (prop) {
639	case ZFS_PROP_ZONED:
640		/*
641		 * Disallow setting of 'zoned' from within a local zone.
642		 */
643		if (!INGLOBALZONE(curthread))
644			return (SET_ERROR(EPERM));
645		break;
646
647	case ZFS_PROP_QUOTA:
648	case ZFS_PROP_FILESYSTEM_LIMIT:
649	case ZFS_PROP_SNAPSHOT_LIMIT:
650		if (!INGLOBALZONE(curthread)) {
651			uint64_t zoned;
652			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
653			/*
654			 * Unprivileged users are allowed to modify the
655			 * limit on things *under* (ie. contained by)
656			 * the thing they own.
657			 */
658			if (dsl_prop_get_integer(dsname, "jailed", &zoned,
659			    setpoint))
660				return (SET_ERROR(EPERM));
661			if (!zoned || strlen(dsname) <= strlen(setpoint))
662				return (SET_ERROR(EPERM));
663		}
664		break;
665
666	case ZFS_PROP_MLSLABEL:
667#ifdef SECLABEL
668		if (!is_system_labeled())
669			return (SET_ERROR(EPERM));
670
671		if (nvpair_value_string(propval, &strval) == 0) {
672			int err;
673
674			err = zfs_set_slabel_policy(dsname, strval, CRED());
675			if (err != 0)
676				return (err);
677		}
678#else
679		return (EOPNOTSUPP);
680#endif
681		break;
682	}
683
684	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
685}
686
687/* ARGSUSED */
688static int
689zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
690{
691	int error;
692
693	error = zfs_dozonecheck(zc->zc_name, cr);
694	if (error != 0)
695		return (error);
696
697	/*
698	 * permission to set permissions will be evaluated later in
699	 * dsl_deleg_can_allow()
700	 */
701	return (0);
702}
703
704/* ARGSUSED */
705static int
706zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
707{
708	return (zfs_secpolicy_write_perms(zc->zc_name,
709	    ZFS_DELEG_PERM_ROLLBACK, cr));
710}
711
712/* ARGSUSED */
713static int
714zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
715{
716	dsl_pool_t *dp;
717	dsl_dataset_t *ds;
718	char *cp;
719	int error;
720
721	/*
722	 * Generate the current snapshot name from the given objsetid, then
723	 * use that name for the secpolicy/zone checks.
724	 */
725	cp = strchr(zc->zc_name, '@');
726	if (cp == NULL)
727		return (SET_ERROR(EINVAL));
728	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
729	if (error != 0)
730		return (error);
731
732	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
733	if (error != 0) {
734		dsl_pool_rele(dp, FTAG);
735		return (error);
736	}
737
738	dsl_dataset_name(ds, zc->zc_name);
739
740	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
741	    ZFS_DELEG_PERM_SEND, cr);
742	dsl_dataset_rele(ds, FTAG);
743	dsl_pool_rele(dp, FTAG);
744
745	return (error);
746}
747
748/* ARGSUSED */
749static int
750zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
751{
752	return (zfs_secpolicy_write_perms(zc->zc_name,
753	    ZFS_DELEG_PERM_SEND, cr));
754}
755
756/* ARGSUSED */
757static int
758zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
759{
760	vnode_t *vp;
761	int error;
762
763	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
764	    NO_FOLLOW, NULL, &vp)) != 0)
765		return (error);
766
767	/* Now make sure mntpnt and dataset are ZFS */
768
769	if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
770	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
771	    zc->zc_name) != 0)) {
772		VN_RELE(vp);
773		return (SET_ERROR(EPERM));
774	}
775
776	VN_RELE(vp);
777	return (dsl_deleg_access(zc->zc_name,
778	    ZFS_DELEG_PERM_SHARE, cr));
779}
780
781int
782zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
783{
784	if (!INGLOBALZONE(curthread))
785		return (SET_ERROR(EPERM));
786
787	if (secpolicy_nfs(cr) == 0) {
788		return (0);
789	} else {
790		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
791	}
792}
793
794int
795zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
796{
797	if (!INGLOBALZONE(curthread))
798		return (SET_ERROR(EPERM));
799
800	if (secpolicy_smb(cr) == 0) {
801		return (0);
802	} else {
803		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
804	}
805}
806
/*
 * Compute the parent of a dataset name by stripping the trailing "@snap"
 * component or, if there is none, the trailing "/child" component.
 *
 * datasetname - name to take the parent of
 * parent      - output buffer for the parent name
 * parentsize  - size of 'parent' in bytes
 *
 * Returns 0 on success, ENOENT if the (possibly truncated) name has neither
 * an '@' nor a '/' and therefore no parent, or EINVAL if 'parentsize' is not
 * positive.  On any return other than EINVAL, 'parent' is NUL-terminated.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0)
		return (SET_ERROR(EINVAL));

	/*
	 * strncpy() does not terminate the destination when the source is
	 * at least 'parentsize' bytes long, so terminate explicitly before
	 * searching the copy.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	parent[parentsize - 1] = '\0';

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (SET_ERROR(ENOENT));

	cp[0] = '\0';
	return (0);
}
828
829int
830zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
831{
832	int error;
833
834	if ((error = zfs_secpolicy_write_perms(name,
835	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
836		return (error);
837
838	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
839}
840
841/* ARGSUSED */
842static int
843zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
844{
845	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
846}
847
848/*
849 * Destroying snapshots with delegated permissions requires
850 * descendant mount and destroy permissions.
851 */
852/* ARGSUSED */
853static int
854zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
855{
856	nvlist_t *snaps;
857	nvpair_t *pair, *nextpair;
858	int error = 0;
859
860	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
861		return (SET_ERROR(EINVAL));
862	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
863	    pair = nextpair) {
864		nextpair = nvlist_next_nvpair(snaps, pair);
865		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
866		if (error == ENOENT) {
867			/*
868			 * Ignore any snapshots that don't exist (we consider
869			 * them "already destroyed").  Remove the name from the
870			 * nvl here in case the snapshot is created between
871			 * now and when we try to destroy it (in which case
872			 * we don't want to destroy it since we haven't
873			 * checked for permission).
874			 */
875			fnvlist_remove_nvpair(snaps, pair);
876			error = 0;
877		}
878		if (error != 0)
879			break;
880	}
881
882	return (error);
883}
884
885int
886zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
887{
888	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
889	int	error;
890
891	if ((error = zfs_secpolicy_write_perms(from,
892	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
893		return (error);
894
895	if ((error = zfs_secpolicy_write_perms(from,
896	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
897		return (error);
898
899	if ((error = zfs_get_parent(to, parentname,
900	    sizeof (parentname))) != 0)
901		return (error);
902
903	if ((error = zfs_secpolicy_write_perms(parentname,
904	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
905		return (error);
906
907	if ((error = zfs_secpolicy_write_perms(parentname,
908	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
909		return (error);
910
911	return (error);
912}
913
914/* ARGSUSED */
915static int
916zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
917{
918	char *at = NULL;
919	int error;
920
921	if ((zc->zc_cookie & 1) != 0) {
922		/*
923		 * This is recursive rename, so the starting snapshot might
924		 * not exist. Check file system or volume permission instead.
925		 */
926		at = strchr(zc->zc_name, '@');
927		if (at == NULL)
928			return (EINVAL);
929		*at = '\0';
930	}
931
932	error = zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr);
933
934	if (at != NULL)
935		*at = '@';
936
937	return (error);
938}
939
940/* ARGSUSED */
941static int
942zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
943{
944	dsl_pool_t *dp;
945	dsl_dataset_t *clone;
946	int error;
947
948	error = zfs_secpolicy_write_perms(zc->zc_name,
949	    ZFS_DELEG_PERM_PROMOTE, cr);
950	if (error != 0)
951		return (error);
952
953	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
954	if (error != 0)
955		return (error);
956
957	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
958
959	if (error == 0) {
960		char parentname[ZFS_MAX_DATASET_NAME_LEN];
961		dsl_dataset_t *origin = NULL;
962		dsl_dir_t *dd;
963		dd = clone->ds_dir;
964
965		error = dsl_dataset_hold_obj(dd->dd_pool,
966		    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
967		if (error != 0) {
968			dsl_dataset_rele(clone, FTAG);
969			dsl_pool_rele(dp, FTAG);
970			return (error);
971		}
972
973		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
974		    ZFS_DELEG_PERM_MOUNT, cr);
975
976		dsl_dataset_name(origin, parentname);
977		if (error == 0) {
978			error = zfs_secpolicy_write_perms_ds(parentname, origin,
979			    ZFS_DELEG_PERM_PROMOTE, cr);
980		}
981		dsl_dataset_rele(clone, FTAG);
982		dsl_dataset_rele(origin, FTAG);
983	}
984	dsl_pool_rele(dp, FTAG);
985	return (error);
986}
987
988/* ARGSUSED */
989static int
990zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
991{
992	int error;
993
994	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
995	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
996		return (error);
997
998	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
999	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
1000		return (error);
1001
1002	return (zfs_secpolicy_write_perms(zc->zc_name,
1003	    ZFS_DELEG_PERM_CREATE, cr));
1004}
1005
1006int
1007zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1008{
1009	return (zfs_secpolicy_write_perms(name,
1010	    ZFS_DELEG_PERM_SNAPSHOT, cr));
1011}
1012
1013/*
1014 * Check for permission to create each snapshot in the nvlist.
1015 */
1016/* ARGSUSED */
1017static int
1018zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1019{
1020	nvlist_t *snaps;
1021	int error;
1022	nvpair_t *pair;
1023
1024	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
1025		return (SET_ERROR(EINVAL));
1026	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1027	    pair = nvlist_next_nvpair(snaps, pair)) {
1028		char *name = nvpair_name(pair);
1029		char *atp = strchr(name, '@');
1030
1031		if (atp == NULL) {
1032			error = SET_ERROR(EINVAL);
1033			break;
1034		}
1035		*atp = '\0';
1036		error = zfs_secpolicy_snapshot_perms(name, cr);
1037		*atp = '@';
1038		if (error != 0)
1039			break;
1040	}
1041	return (error);
1042}
1043
1044/*
1045 * Check for permission to create each snapshot in the nvlist.
1046 */
1047/* ARGSUSED */
1048static int
1049zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1050{
1051	int error = 0;
1052
1053	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1054	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1055		char *name = nvpair_name(pair);
1056		char *hashp = strchr(name, '#');
1057
1058		if (hashp == NULL) {
1059			error = SET_ERROR(EINVAL);
1060			break;
1061		}
1062		*hashp = '\0';
1063		error = zfs_secpolicy_write_perms(name,
1064		    ZFS_DELEG_PERM_BOOKMARK, cr);
1065		*hashp = '#';
1066		if (error != 0)
1067			break;
1068	}
1069	return (error);
1070}
1071
1072/* ARGSUSED */
1073static int
1074zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1075{
1076	nvpair_t *pair, *nextpair;
1077	int error = 0;
1078
1079	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1080	    pair = nextpair) {
1081		char *name = nvpair_name(pair);
1082		char *hashp = strchr(name, '#');
1083		nextpair = nvlist_next_nvpair(innvl, pair);
1084
1085		if (hashp == NULL) {
1086			error = SET_ERROR(EINVAL);
1087			break;
1088		}
1089
1090		*hashp = '\0';
1091		error = zfs_secpolicy_write_perms(name,
1092		    ZFS_DELEG_PERM_DESTROY, cr);
1093		*hashp = '#';
1094		if (error == ENOENT) {
1095			/*
1096			 * Ignore any filesystems that don't exist (we consider
1097			 * their bookmarks "already destroyed").  Remove
1098			 * the name from the nvl here in case the filesystem
1099			 * is created between now and when we try to destroy
1100			 * the bookmark (in which case we don't want to
1101			 * destroy it since we haven't checked for permission).
1102			 */
1103			fnvlist_remove_nvpair(innvl, pair);
1104			error = 0;
1105		}
1106		if (error != 0)
1107			break;
1108	}
1109
1110	return (error);
1111}
1112
1113/* ARGSUSED */
1114static int
1115zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1116{
1117	/*
1118	 * Even root must have a proper TSD so that we know what pool
1119	 * to log to.
1120	 */
1121	if (tsd_get(zfs_allow_log_key) == NULL)
1122		return (SET_ERROR(EPERM));
1123	return (0);
1124}
1125
1126static int
1127zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1128{
1129	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
1130	int	error;
1131	char	*origin;
1132
1133	if ((error = zfs_get_parent(zc->zc_name, parentname,
1134	    sizeof (parentname))) != 0)
1135		return (error);
1136
1137	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1138	    (error = zfs_secpolicy_write_perms(origin,
1139	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1140		return (error);
1141
1142	if ((error = zfs_secpolicy_write_perms(parentname,
1143	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1144		return (error);
1145
1146	return (zfs_secpolicy_write_perms(parentname,
1147	    ZFS_DELEG_PERM_MOUNT, cr));
1148}
1149
1150/*
1151 * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1152 * SYS_CONFIG privilege, which is not available in a local zone.
1153 */
1154/* ARGSUSED */
1155static int
1156zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1157{
1158	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1159		return (SET_ERROR(EPERM));
1160
1161	return (0);
1162}
1163
1164/*
1165 * Policy for object to name lookups.
1166 */
1167/* ARGSUSED */
1168static int
1169zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1170{
1171	int error;
1172
1173	if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1174		return (0);
1175
1176	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1177	return (error);
1178}
1179
1180/*
1181 * Policy for fault injection.  Requires all privileges.
1182 */
1183/* ARGSUSED */
1184static int
1185zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1186{
1187	return (secpolicy_zinject(cr));
1188}
1189
1190/* ARGSUSED */
1191static int
1192zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1193{
1194	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1195
1196	if (prop == ZPROP_INVAL) {
1197		if (!zfs_prop_user(zc->zc_value))
1198			return (SET_ERROR(EINVAL));
1199		return (zfs_secpolicy_write_perms(zc->zc_name,
1200		    ZFS_DELEG_PERM_USERPROP, cr));
1201	} else {
1202		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1203		    NULL, cr));
1204	}
1205}
1206
1207static int
1208zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1209{
1210	int err = zfs_secpolicy_read(zc, innvl, cr);
1211	if (err)
1212		return (err);
1213
1214	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1215		return (SET_ERROR(EINVAL));
1216
1217	if (zc->zc_value[0] == 0) {
1218		/*
1219		 * They are asking about a posix uid/gid.  If it's
1220		 * themself, allow it.
1221		 */
1222		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1223		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
1224			if (zc->zc_guid == crgetuid(cr))
1225				return (0);
1226		} else {
1227			if (groupmember(zc->zc_guid, cr))
1228				return (0);
1229		}
1230	}
1231
1232	return (zfs_secpolicy_write_perms(zc->zc_name,
1233	    userquota_perms[zc->zc_objset_type], cr));
1234}
1235
1236static int
1237zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1238{
1239	int err = zfs_secpolicy_read(zc, innvl, cr);
1240	if (err)
1241		return (err);
1242
1243	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1244		return (SET_ERROR(EINVAL));
1245
1246	return (zfs_secpolicy_write_perms(zc->zc_name,
1247	    userquota_perms[zc->zc_objset_type], cr));
1248}
1249
1250/* ARGSUSED */
1251static int
1252zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1253{
1254	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1255	    NULL, cr));
1256}
1257
1258/* ARGSUSED */
1259static int
1260zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1261{
1262	nvpair_t *pair;
1263	nvlist_t *holds;
1264	int error;
1265
1266	error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1267	if (error != 0)
1268		return (SET_ERROR(EINVAL));
1269
1270	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1271	    pair = nvlist_next_nvpair(holds, pair)) {
1272		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1273		error = dmu_fsname(nvpair_name(pair), fsname);
1274		if (error != 0)
1275			return (error);
1276		error = zfs_secpolicy_write_perms(fsname,
1277		    ZFS_DELEG_PERM_HOLD, cr);
1278		if (error != 0)
1279			return (error);
1280	}
1281	return (0);
1282}
1283
1284/* ARGSUSED */
1285static int
1286zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1287{
1288	nvpair_t *pair;
1289	int error;
1290
1291	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1292	    pair = nvlist_next_nvpair(innvl, pair)) {
1293		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1294		error = dmu_fsname(nvpair_name(pair), fsname);
1295		if (error != 0)
1296			return (error);
1297		error = zfs_secpolicy_write_perms(fsname,
1298		    ZFS_DELEG_PERM_RELEASE, cr);
1299		if (error != 0)
1300			return (error);
1301	}
1302	return (0);
1303}
1304
1305/*
1306 * Policy for allowing temporary snapshots to be taken or released
1307 */
1308static int
1309zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1310{
1311	/*
1312	 * A temporary snapshot is the same as a snapshot,
1313	 * hold, destroy and release all rolled into one.
1314	 * Delegated diff alone is sufficient that we allow this.
1315	 */
1316	int error;
1317
1318	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1319	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
1320		return (0);
1321
1322	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1323	if (error == 0)
1324		error = zfs_secpolicy_hold(zc, innvl, cr);
1325	if (error == 0)
1326		error = zfs_secpolicy_release(zc, innvl, cr);
1327	if (error == 0)
1328		error = zfs_secpolicy_destroy(zc, innvl, cr);
1329	return (error);
1330}
1331
1332/*
1333 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1334 */
1335static int
1336get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1337{
1338	char *packed;
1339	int error;
1340	nvlist_t *list = NULL;
1341
1342	/*
1343	 * Read in and unpack the user-supplied nvlist.
1344	 */
1345	if (size == 0)
1346		return (SET_ERROR(EINVAL));
1347
1348	packed = kmem_alloc(size, KM_SLEEP);
1349
1350	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1351	    iflag)) != 0) {
1352		kmem_free(packed, size);
1353		return (SET_ERROR(EFAULT));
1354	}
1355
1356	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1357		kmem_free(packed, size);
1358		return (error);
1359	}
1360
1361	kmem_free(packed, size);
1362
1363	*nvp = list;
1364	return (0);
1365}
1366
1367/*
1368 * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1369 * Entries will be removed from the end of the nvlist, and one int32 entry
1370 * named "N_MORE_ERRORS" will be added indicating how many entries were
1371 * removed.
1372 */
1373static int
1374nvlist_smush(nvlist_t *errors, size_t max)
1375{
1376	size_t size;
1377
1378	size = fnvlist_size(errors);
1379
1380	if (size > max) {
1381		nvpair_t *more_errors;
1382		int n = 0;
1383
1384		if (max < 1024)
1385			return (SET_ERROR(ENOMEM));
1386
1387		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1388		more_errors = nvlist_prev_nvpair(errors, NULL);
1389
1390		do {
1391			nvpair_t *pair = nvlist_prev_nvpair(errors,
1392			    more_errors);
1393			fnvlist_remove_nvpair(errors, pair);
1394			n++;
1395			size = fnvlist_size(errors);
1396		} while (size > max);
1397
1398		fnvlist_remove_nvpair(errors, more_errors);
1399		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1400		ASSERT3U(fnvlist_size(errors), <=, max);
1401	}
1402
1403	return (0);
1404}
1405
1406static int
1407put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1408{
1409	char *packed = NULL;
1410	int error = 0;
1411	size_t size;
1412
1413	size = fnvlist_size(nvl);
1414
1415	if (size > zc->zc_nvlist_dst_size) {
1416		/*
1417		 * Solaris returns ENOMEM here, because even if an error is
1418		 * returned from an ioctl(2), new zc_nvlist_dst_size will be
1419		 * passed to the userland. This is not the case for FreeBSD.
1420		 * We need to return 0, so the kernel will copy the
1421		 * zc_nvlist_dst_size back and the userland can discover that a
1422		 * bigger buffer is needed.
1423		 */
1424		error = 0;
1425	} else {
1426		packed = fnvlist_pack(nvl, &size);
1427		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1428		    size, zc->zc_iflags) != 0)
1429			error = SET_ERROR(EFAULT);
1430		fnvlist_pack_free(packed, size);
1431	}
1432
1433	zc->zc_nvlist_dst_size = size;
1434	zc->zc_nvlist_dst_filled = B_TRUE;
1435	return (error);
1436}
1437
1438static int
1439getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1440{
1441	objset_t *os;
1442	vfs_t *vfsp;
1443	int error;
1444
1445	error = dmu_objset_hold(dsname, FTAG, &os);
1446	if (error != 0)
1447		return (error);
1448	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1449		dmu_objset_rele(os, FTAG);
1450		return (SET_ERROR(EINVAL));
1451	}
1452
1453	mutex_enter(&os->os_user_ptr_lock);
1454	*zfvp = dmu_objset_get_user(os);
1455	if (*zfvp) {
1456		vfsp = (*zfvp)->z_vfs;
1457		vfs_ref(vfsp);
1458	} else {
1459		error = SET_ERROR(ESRCH);
1460	}
1461	mutex_exit(&os->os_user_ptr_lock);
1462	dmu_objset_rele(os, FTAG);
1463	if (error == 0) {
1464		error = vfs_busy(vfsp, 0);
1465		vfs_rel(vfsp);
1466		if (error != 0) {
1467			*zfvp = NULL;
1468			error = SET_ERROR(ESRCH);
1469		}
1470	}
1471	return (error);
1472}
1473
1474/*
1475 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1476 * case its z_vfs will be NULL, and it will be opened as the owner.
1477 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1478 * which prevents all vnode ops from running.
1479 */
1480static int
1481zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1482{
1483	int error = 0;
1484
1485	if (getzfsvfs(name, zfvp) != 0)
1486		error = zfsvfs_create(name, zfvp);
1487	if (error == 0) {
1488		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1489		    RW_READER, tag);
1490		if ((*zfvp)->z_unmounted) {
1491			/*
1492			 * XXX we could probably try again, since the unmounting
1493			 * thread should be just about to disassociate the
1494			 * objset from the zfsvfs.
1495			 */
1496			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1497			return (SET_ERROR(EBUSY));
1498		}
1499	}
1500	return (error);
1501}
1502
1503static void
1504zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1505{
1506	rrm_exit(&zfsvfs->z_teardown_lock, tag);
1507
1508	if (zfsvfs->z_vfs) {
1509#ifdef illumos
1510		VFS_RELE(zfsvfs->z_vfs);
1511#else
1512		vfs_unbusy(zfsvfs->z_vfs);
1513#endif
1514	} else {
1515		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1516		zfsvfs_free(zfsvfs);
1517	}
1518}
1519
1520static int
1521zfs_ioc_pool_create(zfs_cmd_t *zc)
1522{
1523	int error;
1524	nvlist_t *config, *props = NULL;
1525	nvlist_t *rootprops = NULL;
1526	nvlist_t *zplprops = NULL;
1527
1528	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1529	    zc->zc_iflags, &config))
1530		return (error);
1531
1532	if (zc->zc_nvlist_src_size != 0 && (error =
1533	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1534	    zc->zc_iflags, &props))) {
1535		nvlist_free(config);
1536		return (error);
1537	}
1538
1539	if (props) {
1540		nvlist_t *nvl = NULL;
1541		uint64_t version = SPA_VERSION;
1542
1543		(void) nvlist_lookup_uint64(props,
1544		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1545		if (!SPA_VERSION_IS_SUPPORTED(version)) {
1546			error = SET_ERROR(EINVAL);
1547			goto pool_props_bad;
1548		}
1549		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1550		if (nvl) {
1551			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1552			if (error != 0) {
1553				nvlist_free(config);
1554				nvlist_free(props);
1555				return (error);
1556			}
1557			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1558		}
1559		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1560		error = zfs_fill_zplprops_root(version, rootprops,
1561		    zplprops, NULL);
1562		if (error != 0)
1563			goto pool_props_bad;
1564	}
1565
1566	error = spa_create(zc->zc_name, config, props, zplprops);
1567
1568	/*
1569	 * Set the remaining root properties
1570	 */
1571	if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1572	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1573		(void) spa_destroy(zc->zc_name);
1574
1575pool_props_bad:
1576	nvlist_free(rootprops);
1577	nvlist_free(zplprops);
1578	nvlist_free(config);
1579	nvlist_free(props);
1580
1581	return (error);
1582}
1583
1584static int
1585zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1586{
1587	int error;
1588	zfs_log_history(zc);
1589	error = spa_destroy(zc->zc_name);
1590	if (error == 0)
1591		zvol_remove_minors(zc->zc_name);
1592	return (error);
1593}
1594
1595static int
1596zfs_ioc_pool_import(zfs_cmd_t *zc)
1597{
1598	nvlist_t *config, *props = NULL;
1599	uint64_t guid;
1600	int error;
1601
1602	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1603	    zc->zc_iflags, &config)) != 0)
1604		return (error);
1605
1606	if (zc->zc_nvlist_src_size != 0 && (error =
1607	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1608	    zc->zc_iflags, &props))) {
1609		nvlist_free(config);
1610		return (error);
1611	}
1612
1613	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1614	    guid != zc->zc_guid)
1615		error = SET_ERROR(EINVAL);
1616	else
1617		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1618
1619	if (zc->zc_nvlist_dst != 0) {
1620		int err;
1621
1622		if ((err = put_nvlist(zc, config)) != 0)
1623			error = err;
1624	}
1625
1626	nvlist_free(config);
1627
1628	nvlist_free(props);
1629
1630	return (error);
1631}
1632
1633static int
1634zfs_ioc_pool_export(zfs_cmd_t *zc)
1635{
1636	int error;
1637	boolean_t force = (boolean_t)zc->zc_cookie;
1638	boolean_t hardforce = (boolean_t)zc->zc_guid;
1639
1640	zfs_log_history(zc);
1641	error = spa_export(zc->zc_name, NULL, force, hardforce);
1642	if (error == 0)
1643		zvol_remove_minors(zc->zc_name);
1644	return (error);
1645}
1646
1647static int
1648zfs_ioc_pool_configs(zfs_cmd_t *zc)
1649{
1650	nvlist_t *configs;
1651	int error;
1652
1653	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1654		return (SET_ERROR(EEXIST));
1655
1656	error = put_nvlist(zc, configs);
1657
1658	nvlist_free(configs);
1659
1660	return (error);
1661}
1662
1663/*
1664 * inputs:
1665 * zc_name		name of the pool
1666 *
1667 * outputs:
1668 * zc_cookie		real errno
1669 * zc_nvlist_dst	config nvlist
1670 * zc_nvlist_dst_size	size of config nvlist
1671 */
1672static int
1673zfs_ioc_pool_stats(zfs_cmd_t *zc)
1674{
1675	nvlist_t *config;
1676	int error;
1677	int ret = 0;
1678
1679	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1680	    sizeof (zc->zc_value));
1681
1682	if (config != NULL) {
1683		ret = put_nvlist(zc, config);
1684		nvlist_free(config);
1685
1686		/*
1687		 * The config may be present even if 'error' is non-zero.
1688		 * In this case we return success, and preserve the real errno
1689		 * in 'zc_cookie'.
1690		 */
1691		zc->zc_cookie = error;
1692	} else {
1693		ret = error;
1694	}
1695
1696	return (ret);
1697}
1698
1699/*
1700 * Try to import the given pool, returning pool stats as appropriate so that
1701 * user land knows which devices are available and overall pool health.
1702 */
1703static int
1704zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1705{
1706	nvlist_t *tryconfig, *config;
1707	int error;
1708
1709	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1710	    zc->zc_iflags, &tryconfig)) != 0)
1711		return (error);
1712
1713	config = spa_tryimport(tryconfig);
1714
1715	nvlist_free(tryconfig);
1716
1717	if (config == NULL)
1718		return (SET_ERROR(EINVAL));
1719
1720	error = put_nvlist(zc, config);
1721	nvlist_free(config);
1722
1723	return (error);
1724}
1725
1726/*
1727 * inputs:
1728 * zc_name              name of the pool
1729 * zc_cookie            scan func (pool_scan_func_t)
1730 * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
1731 */
1732static int
1733zfs_ioc_pool_scan(zfs_cmd_t *zc)
1734{
1735	spa_t *spa;
1736	int error;
1737
1738	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1739		return (error);
1740
1741	if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1742		return (SET_ERROR(EINVAL));
1743
1744	if (zc->zc_flags == POOL_SCRUB_PAUSE)
1745		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1746	else if (zc->zc_cookie == POOL_SCAN_NONE)
1747		error = spa_scan_stop(spa);
1748	else
1749		error = spa_scan(spa, zc->zc_cookie);
1750
1751	spa_close(spa, FTAG);
1752
1753	return (error);
1754}
1755
1756static int
1757zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1758{
1759	spa_t *spa;
1760	int error;
1761
1762	error = spa_open(zc->zc_name, &spa, FTAG);
1763	if (error == 0) {
1764		spa_freeze(spa);
1765		spa_close(spa, FTAG);
1766	}
1767	return (error);
1768}
1769
1770static int
1771zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1772{
1773	spa_t *spa;
1774	int error;
1775
1776	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1777		return (error);
1778
1779	if (zc->zc_cookie < spa_version(spa) ||
1780	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1781		spa_close(spa, FTAG);
1782		return (SET_ERROR(EINVAL));
1783	}
1784
1785	spa_upgrade(spa, zc->zc_cookie);
1786	spa_close(spa, FTAG);
1787
1788	return (error);
1789}
1790
1791static int
1792zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1793{
1794	spa_t *spa;
1795	char *hist_buf;
1796	uint64_t size;
1797	int error;
1798
1799	if ((size = zc->zc_history_len) == 0)
1800		return (SET_ERROR(EINVAL));
1801
1802	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1803		return (error);
1804
1805	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1806		spa_close(spa, FTAG);
1807		return (SET_ERROR(ENOTSUP));
1808	}
1809
1810	hist_buf = kmem_alloc(size, KM_SLEEP);
1811	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1812	    &zc->zc_history_len, hist_buf)) == 0) {
1813		error = ddi_copyout(hist_buf,
1814		    (void *)(uintptr_t)zc->zc_history,
1815		    zc->zc_history_len, zc->zc_iflags);
1816	}
1817
1818	spa_close(spa, FTAG);
1819	kmem_free(hist_buf, size);
1820	return (error);
1821}
1822
1823static int
1824zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1825{
1826	spa_t *spa;
1827	int error;
1828
1829	error = spa_open(zc->zc_name, &spa, FTAG);
1830	if (error == 0) {
1831		error = spa_change_guid(spa);
1832		spa_close(spa, FTAG);
1833	}
1834	return (error);
1835}
1836
1837static int
1838zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1839{
1840	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1841}
1842
1843/*
1844 * inputs:
1845 * zc_name		name of filesystem
1846 * zc_obj		object to find
1847 *
1848 * outputs:
1849 * zc_value		name of object
1850 */
1851static int
1852zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1853{
1854	objset_t *os;
1855	int error;
1856
1857	/* XXX reading from objset not owned */
1858	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1859		return (error);
1860	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1861		dmu_objset_rele(os, FTAG);
1862		return (SET_ERROR(EINVAL));
1863	}
1864	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1865	    sizeof (zc->zc_value));
1866	dmu_objset_rele(os, FTAG);
1867
1868	return (error);
1869}
1870
1871/*
1872 * inputs:
1873 * zc_name		name of filesystem
1874 * zc_obj		object to find
1875 *
1876 * outputs:
1877 * zc_stat		stats on object
1878 * zc_value		path to object
1879 */
1880static int
1881zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1882{
1883	objset_t *os;
1884	int error;
1885
1886	/* XXX reading from objset not owned */
1887	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1888		return (error);
1889	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1890		dmu_objset_rele(os, FTAG);
1891		return (SET_ERROR(EINVAL));
1892	}
1893	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1894	    sizeof (zc->zc_value));
1895	dmu_objset_rele(os, FTAG);
1896
1897	return (error);
1898}
1899
1900static int
1901zfs_ioc_vdev_add(zfs_cmd_t *zc)
1902{
1903	spa_t *spa;
1904	int error;
1905	nvlist_t *config, **l2cache, **spares;
1906	uint_t nl2cache = 0, nspares = 0;
1907
1908	error = spa_open(zc->zc_name, &spa, FTAG);
1909	if (error != 0)
1910		return (error);
1911
1912	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1913	    zc->zc_iflags, &config);
1914	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1915	    &l2cache, &nl2cache);
1916
1917	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1918	    &spares, &nspares);
1919
1920#ifdef illumos
1921	/*
1922	 * A root pool with concatenated devices is not supported.
1923	 * Thus, can not add a device to a root pool.
1924	 *
1925	 * Intent log device can not be added to a rootpool because
1926	 * during mountroot, zil is replayed, a seperated log device
1927	 * can not be accessed during the mountroot time.
1928	 *
1929	 * l2cache and spare devices are ok to be added to a rootpool.
1930	 */
1931	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1932		nvlist_free(config);
1933		spa_close(spa, FTAG);
1934		return (SET_ERROR(EDOM));
1935	}
1936#endif /* illumos */
1937
1938	if (error == 0) {
1939		error = spa_vdev_add(spa, config);
1940		nvlist_free(config);
1941	}
1942	spa_close(spa, FTAG);
1943	return (error);
1944}
1945
1946/*
1947 * inputs:
1948 * zc_name		name of the pool
1949 * zc_nvlist_conf	nvlist of devices to remove
1950 * zc_cookie		to stop the remove?
1951 */
1952static int
1953zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1954{
1955	spa_t *spa;
1956	int error;
1957
1958	error = spa_open(zc->zc_name, &spa, FTAG);
1959	if (error != 0)
1960		return (error);
1961	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1962	spa_close(spa, FTAG);
1963	return (error);
1964}
1965
1966static int
1967zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1968{
1969	spa_t *spa;
1970	int error;
1971	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1972
1973	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1974		return (error);
1975	switch (zc->zc_cookie) {
1976	case VDEV_STATE_ONLINE:
1977		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1978		break;
1979
1980	case VDEV_STATE_OFFLINE:
1981		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1982		break;
1983
1984	case VDEV_STATE_FAULTED:
1985		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1986		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1987			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1988
1989		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1990		break;
1991
1992	case VDEV_STATE_DEGRADED:
1993		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1994		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1995			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1996
1997		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1998		break;
1999
2000	default:
2001		error = SET_ERROR(EINVAL);
2002	}
2003	zc->zc_cookie = newstate;
2004	spa_close(spa, FTAG);
2005	return (error);
2006}
2007
2008static int
2009zfs_ioc_vdev_attach(zfs_cmd_t *zc)
2010{
2011	spa_t *spa;
2012	int replacing = zc->zc_cookie;
2013	nvlist_t *config;
2014	int error;
2015
2016	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2017		return (error);
2018
2019	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2020	    zc->zc_iflags, &config)) == 0) {
2021		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
2022		nvlist_free(config);
2023	}
2024
2025	spa_close(spa, FTAG);
2026	return (error);
2027}
2028
2029static int
2030zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2031{
2032	spa_t *spa;
2033	int error;
2034
2035	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2036		return (error);
2037
2038	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2039
2040	spa_close(spa, FTAG);
2041	return (error);
2042}
2043
2044static int
2045zfs_ioc_vdev_split(zfs_cmd_t *zc)
2046{
2047	spa_t *spa;
2048	nvlist_t *config, *props = NULL;
2049	int error;
2050	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2051
2052	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2053		return (error);
2054
2055	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2056	    zc->zc_iflags, &config)) {
2057		spa_close(spa, FTAG);
2058		return (error);
2059	}
2060
2061	if (zc->zc_nvlist_src_size != 0 && (error =
2062	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2063	    zc->zc_iflags, &props))) {
2064		spa_close(spa, FTAG);
2065		nvlist_free(config);
2066		return (error);
2067	}
2068
2069	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2070
2071	spa_close(spa, FTAG);
2072
2073	nvlist_free(config);
2074	nvlist_free(props);
2075
2076	return (error);
2077}
2078
2079static int
2080zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2081{
2082	spa_t *spa;
2083	char *path = zc->zc_value;
2084	uint64_t guid = zc->zc_guid;
2085	int error;
2086
2087	error = spa_open(zc->zc_name, &spa, FTAG);
2088	if (error != 0)
2089		return (error);
2090
2091	error = spa_vdev_setpath(spa, guid, path);
2092	spa_close(spa, FTAG);
2093	return (error);
2094}
2095
2096static int
2097zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2098{
2099	spa_t *spa;
2100	char *fru = zc->zc_value;
2101	uint64_t guid = zc->zc_guid;
2102	int error;
2103
2104	error = spa_open(zc->zc_name, &spa, FTAG);
2105	if (error != 0)
2106		return (error);
2107
2108	error = spa_vdev_setfru(spa, guid, fru);
2109	spa_close(spa, FTAG);
2110	return (error);
2111}
2112
2113static int
2114zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2115{
2116	int error = 0;
2117	nvlist_t *nv;
2118
2119	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2120
2121	if (zc->zc_nvlist_dst != 0 &&
2122	    (error = dsl_prop_get_all(os, &nv)) == 0) {
2123		dmu_objset_stats(os, nv);
2124		/*
2125		 * NB: zvol_get_stats() will read the objset contents,
2126		 * which we aren't supposed to do with a
2127		 * DS_MODE_USER hold, because it could be
2128		 * inconsistent.  So this is a bit of a workaround...
2129		 * XXX reading with out owning
2130		 */
2131		if (!zc->zc_objset_stats.dds_inconsistent &&
2132		    dmu_objset_type(os) == DMU_OST_ZVOL) {
2133			error = zvol_get_stats(os, nv);
2134			if (error == EIO)
2135				return (error);
2136			VERIFY0(error);
2137		}
2138		error = put_nvlist(zc, nv);
2139		nvlist_free(nv);
2140	}
2141
2142	return (error);
2143}
2144
2145/*
2146 * inputs:
2147 * zc_name		name of filesystem
2148 * zc_nvlist_dst_size	size of buffer for property nvlist
2149 *
2150 * outputs:
2151 * zc_objset_stats	stats
2152 * zc_nvlist_dst	property nvlist
2153 * zc_nvlist_dst_size	size of property nvlist
2154 */
2155static int
2156zfs_ioc_objset_stats(zfs_cmd_t *zc)
2157{
2158	objset_t *os;
2159	int error;
2160
2161	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2162	if (error == 0) {
2163		error = zfs_ioc_objset_stats_impl(zc, os);
2164		dmu_objset_rele(os, FTAG);
2165	}
2166
2167	if (error == ENOMEM)
2168		error = 0;
2169	return (error);
2170}
2171
2172/*
2173 * inputs:
2174 * zc_name		name of filesystem
2175 * zc_nvlist_dst_size	size of buffer for property nvlist
2176 *
2177 * outputs:
2178 * zc_nvlist_dst	received property nvlist
2179 * zc_nvlist_dst_size	size of received property nvlist
2180 *
2181 * Gets received properties (distinct from local properties on or after
2182 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2183 * local property values.
2184 */
2185static int
2186zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2187{
2188	int error = 0;
2189	nvlist_t *nv;
2190
2191	/*
2192	 * Without this check, we would return local property values if the
2193	 * caller has not already received properties on or after
2194	 * SPA_VERSION_RECVD_PROPS.
2195	 */
2196	if (!dsl_prop_get_hasrecvd(zc->zc_name))
2197		return (SET_ERROR(ENOTSUP));
2198
2199	if (zc->zc_nvlist_dst != 0 &&
2200	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2201		error = put_nvlist(zc, nv);
2202		nvlist_free(nv);
2203	}
2204
2205	return (error);
2206}
2207
2208static int
2209nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2210{
2211	uint64_t value;
2212	int error;
2213
2214	/*
2215	 * zfs_get_zplprop() will either find a value or give us
2216	 * the default value (if there is one).
2217	 */
2218	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2219		return (error);
2220	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2221	return (0);
2222}
2223
2224/*
2225 * inputs:
2226 * zc_name		name of filesystem
2227 * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2228 *
2229 * outputs:
2230 * zc_nvlist_dst	zpl property nvlist
2231 * zc_nvlist_dst_size	size of zpl property nvlist
2232 */
2233static int
2234zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2235{
2236	objset_t *os;
2237	int err;
2238
2239	/* XXX reading without owning */
2240	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2241		return (err);
2242
2243	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2244
2245	/*
2246	 * NB: nvl_add_zplprop() will read the objset contents,
2247	 * which we aren't supposed to do with a DS_MODE_USER
2248	 * hold, because it could be inconsistent.
2249	 */
2250	if (zc->zc_nvlist_dst != 0 &&
2251	    !zc->zc_objset_stats.dds_inconsistent &&
2252	    dmu_objset_type(os) == DMU_OST_ZFS) {
2253		nvlist_t *nv;
2254
2255		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2256		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2257		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2258		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2259		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2260			err = put_nvlist(zc, nv);
2261		nvlist_free(nv);
2262	} else {
2263		err = SET_ERROR(ENOENT);
2264	}
2265	dmu_objset_rele(os, FTAG);
2266	return (err);
2267}
2268
2269boolean_t
2270dataset_name_hidden(const char *name)
2271{
2272	/*
2273	 * Skip over datasets that are not visible in this zone,
2274	 * internal datasets (which have a $ in their name), and
2275	 * temporary datasets (which have a % in their name).
2276	 */
2277	if (strchr(name, '$') != NULL)
2278		return (B_TRUE);
2279	if (strchr(name, '%') != NULL)
2280		return (B_TRUE);
2281	if (!INGLOBALZONE(curthread) && !zone_dataset_visible(name, NULL))
2282		return (B_TRUE);
2283	return (B_FALSE);
2284}
2285
2286/*
2287 * inputs:
2288 * zc_name		name of filesystem
2289 * zc_cookie		zap cursor
2290 * zc_nvlist_dst_size	size of buffer for property nvlist
2291 *
2292 * outputs:
2293 * zc_name		name of next filesystem
2294 * zc_cookie		zap cursor
2295 * zc_objset_stats	stats
2296 * zc_nvlist_dst	property nvlist
2297 * zc_nvlist_dst_size	size of property nvlist
2298 */
2299static int
2300zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2301{
2302	objset_t *os;
2303	int error;
2304	char *p;
2305	size_t orig_len = strlen(zc->zc_name);
2306
2307top:
2308	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2309		if (error == ENOENT)
2310			error = SET_ERROR(ESRCH);
2311		return (error);
2312	}
2313
2314	p = strrchr(zc->zc_name, '/');
2315	if (p == NULL || p[1] != '\0')
2316		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2317	p = zc->zc_name + strlen(zc->zc_name);
2318
2319	do {
2320		error = dmu_dir_list_next(os,
2321		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2322		    NULL, &zc->zc_cookie);
2323		if (error == ENOENT)
2324			error = SET_ERROR(ESRCH);
2325	} while (error == 0 && dataset_name_hidden(zc->zc_name));
2326	dmu_objset_rele(os, FTAG);
2327
2328	/*
2329	 * If it's an internal dataset (ie. with a '$' in its name),
2330	 * don't try to get stats for it, otherwise we'll return ENOENT.
2331	 */
2332	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2333		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2334		if (error == ENOENT) {
2335			/* We lost a race with destroy, get the next one. */
2336			zc->zc_name[orig_len] = '\0';
2337			goto top;
2338		}
2339	}
2340	return (error);
2341}
2342
2343/*
2344 * inputs:
2345 * zc_name		name of filesystem
2346 * zc_cookie		zap cursor
2347 * zc_nvlist_dst_size	size of buffer for property nvlist
2348 * zc_simple		when set, only name is requested
2349 *
2350 * outputs:
2351 * zc_name		name of next snapshot
2352 * zc_objset_stats	stats
2353 * zc_nvlist_dst	property nvlist
2354 * zc_nvlist_dst_size	size of property nvlist
2355 */
2356static int
2357zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2358{
2359	objset_t *os;
2360	int error;
2361
2362	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2363	if (error != 0) {
2364		return (error == ENOENT ? ESRCH : error);
2365	}
2366
2367	/*
2368	 * A dataset name of maximum length cannot have any snapshots,
2369	 * so exit immediately.
2370	 */
2371	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2372	    ZFS_MAX_DATASET_NAME_LEN) {
2373		dmu_objset_rele(os, FTAG);
2374		return (SET_ERROR(ESRCH));
2375	}
2376
2377	error = dmu_snapshot_list_next(os,
2378	    sizeof (zc->zc_name) - strlen(zc->zc_name),
2379	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2380	    NULL);
2381
2382	if (error == 0 && !zc->zc_simple) {
2383		dsl_dataset_t *ds;
2384		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2385
2386		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2387		if (error == 0) {
2388			objset_t *ossnap;
2389
2390			error = dmu_objset_from_ds(ds, &ossnap);
2391			if (error == 0)
2392				error = zfs_ioc_objset_stats_impl(zc, ossnap);
2393			dsl_dataset_rele(ds, FTAG);
2394		}
2395	} else if (error == ENOENT) {
2396		error = SET_ERROR(ESRCH);
2397	}
2398
2399	dmu_objset_rele(os, FTAG);
2400	/* if we failed, undo the @ that we tacked on to zc_name */
2401	if (error != 0)
2402		*strchr(zc->zc_name, '@') = '\0';
2403	return (error);
2404}
2405
2406static int
2407zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2408{
2409	const char *propname = nvpair_name(pair);
2410	uint64_t *valary;
2411	unsigned int vallen;
2412	const char *domain;
2413	char *dash;
2414	zfs_userquota_prop_t type;
2415	uint64_t rid;
2416	uint64_t quota;
2417	zfsvfs_t *zfsvfs;
2418	int err;
2419
2420	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2421		nvlist_t *attrs;
2422		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2423		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2424		    &pair) != 0)
2425			return (SET_ERROR(EINVAL));
2426	}
2427
2428	/*
2429	 * A correctly constructed propname is encoded as
2430	 * userquota@<rid>-<domain>.
2431	 */
2432	if ((dash = strchr(propname, '-')) == NULL ||
2433	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2434	    vallen != 3)
2435		return (SET_ERROR(EINVAL));
2436
2437	domain = dash + 1;
2438	type = valary[0];
2439	rid = valary[1];
2440	quota = valary[2];
2441
2442	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2443	if (err == 0) {
2444		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2445		zfsvfs_rele(zfsvfs, FTAG);
2446	}
2447
2448	return (err);
2449}
2450
2451/*
2452 * If the named property is one that has a special function to set its value,
2453 * return 0 on success and a positive error code on failure; otherwise if it is
2454 * not one of the special properties handled by this function, return -1.
2455 *
2456 * XXX: It would be better for callers of the property interface if we handled
2457 * these special cases in dsl_prop.c (in the dsl layer).
2458 */
2459static int
2460zfs_prop_set_special(const char *dsname, zprop_source_t source,
2461    nvpair_t *pair)
2462{
2463	const char *propname = nvpair_name(pair);
2464	zfs_prop_t prop = zfs_name_to_prop(propname);
2465	uint64_t intval;
2466	int err = -1;
2467
2468	if (prop == ZPROP_INVAL) {
2469		if (zfs_prop_userquota(propname))
2470			return (zfs_prop_set_userquota(dsname, pair));
2471		return (-1);
2472	}
2473
2474	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2475		nvlist_t *attrs;
2476		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2477		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2478		    &pair) == 0);
2479	}
2480
2481	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2482		return (-1);
2483
2484	VERIFY(0 == nvpair_value_uint64(pair, &intval));
2485
2486	switch (prop) {
2487	case ZFS_PROP_QUOTA:
2488		err = dsl_dir_set_quota(dsname, source, intval);
2489		break;
2490	case ZFS_PROP_REFQUOTA:
2491		err = dsl_dataset_set_refquota(dsname, source, intval);
2492		break;
2493	case ZFS_PROP_FILESYSTEM_LIMIT:
2494	case ZFS_PROP_SNAPSHOT_LIMIT:
2495		if (intval == UINT64_MAX) {
2496			/* clearing the limit, just do it */
2497			err = 0;
2498		} else {
2499			err = dsl_dir_activate_fs_ss_limit(dsname);
2500		}
2501		/*
2502		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2503		 * default path to set the value in the nvlist.
2504		 */
2505		if (err == 0)
2506			err = -1;
2507		break;
2508	case ZFS_PROP_RESERVATION:
2509		err = dsl_dir_set_reservation(dsname, source, intval);
2510		break;
2511	case ZFS_PROP_REFRESERVATION:
2512		err = dsl_dataset_set_refreservation(dsname, source, intval);
2513		break;
2514	case ZFS_PROP_VOLSIZE:
2515		err = zvol_set_volsize(dsname, intval);
2516		break;
2517	case ZFS_PROP_VERSION:
2518	{
2519		zfsvfs_t *zfsvfs;
2520
2521		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2522			break;
2523
2524		err = zfs_set_version(zfsvfs, intval);
2525		zfsvfs_rele(zfsvfs, FTAG);
2526
2527		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2528			zfs_cmd_t *zc;
2529
2530			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2531			(void) strcpy(zc->zc_name, dsname);
2532			(void) zfs_ioc_userspace_upgrade(zc);
2533			kmem_free(zc, sizeof (zfs_cmd_t));
2534		}
2535		break;
2536	}
2537	default:
2538		err = -1;
2539	}
2540
2541	return (err);
2542}
2543
2544/*
2545 * This function is best effort. If it fails to set any of the given properties,
2546 * it continues to set as many as it can and returns the last error
2547 * encountered. If the caller provides a non-NULL errlist, it will be filled in
2548 * with the list of names of all the properties that failed along with the
2549 * corresponding error numbers.
2550 *
2551 * If every property is set successfully, zero is returned and errlist is not
2552 * modified.
2553 */
2554int
2555zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2556    nvlist_t *errlist)
2557{
2558	nvpair_t *pair;
2559	nvpair_t *propval;
2560	int rv = 0;
2561	uint64_t intval;
2562	char *strval;
2563	nvlist_t *genericnvl = fnvlist_alloc();
2564	nvlist_t *retrynvl = fnvlist_alloc();
2565
2566retry:
2567	pair = NULL;
2568	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2569		const char *propname = nvpair_name(pair);
2570		zfs_prop_t prop = zfs_name_to_prop(propname);
2571		int err = 0;
2572
2573		/* decode the property value */
2574		propval = pair;
2575		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2576			nvlist_t *attrs;
2577			attrs = fnvpair_value_nvlist(pair);
2578			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2579			    &propval) != 0)
2580				err = SET_ERROR(EINVAL);
2581		}
2582
2583		/* Validate value type */
2584		if (err == 0 && prop == ZPROP_INVAL) {
2585			if (zfs_prop_user(propname)) {
2586				if (nvpair_type(propval) != DATA_TYPE_STRING)
2587					err = SET_ERROR(EINVAL);
2588			} else if (zfs_prop_userquota(propname)) {
2589				if (nvpair_type(propval) !=
2590				    DATA_TYPE_UINT64_ARRAY)
2591					err = SET_ERROR(EINVAL);
2592			} else {
2593				err = SET_ERROR(EINVAL);
2594			}
2595		} else if (err == 0) {
2596			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2597				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2598					err = SET_ERROR(EINVAL);
2599			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2600				const char *unused;
2601
2602				intval = fnvpair_value_uint64(propval);
2603
2604				switch (zfs_prop_get_type(prop)) {
2605				case PROP_TYPE_NUMBER:
2606					break;
2607				case PROP_TYPE_STRING:
2608					err = SET_ERROR(EINVAL);
2609					break;
2610				case PROP_TYPE_INDEX:
2611					if (zfs_prop_index_to_string(prop,
2612					    intval, &unused) != 0)
2613						err = SET_ERROR(EINVAL);
2614					break;
2615				default:
2616					cmn_err(CE_PANIC,
2617					    "unknown property type");
2618				}
2619			} else {
2620				err = SET_ERROR(EINVAL);
2621			}
2622		}
2623
2624		/* Validate permissions */
2625		if (err == 0)
2626			err = zfs_check_settable(dsname, pair, CRED());
2627
2628		if (err == 0) {
2629			err = zfs_prop_set_special(dsname, source, pair);
2630			if (err == -1) {
2631				/*
2632				 * For better performance we build up a list of
2633				 * properties to set in a single transaction.
2634				 */
2635				err = nvlist_add_nvpair(genericnvl, pair);
2636			} else if (err != 0 && nvl != retrynvl) {
2637				/*
2638				 * This may be a spurious error caused by
2639				 * receiving quota and reservation out of order.
2640				 * Try again in a second pass.
2641				 */
2642				err = nvlist_add_nvpair(retrynvl, pair);
2643			}
2644		}
2645
2646		if (err != 0) {
2647			if (errlist != NULL)
2648				fnvlist_add_int32(errlist, propname, err);
2649			rv = err;
2650		}
2651	}
2652
2653	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2654		nvl = retrynvl;
2655		goto retry;
2656	}
2657
2658	if (!nvlist_empty(genericnvl) &&
2659	    dsl_props_set(dsname, source, genericnvl) != 0) {
2660		/*
2661		 * If this fails, we still want to set as many properties as we
2662		 * can, so try setting them individually.
2663		 */
2664		pair = NULL;
2665		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2666			const char *propname = nvpair_name(pair);
2667			int err = 0;
2668
2669			propval = pair;
2670			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2671				nvlist_t *attrs;
2672				attrs = fnvpair_value_nvlist(pair);
2673				propval = fnvlist_lookup_nvpair(attrs,
2674				    ZPROP_VALUE);
2675			}
2676
2677			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2678				strval = fnvpair_value_string(propval);
2679				err = dsl_prop_set_string(dsname, propname,
2680				    source, strval);
2681			} else {
2682				intval = fnvpair_value_uint64(propval);
2683				err = dsl_prop_set_int(dsname, propname, source,
2684				    intval);
2685			}
2686
2687			if (err != 0) {
2688				if (errlist != NULL) {
2689					fnvlist_add_int32(errlist, propname,
2690					    err);
2691				}
2692				rv = err;
2693			}
2694		}
2695	}
2696	nvlist_free(genericnvl);
2697	nvlist_free(retrynvl);
2698
2699	return (rv);
2700}
2701
2702/*
2703 * Check that all the properties are valid user properties.
2704 */
2705static int
2706zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2707{
2708	nvpair_t *pair = NULL;
2709	int error = 0;
2710
2711	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2712		const char *propname = nvpair_name(pair);
2713
2714		if (!zfs_prop_user(propname) ||
2715		    nvpair_type(pair) != DATA_TYPE_STRING)
2716			return (SET_ERROR(EINVAL));
2717
2718		if (error = zfs_secpolicy_write_perms(fsname,
2719		    ZFS_DELEG_PERM_USERPROP, CRED()))
2720			return (error);
2721
2722		if (strlen(propname) >= ZAP_MAXNAMELEN)
2723			return (SET_ERROR(ENAMETOOLONG));
2724
2725		if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2726			return (E2BIG);
2727	}
2728	return (0);
2729}
2730
2731static void
2732props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2733{
2734	nvpair_t *pair;
2735
2736	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2737
2738	pair = NULL;
2739	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2740		if (nvlist_exists(skipped, nvpair_name(pair)))
2741			continue;
2742
2743		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2744	}
2745}
2746
2747static int
2748clear_received_props(const char *dsname, nvlist_t *props,
2749    nvlist_t *skipped)
2750{
2751	int err = 0;
2752	nvlist_t *cleared_props = NULL;
2753	props_skip(props, skipped, &cleared_props);
2754	if (!nvlist_empty(cleared_props)) {
2755		/*
2756		 * Acts on local properties until the dataset has received
2757		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2758		 */
2759		zprop_source_t flags = (ZPROP_SRC_NONE |
2760		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2761		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2762	}
2763	nvlist_free(cleared_props);
2764	return (err);
2765}
2766
2767/*
2768 * inputs:
2769 * zc_name		name of filesystem
2770 * zc_value		name of property to set
2771 * zc_nvlist_src{_size}	nvlist of properties to apply
2772 * zc_cookie		received properties flag
2773 *
2774 * outputs:
2775 * zc_nvlist_dst{_size} error for each unapplied received property
2776 */
2777static int
2778zfs_ioc_set_prop(zfs_cmd_t *zc)
2779{
2780	nvlist_t *nvl;
2781	boolean_t received = zc->zc_cookie;
2782	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2783	    ZPROP_SRC_LOCAL);
2784	nvlist_t *errors;
2785	int error;
2786
2787	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2788	    zc->zc_iflags, &nvl)) != 0)
2789		return (error);
2790
2791	if (received) {
2792		nvlist_t *origprops;
2793
2794		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2795			(void) clear_received_props(zc->zc_name,
2796			    origprops, nvl);
2797			nvlist_free(origprops);
2798		}
2799
2800		error = dsl_prop_set_hasrecvd(zc->zc_name);
2801	}
2802
2803	errors = fnvlist_alloc();
2804	if (error == 0)
2805		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2806
2807	if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2808		(void) put_nvlist(zc, errors);
2809	}
2810
2811	nvlist_free(errors);
2812	nvlist_free(nvl);
2813	return (error);
2814}
2815
2816/*
2817 * inputs:
2818 * zc_name		name of filesystem
2819 * zc_value		name of property to inherit
2820 * zc_cookie		revert to received value if TRUE
2821 *
2822 * outputs:		none
2823 */
2824static int
2825zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2826{
2827	const char *propname = zc->zc_value;
2828	zfs_prop_t prop = zfs_name_to_prop(propname);
2829	boolean_t received = zc->zc_cookie;
2830	zprop_source_t source = (received
2831	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2832	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2833
2834	if (received) {
2835		nvlist_t *dummy;
2836		nvpair_t *pair;
2837		zprop_type_t type;
2838		int err;
2839
2840		/*
2841		 * zfs_prop_set_special() expects properties in the form of an
2842		 * nvpair with type info.
2843		 */
2844		if (prop == ZPROP_INVAL) {
2845			if (!zfs_prop_user(propname))
2846				return (SET_ERROR(EINVAL));
2847
2848			type = PROP_TYPE_STRING;
2849		} else if (prop == ZFS_PROP_VOLSIZE ||
2850		    prop == ZFS_PROP_VERSION) {
2851			return (SET_ERROR(EINVAL));
2852		} else {
2853			type = zfs_prop_get_type(prop);
2854		}
2855
2856		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2857
2858		switch (type) {
2859		case PROP_TYPE_STRING:
2860			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2861			break;
2862		case PROP_TYPE_NUMBER:
2863		case PROP_TYPE_INDEX:
2864			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2865			break;
2866		default:
2867			nvlist_free(dummy);
2868			return (SET_ERROR(EINVAL));
2869		}
2870
2871		pair = nvlist_next_nvpair(dummy, NULL);
2872		err = zfs_prop_set_special(zc->zc_name, source, pair);
2873		nvlist_free(dummy);
2874		if (err != -1)
2875			return (err); /* special property already handled */
2876	} else {
2877		/*
2878		 * Only check this in the non-received case. We want to allow
2879		 * 'inherit -S' to revert non-inheritable properties like quota
2880		 * and reservation to the received or default values even though
2881		 * they are not considered inheritable.
2882		 */
2883		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2884			return (SET_ERROR(EINVAL));
2885	}
2886
2887	/* property name has been validated by zfs_secpolicy_inherit_prop() */
2888	return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2889}
2890
2891static int
2892zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2893{
2894	nvlist_t *props;
2895	spa_t *spa;
2896	int error;
2897	nvpair_t *pair;
2898
2899	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2900	    zc->zc_iflags, &props))
2901		return (error);
2902
2903	/*
2904	 * If the only property is the configfile, then just do a spa_lookup()
2905	 * to handle the faulted case.
2906	 */
2907	pair = nvlist_next_nvpair(props, NULL);
2908	if (pair != NULL && strcmp(nvpair_name(pair),
2909	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2910	    nvlist_next_nvpair(props, pair) == NULL) {
2911		mutex_enter(&spa_namespace_lock);
2912		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2913			spa_configfile_set(spa, props, B_FALSE);
2914			spa_config_sync(spa, B_FALSE, B_TRUE);
2915		}
2916		mutex_exit(&spa_namespace_lock);
2917		if (spa != NULL) {
2918			nvlist_free(props);
2919			return (0);
2920		}
2921	}
2922
2923	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2924		nvlist_free(props);
2925		return (error);
2926	}
2927
2928	error = spa_prop_set(spa, props);
2929
2930	nvlist_free(props);
2931	spa_close(spa, FTAG);
2932
2933	return (error);
2934}
2935
2936static int
2937zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2938{
2939	spa_t *spa;
2940	int error;
2941	nvlist_t *nvp = NULL;
2942
2943	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2944		/*
2945		 * If the pool is faulted, there may be properties we can still
2946		 * get (such as altroot and cachefile), so attempt to get them
2947		 * anyway.
2948		 */
2949		mutex_enter(&spa_namespace_lock);
2950		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2951			error = spa_prop_get(spa, &nvp);
2952		mutex_exit(&spa_namespace_lock);
2953	} else {
2954		error = spa_prop_get(spa, &nvp);
2955		spa_close(spa, FTAG);
2956	}
2957
2958	if (error == 0 && zc->zc_nvlist_dst != 0)
2959		error = put_nvlist(zc, nvp);
2960	else
2961		error = SET_ERROR(EFAULT);
2962
2963	nvlist_free(nvp);
2964	return (error);
2965}
2966
2967/*
2968 * inputs:
2969 * zc_name		name of filesystem
2970 * zc_nvlist_src{_size}	nvlist of delegated permissions
2971 * zc_perm_action	allow/unallow flag
2972 *
2973 * outputs:		none
2974 */
2975static int
2976zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2977{
2978	int error;
2979	nvlist_t *fsaclnv = NULL;
2980
2981	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2982	    zc->zc_iflags, &fsaclnv)) != 0)
2983		return (error);
2984
2985	/*
2986	 * Verify nvlist is constructed correctly
2987	 */
2988	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2989		nvlist_free(fsaclnv);
2990		return (SET_ERROR(EINVAL));
2991	}
2992
2993	/*
2994	 * If we don't have PRIV_SYS_MOUNT, then validate
2995	 * that user is allowed to hand out each permission in
2996	 * the nvlist(s)
2997	 */
2998
2999	error = secpolicy_zfs(CRED());
3000	if (error != 0) {
3001		if (zc->zc_perm_action == B_FALSE) {
3002			error = dsl_deleg_can_allow(zc->zc_name,
3003			    fsaclnv, CRED());
3004		} else {
3005			error = dsl_deleg_can_unallow(zc->zc_name,
3006			    fsaclnv, CRED());
3007		}
3008	}
3009
3010	if (error == 0)
3011		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3012
3013	nvlist_free(fsaclnv);
3014	return (error);
3015}
3016
3017/*
3018 * inputs:
3019 * zc_name		name of filesystem
3020 *
3021 * outputs:
3022 * zc_nvlist_src{_size}	nvlist of delegated permissions
3023 */
3024static int
3025zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3026{
3027	nvlist_t *nvp;
3028	int error;
3029
3030	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3031		error = put_nvlist(zc, nvp);
3032		nvlist_free(nvp);
3033	}
3034
3035	return (error);
3036}
3037
3038/*
3039 * Search the vfs list for a specified resource.  Returns a pointer to it
3040 * or NULL if no suitable entry is found. The caller of this routine
3041 * is responsible for releasing the returned vfs pointer.
3042 */
3043static vfs_t *
3044zfs_get_vfs(const char *resource)
3045{
3046	vfs_t *vfsp;
3047
3048	mtx_lock(&mountlist_mtx);
3049	TAILQ_FOREACH(vfsp, &mountlist, mnt_list) {
3050		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
3051			vfs_ref(vfsp);
3052			break;
3053		}
3054	}
3055	mtx_unlock(&mountlist_mtx);
3056	return (vfsp);
3057}
3058
3059/* ARGSUSED */
3060static void
3061zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3062{
3063	zfs_creat_t *zct = arg;
3064
3065	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3066}
3067
3068#define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
3069
3070/*
3071 * inputs:
3072 * os			parent objset pointer (NULL if root fs)
3073 * fuids_ok		fuids allowed in this version of the spa?
3074 * sa_ok		SAs allowed in this version of the spa?
3075 * createprops		list of properties requested by creator
3076 *
3077 * outputs:
3078 * zplprops	values for the zplprops we attach to the master node object
3079 * is_ci	true if requested file system will be purely case-insensitive
3080 *
3081 * Determine the settings for utf8only, normalization and
3082 * casesensitivity.  Specific values may have been requested by the
3083 * creator and/or we can inherit values from the parent dataset.  If
3084 * the file system is of too early a vintage, a creator can not
3085 * request settings for these properties, even if the requested
3086 * setting is the default value.  We don't actually want to create dsl
3087 * properties for these, so remove them from the source nvlist after
3088 * processing.
3089 */
3090static int
3091zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3092    boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3093    nvlist_t *zplprops, boolean_t *is_ci)
3094{
3095	uint64_t sense = ZFS_PROP_UNDEFINED;
3096	uint64_t norm = ZFS_PROP_UNDEFINED;
3097	uint64_t u8 = ZFS_PROP_UNDEFINED;
3098
3099	ASSERT(zplprops != NULL);
3100
3101	if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3102		return (SET_ERROR(EINVAL));
3103
3104	/*
3105	 * Pull out creator prop choices, if any.
3106	 */
3107	if (createprops) {
3108		(void) nvlist_lookup_uint64(createprops,
3109		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3110		(void) nvlist_lookup_uint64(createprops,
3111		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3112		(void) nvlist_remove_all(createprops,
3113		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3114		(void) nvlist_lookup_uint64(createprops,
3115		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3116		(void) nvlist_remove_all(createprops,
3117		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3118		(void) nvlist_lookup_uint64(createprops,
3119		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3120		(void) nvlist_remove_all(createprops,
3121		    zfs_prop_to_name(ZFS_PROP_CASE));
3122	}
3123
3124	/*
3125	 * If the zpl version requested is whacky or the file system
3126	 * or pool is version is too "young" to support normalization
3127	 * and the creator tried to set a value for one of the props,
3128	 * error out.
3129	 */
3130	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3131	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3132	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3133	    (zplver < ZPL_VERSION_NORMALIZATION &&
3134	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3135	    sense != ZFS_PROP_UNDEFINED)))
3136		return (SET_ERROR(ENOTSUP));
3137
3138	/*
3139	 * Put the version in the zplprops
3140	 */
3141	VERIFY(nvlist_add_uint64(zplprops,
3142	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3143
3144	if (norm == ZFS_PROP_UNDEFINED)
3145		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3146	VERIFY(nvlist_add_uint64(zplprops,
3147	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3148
3149	/*
3150	 * If we're normalizing, names must always be valid UTF-8 strings.
3151	 */
3152	if (norm)
3153		u8 = 1;
3154	if (u8 == ZFS_PROP_UNDEFINED)
3155		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3156	VERIFY(nvlist_add_uint64(zplprops,
3157	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3158
3159	if (sense == ZFS_PROP_UNDEFINED)
3160		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3161	VERIFY(nvlist_add_uint64(zplprops,
3162	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3163
3164	if (is_ci)
3165		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3166
3167	return (0);
3168}
3169
3170static int
3171zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3172    nvlist_t *zplprops, boolean_t *is_ci)
3173{
3174	boolean_t fuids_ok, sa_ok;
3175	uint64_t zplver = ZPL_VERSION;
3176	objset_t *os = NULL;
3177	char parentname[ZFS_MAX_DATASET_NAME_LEN];
3178	char *cp;
3179	spa_t *spa;
3180	uint64_t spa_vers;
3181	int error;
3182
3183	(void) strlcpy(parentname, dataset, sizeof (parentname));
3184	cp = strrchr(parentname, '/');
3185	ASSERT(cp != NULL);
3186	cp[0] = '\0';
3187
3188	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3189		return (error);
3190
3191	spa_vers = spa_version(spa);
3192	spa_close(spa, FTAG);
3193
3194	zplver = zfs_zpl_version_map(spa_vers);
3195	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3196	sa_ok = (zplver >= ZPL_VERSION_SA);
3197
3198	/*
3199	 * Open parent object set so we can inherit zplprop values.
3200	 */
3201	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3202		return (error);
3203
3204	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3205	    zplprops, is_ci);
3206	dmu_objset_rele(os, FTAG);
3207	return (error);
3208}
3209
3210static int
3211zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3212    nvlist_t *zplprops, boolean_t *is_ci)
3213{
3214	boolean_t fuids_ok;
3215	boolean_t sa_ok;
3216	uint64_t zplver = ZPL_VERSION;
3217	int error;
3218
3219	zplver = zfs_zpl_version_map(spa_vers);
3220	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3221	sa_ok = (zplver >= ZPL_VERSION_SA);
3222
3223	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3224	    createprops, zplprops, is_ci);
3225	return (error);
3226}
3227
3228/*
3229 * innvl: {
3230 *     "type" -> dmu_objset_type_t (int32)
3231 *     (optional) "props" -> { prop -> value }
3232 * }
3233 *
3234 * outnvl: propname -> error code (int32)
3235 */
3236static int
3237zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3238{
3239	int error = 0;
3240	zfs_creat_t zct = { 0 };
3241	nvlist_t *nvprops = NULL;
3242	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3243	int32_t type32;
3244	dmu_objset_type_t type;
3245	boolean_t is_insensitive = B_FALSE;
3246
3247	if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3248		return (SET_ERROR(EINVAL));
3249	type = type32;
3250	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3251
3252	switch (type) {
3253	case DMU_OST_ZFS:
3254		cbfunc = zfs_create_cb;
3255		break;
3256
3257	case DMU_OST_ZVOL:
3258		cbfunc = zvol_create_cb;
3259		break;
3260
3261	default:
3262		cbfunc = NULL;
3263		break;
3264	}
3265	if (strchr(fsname, '@') ||
3266	    strchr(fsname, '%'))
3267		return (SET_ERROR(EINVAL));
3268
3269	zct.zct_props = nvprops;
3270
3271	if (cbfunc == NULL)
3272		return (SET_ERROR(EINVAL));
3273
3274	if (type == DMU_OST_ZVOL) {
3275		uint64_t volsize, volblocksize;
3276
3277		if (nvprops == NULL)
3278			return (SET_ERROR(EINVAL));
3279		if (nvlist_lookup_uint64(nvprops,
3280		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3281			return (SET_ERROR(EINVAL));
3282
3283		if ((error = nvlist_lookup_uint64(nvprops,
3284		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3285		    &volblocksize)) != 0 && error != ENOENT)
3286			return (SET_ERROR(EINVAL));
3287
3288		if (error != 0)
3289			volblocksize = zfs_prop_default_numeric(
3290			    ZFS_PROP_VOLBLOCKSIZE);
3291
3292		if ((error = zvol_check_volblocksize(
3293		    volblocksize)) != 0 ||
3294		    (error = zvol_check_volsize(volsize,
3295		    volblocksize)) != 0)
3296			return (error);
3297	} else if (type == DMU_OST_ZFS) {
3298		int error;
3299
3300		/*
3301		 * We have to have normalization and
3302		 * case-folding flags correct when we do the
3303		 * file system creation, so go figure them out
3304		 * now.
3305		 */
3306		VERIFY(nvlist_alloc(&zct.zct_zplprops,
3307		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3308		error = zfs_fill_zplprops(fsname, nvprops,
3309		    zct.zct_zplprops, &is_insensitive);
3310		if (error != 0) {
3311			nvlist_free(zct.zct_zplprops);
3312			return (error);
3313		}
3314	}
3315
3316	error = dmu_objset_create(fsname, type,
3317	    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3318	nvlist_free(zct.zct_zplprops);
3319
3320	/*
3321	 * It would be nice to do this atomically.
3322	 */
3323	if (error == 0) {
3324		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3325		    nvprops, outnvl);
3326		if (error != 0)
3327			(void) dsl_destroy_head(fsname);
3328	}
3329#ifdef __FreeBSD__
3330	if (error == 0 && type == DMU_OST_ZVOL)
3331		zvol_create_minors(fsname);
3332#endif
3333	return (error);
3334}
3335
3336/*
3337 * innvl: {
3338 *     "origin" -> name of origin snapshot
3339 *     (optional) "props" -> { prop -> value }
3340 * }
3341 *
3342 * outnvl: propname -> error code (int32)
3343 */
3344static int
3345zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3346{
3347	int error = 0;
3348	nvlist_t *nvprops = NULL;
3349	char *origin_name;
3350
3351	if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3352		return (SET_ERROR(EINVAL));
3353	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3354
3355	if (strchr(fsname, '@') ||
3356	    strchr(fsname, '%'))
3357		return (SET_ERROR(EINVAL));
3358
3359	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3360		return (SET_ERROR(EINVAL));
3361	error = dmu_objset_clone(fsname, origin_name);
3362	if (error != 0)
3363		return (error);
3364
3365	/*
3366	 * It would be nice to do this atomically.
3367	 */
3368	if (error == 0) {
3369		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3370		    nvprops, outnvl);
3371		if (error != 0)
3372			(void) dsl_destroy_head(fsname);
3373	}
3374#ifdef __FreeBSD__
3375	if (error == 0)
3376		zvol_create_minors(fsname);
3377#endif
3378	return (error);
3379}
3380
3381/*
3382 * innvl: {
3383 *     "snaps" -> { snapshot1, snapshot2 }
3384 *     (optional) "props" -> { prop -> value (string) }
3385 * }
3386 *
3387 * outnvl: snapshot -> error code (int32)
3388 */
3389static int
3390zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3391{
3392	nvlist_t *snaps;
3393	nvlist_t *props = NULL;
3394	int error, poollen;
3395	nvpair_t *pair;
3396
3397	(void) nvlist_lookup_nvlist(innvl, "props", &props);
3398	if ((error = zfs_check_userprops(poolname, props)) != 0)
3399		return (error);
3400
3401	if (!nvlist_empty(props) &&
3402	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3403		return (SET_ERROR(ENOTSUP));
3404
3405	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3406		return (SET_ERROR(EINVAL));
3407	poollen = strlen(poolname);
3408	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3409	    pair = nvlist_next_nvpair(snaps, pair)) {
3410		const char *name = nvpair_name(pair);
3411		const char *cp = strchr(name, '@');
3412
3413		/*
3414		 * The snap name must contain an @, and the part after it must
3415		 * contain only valid characters.
3416		 */
3417		if (cp == NULL ||
3418		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3419			return (SET_ERROR(EINVAL));
3420
3421		/*
3422		 * The snap must be in the specified pool.
3423		 */
3424		if (strncmp(name, poolname, poollen) != 0 ||
3425		    (name[poollen] != '/' && name[poollen] != '@'))
3426			return (SET_ERROR(EXDEV));
3427
3428		/* This must be the only snap of this fs. */
3429		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3430		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3431			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3432			    == 0) {
3433				return (SET_ERROR(EXDEV));
3434			}
3435		}
3436	}
3437
3438	error = dsl_dataset_snapshot(snaps, props, outnvl);
3439	return (error);
3440}
3441
3442/*
3443 * innvl: "message" -> string
3444 */
3445/* ARGSUSED */
3446static int
3447zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3448{
3449	char *message;
3450	spa_t *spa;
3451	int error;
3452	char *poolname;
3453
3454	/*
3455	 * The poolname in the ioctl is not set, we get it from the TSD,
3456	 * which was set at the end of the last successful ioctl that allows
3457	 * logging.  The secpolicy func already checked that it is set.
3458	 * Only one log ioctl is allowed after each successful ioctl, so
3459	 * we clear the TSD here.
3460	 */
3461	poolname = tsd_get(zfs_allow_log_key);
3462	(void) tsd_set(zfs_allow_log_key, NULL);
3463	error = spa_open(poolname, &spa, FTAG);
3464	strfree(poolname);
3465	if (error != 0)
3466		return (error);
3467
3468	if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
3469		spa_close(spa, FTAG);
3470		return (SET_ERROR(EINVAL));
3471	}
3472
3473	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3474		spa_close(spa, FTAG);
3475		return (SET_ERROR(ENOTSUP));
3476	}
3477
3478	error = spa_history_log(spa, message);
3479	spa_close(spa, FTAG);
3480	return (error);
3481}
3482
#ifdef __FreeBSD__
/*
 * Write a command string into a vdev label via vdev_label_write_pad2().
 *
 * innvl: {
 *     ZPOOL_CONFIG_POOL_GUID -> pool guid (uint64)
 *     ZPOOL_CONFIG_GUID -> vdev guid (uint64)
 *     "command" -> string
 * }
 *
 * Returns EINVAL when any of the three inputs is missing, ENOENT when
 * spa_by_guid() finds no pool, ENODEV when the vdev guid is not found in
 * the opened pool, and otherwise the result of spa_open() or
 * vdev_label_write_pad2().
 */
/* ARGSUSED */
static int
zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
{
	char name[MAXNAMELEN];
	spa_t *spa;
	vdev_t *vd;
	char *command;
	uint64_t pool_guid;
	uint64_t vdev_guid;
	int error;

	if (nvlist_lookup_uint64(innvl,
	    ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		return (SET_ERROR(EINVAL));
	if (nvlist_lookup_uint64(innvl,
	    ZPOOL_CONFIG_GUID, &vdev_guid) != 0)
		return (SET_ERROR(EINVAL));
	if (nvlist_lookup_string(innvl,
	    "command", &command) != 0)
		return (SET_ERROR(EINVAL));

	/*
	 * Copy the pool name while spa_namespace_lock is held; the spa
	 * pointer itself is not used after the lock is dropped.  The copy
	 * is bounded by the size of the local buffer.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_by_guid(pool_guid, vdev_guid);
	if (spa != NULL)
		(void) snprintf(name, sizeof (name), "%s", spa_name(spa));
	mutex_exit(&spa_namespace_lock);
	if (spa == NULL)
		return (SET_ERROR(ENOENT));

	if ((error = spa_open(name, &spa, FTAG)) != 0)
		return (error);
	spa_vdev_state_enter(spa, SCL_ALL);
	vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE);
	if (vd == NULL) {
		(void) spa_vdev_state_exit(spa, NULL, ENXIO);
		spa_close(spa, FTAG);
		return (SET_ERROR(ENODEV));
	}
	error = vdev_label_write_pad2(vd, command, strlen(command));
	(void) spa_vdev_state_exit(spa, NULL, 0);
	txg_wait_synced(spa->spa_dsl_pool, 0);
	spa_close(spa, FTAG);
	return (error);
}
#endif
3529
3530/*
3531 * The dp_config_rwlock must not be held when calling this, because the
3532 * unmount may need to write out data.
3533 *
3534 * This function is best-effort.  Callers must deal gracefully if it
3535 * remains mounted (or is remounted after this call).
3536 *
3537 * Returns 0 if the argument is not a snapshot, or it is not currently a
3538 * filesystem, or we were able to unmount it.  Returns error code otherwise.
3539 */
3540int
3541zfs_unmount_snap(const char *snapname)
3542{
3543	vfs_t *vfsp;
3544	zfsvfs_t *zfsvfs;
3545#ifdef illumos
3546	int err;
3547#endif
3548
3549	if (strchr(snapname, '@') == NULL)
3550		return (0);
3551
3552	vfsp = zfs_get_vfs(snapname);
3553	if (vfsp == NULL)
3554		return (0);
3555
3556	zfsvfs = vfsp->vfs_data;
3557	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
3558
3559#ifdef illumos
3560	err = vn_vfswlock(vfsp->vfs_vnodecovered);
3561	VFS_RELE(vfsp);
3562	if (err != 0)
3563		return (SET_ERROR(err));
3564#endif
3565
3566	/*
3567	 * Always force the unmount for snapshots.
3568	 */
3569#ifdef illumos
3570	(void) dounmount(vfsp, MS_FORCE, kcred);
3571#else
3572	(void) dounmount(vfsp, MS_FORCE, curthread);
3573#endif
3574	return (0);
3575}
3576
/*
 * Callback-shaped wrapper around zfs_unmount_snap(); arg is ignored.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	int rc = zfs_unmount_snap(snapname);

	return (rc);
}
3583
3584/*
3585 * When a clone is destroyed, its origin may also need to be destroyed,
3586 * in which case it must be unmounted.  This routine will do that unmount
3587 * if necessary.
3588 */
3589void
3590zfs_destroy_unmount_origin(const char *fsname)
3591{
3592	int error;
3593	objset_t *os;
3594	dsl_dataset_t *ds;
3595
3596	error = dmu_objset_hold(fsname, FTAG, &os);
3597	if (error != 0)
3598		return;
3599	ds = dmu_objset_ds(os);
3600	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3601		char originname[ZFS_MAX_DATASET_NAME_LEN];
3602		dsl_dataset_name(ds->ds_prev, originname);
3603		dmu_objset_rele(os, FTAG);
3604		(void) zfs_unmount_snap(originname);
3605	} else {
3606		dmu_objset_rele(os, FTAG);
3607	}
3608}
3609
3610/*
3611 * innvl: {
3612 *     "snaps" -> { snapshot1, snapshot2 }
3613 *     (optional boolean) "defer"
3614 * }
3615 *
3616 * outnvl: snapshot -> error code (int32)
3617 *
3618 */
3619/* ARGSUSED */
3620static int
3621zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3622{
3623	int error, poollen;
3624	nvlist_t *snaps;
3625	nvpair_t *pair;
3626	boolean_t defer;
3627
3628	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3629		return (SET_ERROR(EINVAL));
3630	defer = nvlist_exists(innvl, "defer");
3631
3632	poollen = strlen(poolname);
3633	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3634	    pair = nvlist_next_nvpair(snaps, pair)) {
3635		const char *name = nvpair_name(pair);
3636
3637		/*
3638		 * The snap must be in the specified pool to prevent the
3639		 * invalid removal of zvol minors below.
3640		 */
3641		if (strncmp(name, poolname, poollen) != 0 ||
3642		    (name[poollen] != '/' && name[poollen] != '@'))
3643			return (SET_ERROR(EXDEV));
3644
3645		error = zfs_unmount_snap(name);
3646		if (error != 0)
3647			return (error);
3648#if defined(__FreeBSD__)
3649		zvol_remove_minors(name);
3650#endif
3651	}
3652
3653	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3654}
3655
3656/*
3657 * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
3658 * All bookmarks must be in the same pool.
3659 *
3660 * innvl: {
3661 *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
3662 * }
3663 *
3664 * outnvl: bookmark -> error code (int32)
3665 *
3666 */
3667/* ARGSUSED */
3668static int
3669zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3670{
3671	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3672	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3673		char *snap_name;
3674
3675		/*
3676		 * Verify the snapshot argument.
3677		 */
3678		if (nvpair_value_string(pair, &snap_name) != 0)
3679			return (SET_ERROR(EINVAL));
3680
3681
3682		/* Verify that the keys (bookmarks) are unique */
3683		for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
3684		    pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
3685			if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
3686				return (SET_ERROR(EINVAL));
3687		}
3688	}
3689
3690	return (dsl_bookmark_create(innvl, outnvl));
3691}
3692
3693/*
3694 * innvl: {
3695 *     property 1, property 2, ...
3696 * }
3697 *
3698 * outnvl: {
3699 *     bookmark name 1 -> { property 1, property 2, ... },
3700 *     bookmark name 2 -> { property 1, property 2, ... }
3701 * }
3702 *
3703 */
3704static int
3705zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3706{
3707	return (dsl_get_bookmarks(fsname, innvl, outnvl));
3708}
3709
3710/*
3711 * innvl: {
3712 *     bookmark name 1, bookmark name 2
3713 * }
3714 *
3715 * outnvl: bookmark -> error code (int32)
3716 *
3717 */
3718static int
3719zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3720    nvlist_t *outnvl)
3721{
3722	int error, poollen;
3723
3724	poollen = strlen(poolname);
3725	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3726	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3727		const char *name = nvpair_name(pair);
3728		const char *cp = strchr(name, '#');
3729
3730		/*
3731		 * The bookmark name must contain an #, and the part after it
3732		 * must contain only valid characters.
3733		 */
3734		if (cp == NULL ||
3735		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3736			return (SET_ERROR(EINVAL));
3737
3738		/*
3739		 * The bookmark must be in the specified pool.
3740		 */
3741		if (strncmp(name, poolname, poollen) != 0 ||
3742		    (name[poollen] != '/' && name[poollen] != '#'))
3743			return (SET_ERROR(EXDEV));
3744	}
3745
3746	error = dsl_bookmark_destroy(innvl, outnvl);
3747	return (error);
3748}
3749
3750/*
3751 * inputs:
3752 * zc_name		name of dataset to destroy
3753 * zc_objset_type	type of objset
3754 * zc_defer_destroy	mark for deferred destroy
3755 *
3756 * outputs:		none
3757 */
3758static int
3759zfs_ioc_destroy(zfs_cmd_t *zc)
3760{
3761	int err;
3762
3763	if (zc->zc_objset_type == DMU_OST_ZFS) {
3764		err = zfs_unmount_snap(zc->zc_name);
3765		if (err != 0)
3766			return (err);
3767	}
3768
3769	if (strchr(zc->zc_name, '@'))
3770		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3771	else
3772		err = dsl_destroy_head(zc->zc_name);
3773	if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
3774#ifdef __FreeBSD__
3775		zvol_remove_minors(zc->zc_name);
3776#else
3777		(void) zvol_remove_minor(zc->zc_name);
3778#endif
3779	return (err);
3780}
3781
3782/*
3783 * fsname is name of dataset to rollback (to most recent snapshot)
3784 *
3785 * innvl may contain name of expected target snapshot
3786 *
3787 * outnvl: "target" -> name of most recent snapshot
3788 * }
3789 */
3790/* ARGSUSED */
3791static int
3792zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3793{
3794	zfsvfs_t *zfsvfs;
3795	char *target = NULL;
3796	int error;
3797
3798	(void) nvlist_lookup_string(innvl, "target", &target);
3799	if (target != NULL) {
3800		int fslen = strlen(fsname);
3801
3802		if (strncmp(fsname, target, fslen) != 0)
3803			return (SET_ERROR(EINVAL));
3804		if (target[fslen] != '@')
3805			return (SET_ERROR(EINVAL));
3806	}
3807
3808	if (getzfsvfs(fsname, &zfsvfs) == 0) {
3809		dsl_dataset_t *ds;
3810
3811		ds = dmu_objset_ds(zfsvfs->z_os);
3812		error = zfs_suspend_fs(zfsvfs);
3813		if (error == 0) {
3814			int resume_err;
3815
3816			error = dsl_dataset_rollback(fsname, target, zfsvfs,
3817			    outnvl);
3818			resume_err = zfs_resume_fs(zfsvfs, ds);
3819			error = error ? error : resume_err;
3820		}
3821#ifdef illumos
3822		VFS_RELE(zfsvfs->z_vfs);
3823#else
3824		vfs_unbusy(zfsvfs->z_vfs);
3825#endif
3826	} else {
3827		error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
3828	}
3829	return (error);
3830}
3831
3832static int
3833recursive_unmount(const char *fsname, void *arg)
3834{
3835	const char *snapname = arg;
3836	char fullname[ZFS_MAX_DATASET_NAME_LEN];
3837
3838	(void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
3839	return (zfs_unmount_snap(fullname));
3840}
3841
3842/*
3843 * inputs:
3844 * zc_name	old name of dataset
3845 * zc_value	new name of dataset
3846 * zc_cookie	recursive flag (only valid for snapshots)
3847 *
3848 * outputs:	none
3849 */
3850static int
3851zfs_ioc_rename(zfs_cmd_t *zc)
3852{
3853	boolean_t recursive = zc->zc_cookie & 1;
3854	char *at;
3855	boolean_t allow_mounted = B_TRUE;
3856
3857#ifdef __FreeBSD__
3858	allow_mounted = (zc->zc_cookie & 2) != 0;
3859#endif
3860
3861	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3862	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3863	    strchr(zc->zc_value, '%'))
3864		return (SET_ERROR(EINVAL));
3865
3866	at = strchr(zc->zc_name, '@');
3867	if (at != NULL) {
3868		/* snaps must be in same fs */
3869		int error;
3870
3871		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
3872			return (SET_ERROR(EXDEV));
3873		*at = '\0';
3874		if (zc->zc_objset_type == DMU_OST_ZFS && !allow_mounted) {
3875			error = dmu_objset_find(zc->zc_name,
3876			    recursive_unmount, at + 1,
3877			    recursive ? DS_FIND_CHILDREN : 0);
3878			if (error != 0) {
3879				*at = '@';
3880				return (error);
3881			}
3882		}
3883		error = dsl_dataset_rename_snapshot(zc->zc_name,
3884		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
3885		*at = '@';
3886
3887		return (error);
3888	} else {
3889#ifdef illumos
3890		if (zc->zc_objset_type == DMU_OST_ZVOL)
3891			(void) zvol_remove_minor(zc->zc_name);
3892#endif
3893		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
3894	}
3895}
3896
3897static int
3898zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3899{
3900	const char *propname = nvpair_name(pair);
3901	boolean_t issnap = (strchr(dsname, '@') != NULL);
3902	zfs_prop_t prop = zfs_name_to_prop(propname);
3903	uint64_t intval;
3904	int err;
3905
3906	if (prop == ZPROP_INVAL) {
3907		if (zfs_prop_user(propname)) {
3908			if (err = zfs_secpolicy_write_perms(dsname,
3909			    ZFS_DELEG_PERM_USERPROP, cr))
3910				return (err);
3911			return (0);
3912		}
3913
3914		if (!issnap && zfs_prop_userquota(propname)) {
3915			const char *perm = NULL;
3916			const char *uq_prefix =
3917			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3918			const char *gq_prefix =
3919			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3920
3921			if (strncmp(propname, uq_prefix,
3922			    strlen(uq_prefix)) == 0) {
3923				perm = ZFS_DELEG_PERM_USERQUOTA;
3924			} else if (strncmp(propname, gq_prefix,
3925			    strlen(gq_prefix)) == 0) {
3926				perm = ZFS_DELEG_PERM_GROUPQUOTA;
3927			} else {
3928				/* USERUSED and GROUPUSED are read-only */
3929				return (SET_ERROR(EINVAL));
3930			}
3931
3932			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3933				return (err);
3934			return (0);
3935		}
3936
3937		return (SET_ERROR(EINVAL));
3938	}
3939
3940	if (issnap)
3941		return (SET_ERROR(EINVAL));
3942
3943	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3944		/*
3945		 * dsl_prop_get_all_impl() returns properties in this
3946		 * format.
3947		 */
3948		nvlist_t *attrs;
3949		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3950		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3951		    &pair) == 0);
3952	}
3953
3954	/*
3955	 * Check that this value is valid for this pool version
3956	 */
3957	switch (prop) {
3958	case ZFS_PROP_COMPRESSION:
3959		/*
3960		 * If the user specified gzip compression, make sure
3961		 * the SPA supports it. We ignore any errors here since
3962		 * we'll catch them later.
3963		 */
3964		if (nvpair_value_uint64(pair, &intval) == 0) {
3965			if (intval >= ZIO_COMPRESS_GZIP_1 &&
3966			    intval <= ZIO_COMPRESS_GZIP_9 &&
3967			    zfs_earlier_version(dsname,
3968			    SPA_VERSION_GZIP_COMPRESSION)) {
3969				return (SET_ERROR(ENOTSUP));
3970			}
3971
3972			if (intval == ZIO_COMPRESS_ZLE &&
3973			    zfs_earlier_version(dsname,
3974			    SPA_VERSION_ZLE_COMPRESSION))
3975				return (SET_ERROR(ENOTSUP));
3976
3977			if (intval == ZIO_COMPRESS_LZ4) {
3978				spa_t *spa;
3979
3980				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3981					return (err);
3982
3983				if (!spa_feature_is_enabled(spa,
3984				    SPA_FEATURE_LZ4_COMPRESS)) {
3985					spa_close(spa, FTAG);
3986					return (SET_ERROR(ENOTSUP));
3987				}
3988				spa_close(spa, FTAG);
3989			}
3990
3991			/*
3992			 * If this is a bootable dataset then
3993			 * verify that the compression algorithm
3994			 * is supported for booting. We must return
3995			 * something other than ENOTSUP since it
3996			 * implies a downrev pool version.
3997			 */
3998			if (zfs_is_bootfs(dsname) &&
3999			    !BOOTFS_COMPRESS_VALID(intval)) {
4000				return (SET_ERROR(ERANGE));
4001			}
4002		}
4003		break;
4004
4005	case ZFS_PROP_COPIES:
4006		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4007			return (SET_ERROR(ENOTSUP));
4008		break;
4009
4010	case ZFS_PROP_RECORDSIZE:
4011		/* Record sizes above 128k need the feature to be enabled */
4012		if (nvpair_value_uint64(pair, &intval) == 0 &&
4013		    intval > SPA_OLD_MAXBLOCKSIZE) {
4014			spa_t *spa;
4015
4016			/*
4017			 * If this is a bootable dataset then
4018			 * the we don't allow large (>128K) blocks,
4019			 * because GRUB doesn't support them.
4020			 */
4021			if (zfs_is_bootfs(dsname) &&
4022			    intval > SPA_OLD_MAXBLOCKSIZE) {
4023				return (SET_ERROR(ERANGE));
4024			}
4025
4026			/*
4027			 * We don't allow setting the property above 1MB,
4028			 * unless the tunable has been changed.
4029			 */
4030			if (intval > zfs_max_recordsize ||
4031			    intval > SPA_MAXBLOCKSIZE)
4032				return (SET_ERROR(ERANGE));
4033
4034			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4035				return (err);
4036
4037			if (!spa_feature_is_enabled(spa,
4038			    SPA_FEATURE_LARGE_BLOCKS)) {
4039				spa_close(spa, FTAG);
4040				return (SET_ERROR(ENOTSUP));
4041			}
4042			spa_close(spa, FTAG);
4043		}
4044		break;
4045
4046	case ZFS_PROP_SHARESMB:
4047		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4048			return (SET_ERROR(ENOTSUP));
4049		break;
4050
4051	case ZFS_PROP_ACLINHERIT:
4052		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4053		    nvpair_value_uint64(pair, &intval) == 0) {
4054			if (intval == ZFS_ACL_PASSTHROUGH_X &&
4055			    zfs_earlier_version(dsname,
4056			    SPA_VERSION_PASSTHROUGH_X))
4057				return (SET_ERROR(ENOTSUP));
4058		}
4059		break;
4060
4061	case ZFS_PROP_CHECKSUM:
4062	case ZFS_PROP_DEDUP:
4063	{
4064		spa_feature_t feature;
4065		spa_t *spa;
4066
4067		/* dedup feature version checks */
4068		if (prop == ZFS_PROP_DEDUP &&
4069		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4070			return (SET_ERROR(ENOTSUP));
4071
4072		if (nvpair_value_uint64(pair, &intval) != 0)
4073			return (SET_ERROR(EINVAL));
4074
4075		/* check prop value is enabled in features */
4076		feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
4077		if (feature == SPA_FEATURE_NONE)
4078			break;
4079
4080		if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4081			return (err);
4082		/*
4083		 * Salted checksums are not supported on root pools.
4084		 */
4085		if (spa_bootfs(spa) != 0 &&
4086		    intval < ZIO_CHECKSUM_FUNCTIONS &&
4087		    (zio_checksum_table[intval].ci_flags &
4088		    ZCHECKSUM_FLAG_SALTED)) {
4089			spa_close(spa, FTAG);
4090			return (SET_ERROR(ERANGE));
4091		}
4092		if (!spa_feature_is_enabled(spa, feature)) {
4093			spa_close(spa, FTAG);
4094			return (SET_ERROR(ENOTSUP));
4095		}
4096		spa_close(spa, FTAG);
4097		break;
4098	}
4099	}
4100
4101	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4102}
4103
4104/*
4105 * Checks for a race condition to make sure we don't increment a feature flag
4106 * multiple times.
4107 */
4108static int
4109zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
4110{
4111	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4112	spa_feature_t *featurep = arg;
4113
4114	if (!spa_feature_is_active(spa, *featurep))
4115		return (0);
4116	else
4117		return (SET_ERROR(EBUSY));
4118}
4119
4120/*
4121 * The callback invoked on feature activation in the sync task caused by
4122 * zfs_prop_activate_feature.
4123 */
4124static void
4125zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
4126{
4127	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4128	spa_feature_t *featurep = arg;
4129
4130	spa_feature_incr(spa, *featurep, tx);
4131}
4132
4133/*
4134 * Activates a feature on a pool in response to a property setting. This
4135 * creates a new sync task which modifies the pool to reflect the feature
4136 * as being active.
4137 */
4138static int
4139zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
4140{
4141	int err;
4142
4143	/* EBUSY here indicates that the feature is already active */
4144	err = dsl_sync_task(spa_name(spa),
4145	    zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
4146	    &feature, 2, ZFS_SPACE_CHECK_RESERVED);
4147
4148	if (err != 0 && err != EBUSY)
4149		return (err);
4150	else
4151		return (0);
4152}
4153
4154/*
4155 * Removes properties from the given props list that fail permission checks
4156 * needed to clear them and to restore them in case of a receive error. For each
4157 * property, make sure we have both set and inherit permissions.
4158 *
4159 * Returns the first error encountered if any permission checks fail. If the
4160 * caller provides a non-NULL errlist, it also gives the complete list of names
4161 * of all the properties that failed a permission check along with the
4162 * corresponding error numbers. The caller is responsible for freeing the
4163 * returned errlist.
4164 *
4165 * If every property checks out successfully, zero is returned and the list
4166 * pointed at by errlist is NULL.
4167 */
4168static int
4169zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
4170{
4171	zfs_cmd_t *zc;
4172	nvpair_t *pair, *next_pair;
4173	nvlist_t *errors;
4174	int err, rv = 0;
4175
4176	if (props == NULL)
4177		return (0);
4178
4179	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4180
4181	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4182	(void) strcpy(zc->zc_name, dataset);
4183	pair = nvlist_next_nvpair(props, NULL);
4184	while (pair != NULL) {
4185		next_pair = nvlist_next_nvpair(props, pair);
4186
4187		(void) strcpy(zc->zc_value, nvpair_name(pair));
4188		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4189		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4190			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4191			VERIFY(nvlist_add_int32(errors,
4192			    zc->zc_value, err) == 0);
4193		}
4194		pair = next_pair;
4195	}
4196	kmem_free(zc, sizeof (zfs_cmd_t));
4197
4198	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4199		nvlist_free(errors);
4200		errors = NULL;
4201	} else {
4202		VERIFY(nvpair_value_int32(pair, &rv) == 0);
4203	}
4204
4205	if (errlist == NULL)
4206		nvlist_free(errors);
4207	else
4208		*errlist = errors;
4209
4210	return (rv);
4211}
4212
4213static boolean_t
4214propval_equals(nvpair_t *p1, nvpair_t *p2)
4215{
4216	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4217		/* dsl_prop_get_all_impl() format */
4218		nvlist_t *attrs;
4219		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4220		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4221		    &p1) == 0);
4222	}
4223
4224	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4225		nvlist_t *attrs;
4226		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4227		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4228		    &p2) == 0);
4229	}
4230
4231	if (nvpair_type(p1) != nvpair_type(p2))
4232		return (B_FALSE);
4233
4234	if (nvpair_type(p1) == DATA_TYPE_STRING) {
4235		char *valstr1, *valstr2;
4236
4237		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4238		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4239		return (strcmp(valstr1, valstr2) == 0);
4240	} else {
4241		uint64_t intval1, intval2;
4242
4243		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4244		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4245		return (intval1 == intval2);
4246	}
4247}
4248
4249/*
4250 * Remove properties from props if they are not going to change (as determined
4251 * by comparison with origprops). Remove them from origprops as well, since we
4252 * do not need to clear or restore properties that won't change.
4253 */
4254static void
4255props_reduce(nvlist_t *props, nvlist_t *origprops)
4256{
4257	nvpair_t *pair, *next_pair;
4258
4259	if (origprops == NULL)
4260		return; /* all props need to be received */
4261
4262	pair = nvlist_next_nvpair(props, NULL);
4263	while (pair != NULL) {
4264		const char *propname = nvpair_name(pair);
4265		nvpair_t *match;
4266
4267		next_pair = nvlist_next_nvpair(props, pair);
4268
4269		if ((nvlist_lookup_nvpair(origprops, propname,
4270		    &match) != 0) || !propval_equals(pair, match))
4271			goto next; /* need to set received value */
4272
4273		/* don't clear the existing received value */
4274		(void) nvlist_remove_nvpair(origprops, match);
4275		/* don't bother receiving the property */
4276		(void) nvlist_remove_nvpair(props, pair);
4277next:
4278		pair = next_pair;
4279	}
4280}
4281
4282/*
4283 * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4284 * For example, refquota cannot be set until after the receipt of a dataset,
4285 * because in replication streams, an older/earlier snapshot may exceed the
4286 * refquota.  We want to receive the older/earlier snapshot, but setting
4287 * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4288 * the older/earlier snapshot from being received (with EDQUOT).
4289 *
4290 * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4291 *
4292 * libzfs will need to be judicious handling errors encountered by props
4293 * extracted by this function.
4294 */
4295static nvlist_t *
4296extract_delay_props(nvlist_t *props)
4297{
4298	nvlist_t *delayprops;
4299	nvpair_t *nvp, *tmp;
4300	static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
4301	int i;
4302
4303	VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4304
4305	for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4306	    nvp = nvlist_next_nvpair(props, nvp)) {
4307		/*
4308		 * strcmp() is safe because zfs_prop_to_name() always returns
4309		 * a bounded string.
4310		 */
4311		for (i = 0; delayable[i] != 0; i++) {
4312			if (strcmp(zfs_prop_to_name(delayable[i]),
4313			    nvpair_name(nvp)) == 0) {
4314				break;
4315			}
4316		}
4317		if (delayable[i] != 0) {
4318			tmp = nvlist_prev_nvpair(props, nvp);
4319			VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4320			VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4321			nvp = tmp;
4322		}
4323	}
4324
4325	if (nvlist_empty(delayprops)) {
4326		nvlist_free(delayprops);
4327		delayprops = NULL;
4328	}
4329	return (delayprops);
4330}
4331
4332#ifdef	DEBUG
4333static boolean_t zfs_ioc_recv_inject_err;
4334#endif
4335
4336/*
4337 * inputs:
4338 * zc_name		name of containing filesystem
4339 * zc_nvlist_src{_size}	nvlist of properties to apply
4340 * zc_value		name of snapshot to create
4341 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
4342 * zc_cookie		file descriptor to recv from
4343 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
4344 * zc_guid		force flag
4345 * zc_cleanup_fd	cleanup-on-exit file descriptor
4346 * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
4347 * zc_resumable		if data is incomplete assume sender will resume
4348 *
4349 * outputs:
4350 * zc_cookie		number of bytes read
4351 * zc_nvlist_dst{_size} error for each unapplied received property
4352 * zc_obj		zprop_errflags_t
4353 * zc_action_handle	handle for this guid/ds mapping
4354 */
4355static int
4356zfs_ioc_recv(zfs_cmd_t *zc)
4357{
4358	file_t *fp;
4359	dmu_recv_cookie_t drc;
4360	boolean_t force = (boolean_t)zc->zc_guid;
4361	int fd;
4362	int error = 0;
4363	int props_error = 0;
4364	nvlist_t *errors;
4365	offset_t off;
4366	nvlist_t *props = NULL; /* sent properties */
4367	nvlist_t *origprops = NULL; /* existing properties */
4368	nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
4369	char *origin = NULL;
4370	char *tosnap;
4371	char tofs[ZFS_MAX_DATASET_NAME_LEN];
4372	cap_rights_t rights;
4373	boolean_t first_recvd_props = B_FALSE;
4374
4375	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4376	    strchr(zc->zc_value, '@') == NULL ||
4377	    strchr(zc->zc_value, '%'))
4378		return (SET_ERROR(EINVAL));
4379
4380	(void) strcpy(tofs, zc->zc_value);
4381	tosnap = strchr(tofs, '@');
4382	*tosnap++ = '\0';
4383
4384	if (zc->zc_nvlist_src != 0 &&
4385	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
4386	    zc->zc_iflags, &props)) != 0)
4387		return (error);
4388
4389	fd = zc->zc_cookie;
4390#ifdef illumos
4391	fp = getf(fd);
4392#else
4393	fget_read(curthread, fd, cap_rights_init(&rights, CAP_PREAD), &fp);
4394#endif
4395	if (fp == NULL) {
4396		nvlist_free(props);
4397		return (SET_ERROR(EBADF));
4398	}
4399
4400	errors = fnvlist_alloc();
4401
4402	if (zc->zc_string[0])
4403		origin = zc->zc_string;
4404
4405	error = dmu_recv_begin(tofs, tosnap,
4406	    &zc->zc_begin_record, force, zc->zc_resumable, origin, &drc);
4407	if (error != 0)
4408		goto out;
4409
4410	/*
4411	 * Set properties before we receive the stream so that they are applied
4412	 * to the new data. Note that we must call dmu_recv_stream() if
4413	 * dmu_recv_begin() succeeds.
4414	 */
4415	if (props != NULL && !drc.drc_newfs) {
4416		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
4417		    SPA_VERSION_RECVD_PROPS &&
4418		    !dsl_prop_get_hasrecvd(tofs))
4419			first_recvd_props = B_TRUE;
4420
4421		/*
4422		 * If new received properties are supplied, they are to
4423		 * completely replace the existing received properties, so stash
4424		 * away the existing ones.
4425		 */
4426		if (dsl_prop_get_received(tofs, &origprops) == 0) {
4427			nvlist_t *errlist = NULL;
4428			/*
4429			 * Don't bother writing a property if its value won't
4430			 * change (and avoid the unnecessary security checks).
4431			 *
4432			 * The first receive after SPA_VERSION_RECVD_PROPS is a
4433			 * special case where we blow away all local properties
4434			 * regardless.
4435			 */
4436			if (!first_recvd_props)
4437				props_reduce(props, origprops);
4438			if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
4439				(void) nvlist_merge(errors, errlist, 0);
4440			nvlist_free(errlist);
4441
4442			if (clear_received_props(tofs, origprops,
4443			    first_recvd_props ? NULL : props) != 0)
4444				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
4445		} else {
4446			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
4447		}
4448	}
4449
4450	if (props != NULL) {
4451		props_error = dsl_prop_set_hasrecvd(tofs);
4452
4453		if (props_error == 0) {
4454			delayprops = extract_delay_props(props);
4455			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4456			    props, errors);
4457		}
4458	}
4459
4460	off = fp->f_offset;
4461	error = dmu_recv_stream(&drc, fp, &off, zc->zc_cleanup_fd,
4462	    &zc->zc_action_handle);
4463
4464	if (error == 0) {
4465		zfsvfs_t *zfsvfs = NULL;
4466
4467		if (getzfsvfs(tofs, &zfsvfs) == 0) {
4468			/* online recv */
4469			dsl_dataset_t *ds;
4470			int end_err;
4471
4472			ds = dmu_objset_ds(zfsvfs->z_os);
4473			error = zfs_suspend_fs(zfsvfs);
4474			/*
4475			 * If the suspend fails, then the recv_end will
4476			 * likely also fail, and clean up after itself.
4477			 */
4478			end_err = dmu_recv_end(&drc, zfsvfs);
4479			if (error == 0)
4480				error = zfs_resume_fs(zfsvfs, ds);
4481			error = error ? error : end_err;
4482#ifdef illumos
4483			VFS_RELE(zfsvfs->z_vfs);
4484#else
4485			vfs_unbusy(zfsvfs->z_vfs);
4486#endif
4487		} else {
4488			error = dmu_recv_end(&drc, NULL);
4489		}
4490
4491		/* Set delayed properties now, after we're done receiving. */
4492		if (delayprops != NULL && error == 0) {
4493			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4494			    delayprops, errors);
4495		}
4496	}
4497
4498	if (delayprops != NULL) {
4499		/*
4500		 * Merge delayed props back in with initial props, in case
4501		 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
4502		 * we have to make sure clear_received_props() includes
4503		 * the delayed properties).
4504		 *
4505		 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
4506		 * using ASSERT() will be just like a VERIFY.
4507		 */
4508		ASSERT(nvlist_merge(props, delayprops, 0) == 0);
4509		nvlist_free(delayprops);
4510	}
4511
4512	/*
4513	 * Now that all props, initial and delayed, are set, report the prop
4514	 * errors to the caller.
4515	 */
4516	if (zc->zc_nvlist_dst_size != 0 &&
4517	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
4518	    put_nvlist(zc, errors) != 0)) {
4519		/*
4520		 * Caller made zc->zc_nvlist_dst less than the minimum expected
4521		 * size or supplied an invalid address.
4522		 */
4523		props_error = SET_ERROR(EINVAL);
4524	}
4525
4526	zc->zc_cookie = off - fp->f_offset;
4527	if (off >= 0 && off <= MAXOFFSET_T)
4528		fp->f_offset = off;
4529
4530#ifdef	DEBUG
4531	if (zfs_ioc_recv_inject_err) {
4532		zfs_ioc_recv_inject_err = B_FALSE;
4533		error = 1;
4534	}
4535#endif
4536
4537#ifdef __FreeBSD__
4538	if (error == 0)
4539		zvol_create_minors(tofs);
4540#endif
4541
4542	/*
4543	 * On error, restore the original props.
4544	 */
4545	if (error != 0 && props != NULL && !drc.drc_newfs) {
4546		if (clear_received_props(tofs, props, NULL) != 0) {
4547			/*
4548			 * We failed to clear the received properties.
4549			 * Since we may have left a $recvd value on the
4550			 * system, we can't clear the $hasrecvd flag.
4551			 */
4552			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4553		} else if (first_recvd_props) {
4554			dsl_prop_unset_hasrecvd(tofs);
4555		}
4556
4557		if (origprops == NULL && !drc.drc_newfs) {
4558			/* We failed to stash the original properties. */
4559			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4560		}
4561
4562		/*
4563		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
4564		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
4565		 * explictly if we're restoring local properties cleared in the
4566		 * first new-style receive.
4567		 */
4568		if (origprops != NULL &&
4569		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
4570		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
4571		    origprops, NULL) != 0) {
4572			/*
4573			 * We stashed the original properties but failed to
4574			 * restore them.
4575			 */
4576			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4577		}
4578	}
4579out:
4580	nvlist_free(props);
4581	nvlist_free(origprops);
4582	nvlist_free(errors);
4583	releasef(fd);
4584
4585	if (error == 0)
4586		error = props_error;
4587
4588	return (error);
4589}
4590
4591/*
4592 * inputs:
4593 * zc_name	name of snapshot to send
4594 * zc_cookie	file descriptor to send stream to
4595 * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
4596 * zc_sendobj	objsetid of snapshot to send
4597 * zc_fromobj	objsetid of incremental fromsnap (may be zero)
4598 * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
4599 *		output size in zc_objset_type.
4600 * zc_flags	lzc_send_flags
4601 *
4602 * outputs:
4603 * zc_objset_type	estimated size, if zc_guid is set
4604 */
4605static int
4606zfs_ioc_send(zfs_cmd_t *zc)
4607{
4608	int error;
4609	offset_t off;
4610	boolean_t estimate = (zc->zc_guid != 0);
4611	boolean_t embedok = (zc->zc_flags & 0x1);
4612	boolean_t large_block_ok = (zc->zc_flags & 0x2);
4613	boolean_t compressok = (zc->zc_flags & 0x4);
4614
4615	if (zc->zc_obj != 0) {
4616		dsl_pool_t *dp;
4617		dsl_dataset_t *tosnap;
4618
4619		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4620		if (error != 0)
4621			return (error);
4622
4623		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4624		if (error != 0) {
4625			dsl_pool_rele(dp, FTAG);
4626			return (error);
4627		}
4628
4629		if (dsl_dir_is_clone(tosnap->ds_dir))
4630			zc->zc_fromobj =
4631			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
4632		dsl_dataset_rele(tosnap, FTAG);
4633		dsl_pool_rele(dp, FTAG);
4634	}
4635
4636	if (estimate) {
4637		dsl_pool_t *dp;
4638		dsl_dataset_t *tosnap;
4639		dsl_dataset_t *fromsnap = NULL;
4640
4641		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4642		if (error != 0)
4643			return (error);
4644
4645		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4646		if (error != 0) {
4647			dsl_pool_rele(dp, FTAG);
4648			return (error);
4649		}
4650
4651		if (zc->zc_fromobj != 0) {
4652			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
4653			    FTAG, &fromsnap);
4654			if (error != 0) {
4655				dsl_dataset_rele(tosnap, FTAG);
4656				dsl_pool_rele(dp, FTAG);
4657				return (error);
4658			}
4659		}
4660
4661		error = dmu_send_estimate(tosnap, fromsnap, compressok,
4662		    &zc->zc_objset_type);
4663
4664		if (fromsnap != NULL)
4665			dsl_dataset_rele(fromsnap, FTAG);
4666		dsl_dataset_rele(tosnap, FTAG);
4667		dsl_pool_rele(dp, FTAG);
4668	} else {
4669		file_t *fp;
4670		cap_rights_t rights;
4671
4672#ifdef illumos
4673		fp = getf(zc->zc_cookie);
4674#else
4675		fget_write(curthread, zc->zc_cookie,
4676		    cap_rights_init(&rights, CAP_WRITE), &fp);
4677#endif
4678		if (fp == NULL)
4679			return (SET_ERROR(EBADF));
4680
4681		off = fp->f_offset;
4682		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
4683		    zc->zc_fromobj, embedok, large_block_ok, compressok,
4684#ifdef illumos
4685		    zc->zc_cookie, fp->f_vnode, &off);
4686#else
4687		    zc->zc_cookie, fp, &off);
4688#endif
4689
4690		if (off >= 0 && off <= MAXOFFSET_T)
4691			fp->f_offset = off;
4692		releasef(zc->zc_cookie);
4693	}
4694	return (error);
4695}
4696
4697/*
4698 * inputs:
4699 * zc_name	name of snapshot on which to report progress
4700 * zc_cookie	file descriptor of send stream
4701 *
4702 * outputs:
4703 * zc_cookie	number of bytes written in send stream thus far
4704 */
4705static int
4706zfs_ioc_send_progress(zfs_cmd_t *zc)
4707{
4708	dsl_pool_t *dp;
4709	dsl_dataset_t *ds;
4710	dmu_sendarg_t *dsp = NULL;
4711	int error;
4712
4713	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4714	if (error != 0)
4715		return (error);
4716
4717	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4718	if (error != 0) {
4719		dsl_pool_rele(dp, FTAG);
4720		return (error);
4721	}
4722
4723	mutex_enter(&ds->ds_sendstream_lock);
4724
4725	/*
4726	 * Iterate over all the send streams currently active on this dataset.
4727	 * If there's one which matches the specified file descriptor _and_ the
4728	 * stream was started by the current process, return the progress of
4729	 * that stream.
4730	 */
4731	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
4732	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
4733		if (dsp->dsa_outfd == zc->zc_cookie &&
4734		    dsp->dsa_proc == curproc)
4735			break;
4736	}
4737
4738	if (dsp != NULL)
4739		zc->zc_cookie = *(dsp->dsa_off);
4740	else
4741		error = SET_ERROR(ENOENT);
4742
4743	mutex_exit(&ds->ds_sendstream_lock);
4744	dsl_dataset_rele(ds, FTAG);
4745	dsl_pool_rele(dp, FTAG);
4746	return (error);
4747}
4748
4749static int
4750zfs_ioc_inject_fault(zfs_cmd_t *zc)
4751{
4752	int id, error;
4753
4754	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
4755	    &zc->zc_inject_record);
4756
4757	if (error == 0)
4758		zc->zc_guid = (uint64_t)id;
4759
4760	return (error);
4761}
4762
4763static int
4764zfs_ioc_clear_fault(zfs_cmd_t *zc)
4765{
4766	return (zio_clear_fault((int)zc->zc_guid));
4767}
4768
4769static int
4770zfs_ioc_inject_list_next(zfs_cmd_t *zc)
4771{
4772	int id = (int)zc->zc_guid;
4773	int error;
4774
4775	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
4776	    &zc->zc_inject_record);
4777
4778	zc->zc_guid = id;
4779
4780	return (error);
4781}
4782
4783static int
4784zfs_ioc_error_log(zfs_cmd_t *zc)
4785{
4786	spa_t *spa;
4787	int error;
4788	size_t count = (size_t)zc->zc_nvlist_dst_size;
4789
4790	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4791		return (error);
4792
4793	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4794	    &count);
4795	if (error == 0)
4796		zc->zc_nvlist_dst_size = count;
4797	else
4798		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4799
4800	spa_close(spa, FTAG);
4801
4802	return (error);
4803}
4804
4805static int
4806zfs_ioc_clear(zfs_cmd_t *zc)
4807{
4808	spa_t *spa;
4809	vdev_t *vd;
4810	int error;
4811
4812	/*
4813	 * On zpool clear we also fix up missing slogs
4814	 */
4815	mutex_enter(&spa_namespace_lock);
4816	spa = spa_lookup(zc->zc_name);
4817	if (spa == NULL) {
4818		mutex_exit(&spa_namespace_lock);
4819		return (SET_ERROR(EIO));
4820	}
4821	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4822		/* we need to let spa_open/spa_load clear the chains */
4823		spa_set_log_state(spa, SPA_LOG_CLEAR);
4824	}
4825	spa->spa_last_open_failed = 0;
4826	mutex_exit(&spa_namespace_lock);
4827
4828	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4829		error = spa_open(zc->zc_name, &spa, FTAG);
4830	} else {
4831		nvlist_t *policy;
4832		nvlist_t *config = NULL;
4833
4834		if (zc->zc_nvlist_src == 0)
4835			return (SET_ERROR(EINVAL));
4836
4837		if ((error = get_nvlist(zc->zc_nvlist_src,
4838		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4839			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4840			    policy, &config);
4841			if (config != NULL) {
4842				int err;
4843
4844				if ((err = put_nvlist(zc, config)) != 0)
4845					error = err;
4846				nvlist_free(config);
4847			}
4848			nvlist_free(policy);
4849		}
4850	}
4851
4852	if (error != 0)
4853		return (error);
4854
4855	spa_vdev_state_enter(spa, SCL_NONE);
4856
4857	if (zc->zc_guid == 0) {
4858		vd = NULL;
4859	} else {
4860		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4861		if (vd == NULL) {
4862			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
4863			spa_close(spa, FTAG);
4864			return (SET_ERROR(ENODEV));
4865		}
4866	}
4867
4868	vdev_clear(spa, vd);
4869
4870	(void) spa_vdev_state_exit(spa, NULL, 0);
4871
4872	/*
4873	 * Resume any suspended I/Os.
4874	 */
4875	if (zio_resume(spa) != 0)
4876		error = SET_ERROR(EIO);
4877
4878	spa_close(spa, FTAG);
4879
4880	return (error);
4881}
4882
4883static int
4884zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4885{
4886	spa_t *spa;
4887	int error;
4888
4889	error = spa_open(zc->zc_name, &spa, FTAG);
4890	if (error != 0)
4891		return (error);
4892
4893	spa_vdev_state_enter(spa, SCL_NONE);
4894
4895	/*
4896	 * If a resilver is already in progress then set the
4897	 * spa_scrub_reopen flag to B_TRUE so that we don't restart
4898	 * the scan as a side effect of the reopen. Otherwise, let
4899	 * vdev_open() decided if a resilver is required.
4900	 */
4901	spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
4902	vdev_reopen(spa->spa_root_vdev);
4903	spa->spa_scrub_reopen = B_FALSE;
4904
4905	(void) spa_vdev_state_exit(spa, NULL, 0);
4906	spa_close(spa, FTAG);
4907	return (0);
4908}
4909/*
4910 * inputs:
4911 * zc_name	name of filesystem
4912 *
4913 * outputs:
4914 * zc_string	name of conflicting snapshot, if there is one
4915 */
4916static int
4917zfs_ioc_promote(zfs_cmd_t *zc)
4918{
4919	dsl_pool_t *dp;
4920	dsl_dataset_t *ds, *ods;
4921	char origin[ZFS_MAX_DATASET_NAME_LEN];
4922	char *cp;
4923	int error;
4924
4925	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4926	if (error != 0)
4927		return (error);
4928
4929	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4930	if (error != 0) {
4931		dsl_pool_rele(dp, FTAG);
4932		return (error);
4933	}
4934
4935	if (!dsl_dir_is_clone(ds->ds_dir)) {
4936		dsl_dataset_rele(ds, FTAG);
4937		dsl_pool_rele(dp, FTAG);
4938		return (SET_ERROR(EINVAL));
4939	}
4940
4941	error = dsl_dataset_hold_obj(dp,
4942	    dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
4943	if (error != 0) {
4944		dsl_dataset_rele(ds, FTAG);
4945		dsl_pool_rele(dp, FTAG);
4946		return (error);
4947	}
4948
4949	dsl_dataset_name(ods, origin);
4950	dsl_dataset_rele(ods, FTAG);
4951	dsl_dataset_rele(ds, FTAG);
4952	dsl_pool_rele(dp, FTAG);
4953
4954	/*
4955	 * We don't need to unmount *all* the origin fs's snapshots, but
4956	 * it's easier.
4957	 */
4958	cp = strchr(origin, '@');
4959	if (cp)
4960		*cp = '\0';
4961	(void) dmu_objset_find(origin,
4962	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
4963	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4964}
4965
4966/*
4967 * Retrieve a single {user|group}{used|quota}@... property.
4968 *
4969 * inputs:
4970 * zc_name	name of filesystem
4971 * zc_objset_type zfs_userquota_prop_t
4972 * zc_value	domain name (eg. "S-1-234-567-89")
4973 * zc_guid	RID/UID/GID
4974 *
4975 * outputs:
4976 * zc_cookie	property value
4977 */
4978static int
4979zfs_ioc_userspace_one(zfs_cmd_t *zc)
4980{
4981	zfsvfs_t *zfsvfs;
4982	int error;
4983
4984	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
4985		return (SET_ERROR(EINVAL));
4986
4987	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4988	if (error != 0)
4989		return (error);
4990
4991	error = zfs_userspace_one(zfsvfs,
4992	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
4993	zfsvfs_rele(zfsvfs, FTAG);
4994
4995	return (error);
4996}
4997
4998/*
4999 * inputs:
5000 * zc_name		name of filesystem
5001 * zc_cookie		zap cursor
5002 * zc_objset_type	zfs_userquota_prop_t
5003 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
5004 *
5005 * outputs:
5006 * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
5007 * zc_cookie	zap cursor
5008 */
5009static int
5010zfs_ioc_userspace_many(zfs_cmd_t *zc)
5011{
5012	zfsvfs_t *zfsvfs;
5013	int bufsize = zc->zc_nvlist_dst_size;
5014
5015	if (bufsize <= 0)
5016		return (SET_ERROR(ENOMEM));
5017
5018	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5019	if (error != 0)
5020		return (error);
5021
5022	void *buf = kmem_alloc(bufsize, KM_SLEEP);
5023
5024	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
5025	    buf, &zc->zc_nvlist_dst_size);
5026
5027	if (error == 0) {
5028		error = ddi_copyout(buf,
5029		    (void *)(uintptr_t)zc->zc_nvlist_dst,
5030		    zc->zc_nvlist_dst_size, zc->zc_iflags);
5031	}
5032	kmem_free(buf, bufsize);
5033	zfsvfs_rele(zfsvfs, FTAG);
5034
5035	return (error);
5036}
5037
5038/*
5039 * inputs:
5040 * zc_name		name of filesystem
5041 *
5042 * outputs:
5043 * none
5044 */
5045static int
5046zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
5047{
5048	objset_t *os;
5049	int error = 0;
5050	zfsvfs_t *zfsvfs;
5051
5052	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
5053		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
5054			/*
5055			 * If userused is not enabled, it may be because the
5056			 * objset needs to be closed & reopened (to grow the
5057			 * objset_phys_t).  Suspend/resume the fs will do that.
5058			 */
5059			dsl_dataset_t *ds;
5060
5061			ds = dmu_objset_ds(zfsvfs->z_os);
5062			error = zfs_suspend_fs(zfsvfs);
5063			if (error == 0) {
5064				dmu_objset_refresh_ownership(zfsvfs->z_os,
5065				    zfsvfs);
5066				error = zfs_resume_fs(zfsvfs, ds);
5067			}
5068		}
5069		if (error == 0)
5070			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
5071#ifdef illumos
5072		VFS_RELE(zfsvfs->z_vfs);
5073#else
5074		vfs_unbusy(zfsvfs->z_vfs);
5075#endif
5076	} else {
5077		/* XXX kind of reading contents without owning */
5078		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5079		if (error != 0)
5080			return (error);
5081
5082		error = dmu_objset_userspace_upgrade(os);
5083		dmu_objset_rele(os, FTAG);
5084	}
5085
5086	return (error);
5087}
5088
#ifdef illumos
/*
 * We don't want to have a hard dependency on the special symbols
 * exported by sharefs, nfs, and smbsrv, so they are resolved at run
 * time, when the first file system is shared.
 * None of sharefs, nfs, or smbsrv is an unloadable module.
 */
int (*znfsexport_fs)(void *arg);
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/* Set to 1 by zfs_ioc_share() once the corresponding symbols are resolved. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Module handles returned by ddi_modopen(); NULL until first use. */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
#endif	/* illumos */
/* Held by zfs_ioc_share() while it performs its one-time share setup. */
kmutex_t zfs_share_lock;
5109
#ifdef illumos
/*
 * Open the sharefs module and resolve its "sharefs_impl" entry point,
 * skipping whichever step has already been done.  The caller must hold
 * zfs_share_lock.  Returns 0 on success or ENOSYS if either step fails.
 */
static int
zfs_init_sharefs()
{
	int error;

	ASSERT(MUTEX_HELD(&zfs_share_lock));

	/* Both NFS and SMB shares also require sharetab support. */
	if (sharefs_mod == NULL) {
		sharefs_mod = ddi_modopen("fs/sharefs",
		    KRTLD_MODE_FIRST, &error);
		if (sharefs_mod == NULL)
			return (SET_ERROR(ENOSYS));
	}

	if (zshare_fs == NULL) {
		zshare_fs = (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
		    ddi_modsym(sharefs_mod, "sharefs_impl", &error);
		if (zshare_fs == NULL)
			return (SET_ERROR(ENOSYS));
	}

	return (0);
}
#endif	/* illumos */
5131
5132static int
5133zfs_ioc_share(zfs_cmd_t *zc)
5134{
5135#ifdef illumos
5136	int error;
5137	int opcode;
5138
5139	switch (zc->zc_share.z_sharetype) {
5140	case ZFS_SHARE_NFS:
5141	case ZFS_UNSHARE_NFS:
5142		if (zfs_nfsshare_inited == 0) {
5143			mutex_enter(&zfs_share_lock);
5144			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
5145			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5146				mutex_exit(&zfs_share_lock);
5147				return (SET_ERROR(ENOSYS));
5148			}
5149			if (znfsexport_fs == NULL &&
5150			    ((znfsexport_fs = (int (*)(void *))
5151			    ddi_modsym(nfs_mod,
5152			    "nfs_export", &error)) == NULL)) {
5153				mutex_exit(&zfs_share_lock);
5154				return (SET_ERROR(ENOSYS));
5155			}
5156			error = zfs_init_sharefs();
5157			if (error != 0) {
5158				mutex_exit(&zfs_share_lock);
5159				return (SET_ERROR(ENOSYS));
5160			}
5161			zfs_nfsshare_inited = 1;
5162			mutex_exit(&zfs_share_lock);
5163		}
5164		break;
5165	case ZFS_SHARE_SMB:
5166	case ZFS_UNSHARE_SMB:
5167		if (zfs_smbshare_inited == 0) {
5168			mutex_enter(&zfs_share_lock);
5169			if (smbsrv_mod == NULL && ((smbsrv_mod =
5170			    ddi_modopen("drv/smbsrv",
5171			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5172				mutex_exit(&zfs_share_lock);
5173				return (SET_ERROR(ENOSYS));
5174			}
5175			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
5176			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
5177			    "smb_server_share", &error)) == NULL)) {
5178				mutex_exit(&zfs_share_lock);
5179				return (SET_ERROR(ENOSYS));
5180			}
5181			error = zfs_init_sharefs();
5182			if (error != 0) {
5183				mutex_exit(&zfs_share_lock);
5184				return (SET_ERROR(ENOSYS));
5185			}
5186			zfs_smbshare_inited = 1;
5187			mutex_exit(&zfs_share_lock);
5188		}
5189		break;
5190	default:
5191		return (SET_ERROR(EINVAL));
5192	}
5193
5194	switch (zc->zc_share.z_sharetype) {
5195	case ZFS_SHARE_NFS:
5196	case ZFS_UNSHARE_NFS:
5197		if (error =
5198		    znfsexport_fs((void *)
5199		    (uintptr_t)zc->zc_share.z_exportdata))
5200			return (error);
5201		break;
5202	case ZFS_SHARE_SMB:
5203	case ZFS_UNSHARE_SMB:
5204		if (error = zsmbexport_fs((void *)
5205		    (uintptr_t)zc->zc_share.z_exportdata,
5206		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
5207		    B_TRUE: B_FALSE)) {
5208			return (error);
5209		}
5210		break;
5211	}
5212
5213	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
5214	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
5215	    SHAREFS_ADD : SHAREFS_REMOVE;
5216
5217	/*
5218	 * Add or remove share from sharetab
5219	 */
5220	error = zshare_fs(opcode,
5221	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
5222	    zc->zc_share.z_sharemax);
5223
5224	return (error);
5225
5226#else	/* !illumos */
5227	return (ENOSYS);
5228#endif	/* illumos */
5229}
5230
/*
 * One-entry ACL (ACE_ALL_PERMS / ACE_EVERYONE) that zfs_ioc_smb_acl()
 * attaches to the share files it creates for ZFS_SMB_ACL_ADD.
 */
ace_t full_access[] = {
	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
5234
5235/*
5236 * inputs:
5237 * zc_name		name of containing filesystem
5238 * zc_obj		object # beyond which we want next in-use object #
5239 *
5240 * outputs:
5241 * zc_obj		next in-use object #
5242 */
5243static int
5244zfs_ioc_next_obj(zfs_cmd_t *zc)
5245{
5246	objset_t *os = NULL;
5247	int error;
5248
5249	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5250	if (error != 0)
5251		return (error);
5252
5253	error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
5254	    dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg);
5255
5256	dmu_objset_rele(os, FTAG);
5257	return (error);
5258}
5259
5260/*
5261 * inputs:
5262 * zc_name		name of filesystem
5263 * zc_value		prefix name for snapshot
5264 * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
5265 *
5266 * outputs:
5267 * zc_value		short name of new snapshot
5268 */
5269static int
5270zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5271{
5272	char *snap_name;
5273	char *hold_name;
5274	int error;
5275	minor_t minor;
5276
5277	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5278	if (error != 0)
5279		return (error);
5280
5281	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5282	    (u_longlong_t)ddi_get_lbolt64());
5283	hold_name = kmem_asprintf("%%%s", zc->zc_value);
5284
5285	error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5286	    hold_name);
5287	if (error == 0)
5288		(void) strcpy(zc->zc_value, snap_name);
5289	strfree(snap_name);
5290	strfree(hold_name);
5291	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5292	return (error);
5293}
5294
5295/*
5296 * inputs:
5297 * zc_name		name of "to" snapshot
5298 * zc_value		name of "from" snapshot
5299 * zc_cookie		file descriptor to write diff data on
5300 *
5301 * outputs:
5302 * dmu_diff_record_t's to the file descriptor
5303 */
5304static int
5305zfs_ioc_diff(zfs_cmd_t *zc)
5306{
5307	file_t *fp;
5308	cap_rights_t rights;
5309	offset_t off;
5310	int error;
5311
5312#ifdef illumos
5313	fp = getf(zc->zc_cookie);
5314#else
5315	fget_write(curthread, zc->zc_cookie,
5316		    cap_rights_init(&rights, CAP_WRITE), &fp);
5317#endif
5318	if (fp == NULL)
5319		return (SET_ERROR(EBADF));
5320
5321	off = fp->f_offset;
5322
5323#ifdef illumos
5324	error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5325#else
5326	error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
5327#endif
5328
5329	if (off >= 0 && off <= MAXOFFSET_T)
5330		fp->f_offset = off;
5331	releasef(zc->zc_cookie);
5332
5333	return (error);
5334}
5335
#ifdef illumos
/*
 * Remove all ACL files in shares dir
 *
 * Returns the value that ended the walk: either the first VOP_REMOVE()
 * failure or the non-zero result of zap_cursor_retrieve().
 * NOTE(review): that means a walk that simply runs out of entries also
 * returns non-zero -- confirm callers expect this.
 */
static int
zfs_smb_acl_purge(znode_t *dzp)
{
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zap_cursor_t cursor;
	zap_attribute_t entry;
	int err;

	zap_cursor_init(&cursor, zfsvfs->z_os, dzp->z_id);
	while ((err = zap_cursor_retrieve(&cursor, &entry)) == 0) {
		err = VOP_REMOVE(ZTOV(dzp), entry.za_name, kcred, NULL, 0);
		if (err != 0)
			break;
		zap_cursor_advance(&cursor);
	}
	zap_cursor_fini(&cursor);

	return (err);
}
#endif	/* illumos */
5359
5360static int
5361zfs_ioc_smb_acl(zfs_cmd_t *zc)
5362{
5363#ifdef illumos
5364	vnode_t *vp;
5365	znode_t *dzp;
5366	vnode_t *resourcevp = NULL;
5367	znode_t *sharedir;
5368	zfsvfs_t *zfsvfs;
5369	nvlist_t *nvlist;
5370	char *src, *target;
5371	vattr_t vattr;
5372	vsecattr_t vsec;
5373	int error = 0;
5374
5375	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5376	    NO_FOLLOW, NULL, &vp)) != 0)
5377		return (error);
5378
5379	/* Now make sure mntpnt and dataset are ZFS */
5380
5381	if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
5382	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5383	    zc->zc_name) != 0)) {
5384		VN_RELE(vp);
5385		return (SET_ERROR(EINVAL));
5386	}
5387
5388	dzp = VTOZ(vp);
5389	zfsvfs = dzp->z_zfsvfs;
5390	ZFS_ENTER(zfsvfs);
5391
5392	/*
5393	 * Create share dir if its missing.
5394	 */
5395	mutex_enter(&zfsvfs->z_lock);
5396	if (zfsvfs->z_shares_dir == 0) {
5397		dmu_tx_t *tx;
5398
5399		tx = dmu_tx_create(zfsvfs->z_os);
5400		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5401		    ZFS_SHARES_DIR);
5402		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5403		error = dmu_tx_assign(tx, TXG_WAIT);
5404		if (error != 0) {
5405			dmu_tx_abort(tx);
5406		} else {
5407			error = zfs_create_share_dir(zfsvfs, tx);
5408			dmu_tx_commit(tx);
5409		}
5410		if (error != 0) {
5411			mutex_exit(&zfsvfs->z_lock);
5412			VN_RELE(vp);
5413			ZFS_EXIT(zfsvfs);
5414			return (error);
5415		}
5416	}
5417	mutex_exit(&zfsvfs->z_lock);
5418
5419	ASSERT(zfsvfs->z_shares_dir);
5420	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5421		VN_RELE(vp);
5422		ZFS_EXIT(zfsvfs);
5423		return (error);
5424	}
5425
5426	switch (zc->zc_cookie) {
5427	case ZFS_SMB_ACL_ADD:
5428		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5429		vattr.va_type = VREG;
5430		vattr.va_mode = S_IFREG|0777;
5431		vattr.va_uid = 0;
5432		vattr.va_gid = 0;
5433
5434		vsec.vsa_mask = VSA_ACE;
5435		vsec.vsa_aclentp = &full_access;
5436		vsec.vsa_aclentsz = sizeof (full_access);
5437		vsec.vsa_aclcnt = 1;
5438
5439		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5440		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5441		if (resourcevp)
5442			VN_RELE(resourcevp);
5443		break;
5444
5445	case ZFS_SMB_ACL_REMOVE:
5446		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5447		    NULL, 0);
5448		break;
5449
5450	case ZFS_SMB_ACL_RENAME:
5451		if ((error = get_nvlist(zc->zc_nvlist_src,
5452		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5453			VN_RELE(vp);
5454			VN_RELE(ZTOV(sharedir));
5455			ZFS_EXIT(zfsvfs);
5456			return (error);
5457		}
5458		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5459		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5460		    &target)) {
5461			VN_RELE(vp);
5462			VN_RELE(ZTOV(sharedir));
5463			ZFS_EXIT(zfsvfs);
5464			nvlist_free(nvlist);
5465			return (error);
5466		}
5467		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5468		    kcred, NULL, 0);
5469		nvlist_free(nvlist);
5470		break;
5471
5472	case ZFS_SMB_ACL_PURGE:
5473		error = zfs_smb_acl_purge(sharedir);
5474		break;
5475
5476	default:
5477		error = SET_ERROR(EINVAL);
5478		break;
5479	}
5480
5481	VN_RELE(vp);
5482	VN_RELE(ZTOV(sharedir));
5483
5484	ZFS_EXIT(zfsvfs);
5485
5486	return (error);
5487#else	/* !illumos */
5488	return (EOPNOTSUPP);
5489#endif	/* illumos */
5490}
5491
5492/*
5493 * innvl: {
5494 *     "holds" -> { snapname -> holdname (string), ... }
5495 *     (optional) "cleanup_fd" -> fd (int32)
5496 * }
5497 *
5498 * outnvl: {
5499 *     snapname -> error value (int32)
5500 *     ...
5501 * }
5502 */
5503/* ARGSUSED */
5504static int
5505zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5506{
5507	nvpair_t *pair;
5508	nvlist_t *holds;
5509	int cleanup_fd = -1;
5510	int error;
5511	minor_t minor = 0;
5512
5513	error = nvlist_lookup_nvlist(args, "holds", &holds);
5514	if (error != 0)
5515		return (SET_ERROR(EINVAL));
5516
5517	/* make sure the user didn't pass us any invalid (empty) tags */
5518	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
5519	    pair = nvlist_next_nvpair(holds, pair)) {
5520		char *htag;
5521
5522		error = nvpair_value_string(pair, &htag);
5523		if (error != 0)
5524			return (SET_ERROR(error));
5525
5526		if (strlen(htag) == 0)
5527			return (SET_ERROR(EINVAL));
5528	}
5529
5530	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
5531		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
5532		if (error != 0)
5533			return (error);
5534	}
5535
5536	error = dsl_dataset_user_hold(holds, minor, errlist);
5537	if (minor != 0)
5538		zfs_onexit_fd_rele(cleanup_fd);
5539	return (error);
5540}
5541
5542/*
5543 * innvl is not used.
5544 *
5545 * outnvl: {
5546 *    holdname -> time added (uint64 seconds since epoch)
5547 *    ...
5548 * }
5549 */
5550/* ARGSUSED */
5551static int
5552zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
5553{
5554	return (dsl_dataset_get_holds(snapname, outnvl));
5555}
5556
5557/*
5558 * innvl: {
5559 *     snapname -> { holdname, ... }
5560 *     ...
5561 * }
5562 *
5563 * outnvl: {
5564 *     snapname -> error value (int32)
5565 *     ...
5566 * }
5567 */
5568/* ARGSUSED */
5569static int
5570zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
5571{
5572	return (dsl_dataset_user_release(holds, errlist));
5573}
5574
5575/*
5576 * inputs:
5577 * zc_name		name of new filesystem or snapshot
5578 * zc_value		full name of old snapshot
5579 *
5580 * outputs:
5581 * zc_cookie		space in bytes
5582 * zc_objset_type	compressed space in bytes
5583 * zc_perm_action	uncompressed space in bytes
5584 */
5585static int
5586zfs_ioc_space_written(zfs_cmd_t *zc)
5587{
5588	int error;
5589	dsl_pool_t *dp;
5590	dsl_dataset_t *new, *old;
5591
5592	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5593	if (error != 0)
5594		return (error);
5595	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
5596	if (error != 0) {
5597		dsl_pool_rele(dp, FTAG);
5598		return (error);
5599	}
5600	error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
5601	if (error != 0) {
5602		dsl_dataset_rele(new, FTAG);
5603		dsl_pool_rele(dp, FTAG);
5604		return (error);
5605	}
5606
5607	error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
5608	    &zc->zc_objset_type, &zc->zc_perm_action);
5609	dsl_dataset_rele(old, FTAG);
5610	dsl_dataset_rele(new, FTAG);
5611	dsl_pool_rele(dp, FTAG);
5612	return (error);
5613}
5614
5615/*
5616 * innvl: {
5617 *     "firstsnap" -> snapshot name
5618 * }
5619 *
5620 * outnvl: {
5621 *     "used" -> space in bytes
5622 *     "compressed" -> compressed space in bytes
5623 *     "uncompressed" -> uncompressed space in bytes
5624 * }
5625 */
5626static int
5627zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
5628{
5629	int error;
5630	dsl_pool_t *dp;
5631	dsl_dataset_t *new, *old;
5632	char *firstsnap;
5633	uint64_t used, comp, uncomp;
5634
5635	if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
5636		return (SET_ERROR(EINVAL));
5637
5638	error = dsl_pool_hold(lastsnap, FTAG, &dp);
5639	if (error != 0)
5640		return (error);
5641
5642	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
5643	if (error == 0 && !new->ds_is_snapshot) {
5644		dsl_dataset_rele(new, FTAG);
5645		error = SET_ERROR(EINVAL);
5646	}
5647	if (error != 0) {
5648		dsl_pool_rele(dp, FTAG);
5649		return (error);
5650	}
5651	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
5652	if (error == 0 && !old->ds_is_snapshot) {
5653		dsl_dataset_rele(old, FTAG);
5654		error = SET_ERROR(EINVAL);
5655	}
5656	if (error != 0) {
5657		dsl_dataset_rele(new, FTAG);
5658		dsl_pool_rele(dp, FTAG);
5659		return (error);
5660	}
5661
5662	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
5663	dsl_dataset_rele(old, FTAG);
5664	dsl_dataset_rele(new, FTAG);
5665	dsl_pool_rele(dp, FTAG);
5666	fnvlist_add_uint64(outnvl, "used", used);
5667	fnvlist_add_uint64(outnvl, "compressed", comp);
5668	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
5669	return (error);
5670}
5671
5672static int
5673zfs_ioc_jail(zfs_cmd_t *zc)
5674{
5675
5676	return (zone_dataset_attach(curthread->td_ucred, zc->zc_name,
5677	    (int)zc->zc_jailid));
5678}
5679
5680static int
5681zfs_ioc_unjail(zfs_cmd_t *zc)
5682{
5683
5684	return (zone_dataset_detach(curthread->td_ucred, zc->zc_name,
5685	    (int)zc->zc_jailid));
5686}
5687
5688/*
5689 * innvl: {
5690 *     "fd" -> file descriptor to write stream to (int32)
5691 *     (optional) "fromsnap" -> full snap name to send an incremental from
5692 *     (optional) "largeblockok" -> (value ignored)
5693 *         indicates that blocks > 128KB are permitted
5694 *     (optional) "embedok" -> (value ignored)
5695 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
5696 *     (optional) "compressok" -> (value ignored)
5697 *         presence indicates compressed DRR_WRITE records are permitted
5698 *     (optional) "resume_object" and "resume_offset" -> (uint64)
5699 *         if present, resume send stream from specified object and offset.
5700 * }
5701 *
5702 * outnvl is unused
5703 */
5704/* ARGSUSED */
5705static int
5706zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5707{
5708	cap_rights_t rights;
5709	file_t *fp;
5710	int error;
5711	offset_t off;
5712	char *fromname = NULL;
5713	int fd;
5714	boolean_t largeblockok;
5715	boolean_t embedok;
5716	boolean_t compressok;
5717	uint64_t resumeobj = 0;
5718	uint64_t resumeoff = 0;
5719
5720	error = nvlist_lookup_int32(innvl, "fd", &fd);
5721	if (error != 0)
5722		return (SET_ERROR(EINVAL));
5723
5724	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
5725
5726	largeblockok = nvlist_exists(innvl, "largeblockok");
5727	embedok = nvlist_exists(innvl, "embedok");
5728	compressok = nvlist_exists(innvl, "compressok");
5729
5730	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
5731	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
5732
5733#ifdef illumos
5734	file_t *fp = getf(fd);
5735#else
5736	fget_write(curthread, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
5737#endif
5738	if (fp == NULL)
5739		return (SET_ERROR(EBADF));
5740
5741	off = fp->f_offset;
5742	error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
5743#ifdef illumos
5744	    fd, resumeobj, resumeoff, fp->f_vnode, &off);
5745#else
5746	    fd, resumeobj, resumeoff, fp, &off);
5747#endif
5748
5749#ifdef illumos
5750	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5751		fp->f_offset = off;
5752#else
5753	fp->f_offset = off;
5754#endif
5755
5756	releasef(fd);
5757	return (error);
5758}
5759
5760/*
5761 * Determine approximately how large a zfs send stream will be -- the number
5762 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
5763 *
5764 * innvl: {
5765 *     (optional) "from" -> full snap or bookmark name to send an incremental
5766 *                          from
5767 *     (optional) "largeblockok" -> (value ignored)
5768 *         indicates that blocks > 128KB are permitted
5769 *     (optional) "embedok" -> (value ignored)
5770 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
5771 *     (optional) "compressok" -> (value ignored)
5772 *         presence indicates compressed DRR_WRITE records are permitted
5773 * }
5774 *
5775 * outnvl: {
5776 *     "space" -> bytes of space (uint64)
5777 * }
5778 */
5779static int
5780zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5781{
5782	dsl_pool_t *dp;
5783	dsl_dataset_t *tosnap;
5784	int error;
5785	char *fromname;
5786	/* LINTED E_FUNC_SET_NOT_USED */
5787	boolean_t largeblockok;
5788	/* LINTED E_FUNC_SET_NOT_USED */
5789	boolean_t embedok;
5790	boolean_t compressok;
5791	uint64_t space;
5792
5793	error = dsl_pool_hold(snapname, FTAG, &dp);
5794	if (error != 0)
5795		return (error);
5796
5797	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
5798	if (error != 0) {
5799		dsl_pool_rele(dp, FTAG);
5800		return (error);
5801	}
5802
5803	largeblockok = nvlist_exists(innvl, "largeblockok");
5804	embedok = nvlist_exists(innvl, "embedok");
5805	compressok = nvlist_exists(innvl, "compressok");
5806
5807	error = nvlist_lookup_string(innvl, "from", &fromname);
5808	if (error == 0) {
5809		if (strchr(fromname, '@') != NULL) {
5810			/*
5811			 * If from is a snapshot, hold it and use the more
5812			 * efficient dmu_send_estimate to estimate send space
5813			 * size using deadlists.
5814			 */
5815			dsl_dataset_t *fromsnap;
5816			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
5817			if (error != 0)
5818				goto out;
5819			error = dmu_send_estimate(tosnap, fromsnap, compressok,
5820			    &space);
5821			dsl_dataset_rele(fromsnap, FTAG);
5822		} else if (strchr(fromname, '#') != NULL) {
5823			/*
5824			 * If from is a bookmark, fetch the creation TXG of the
5825			 * snapshot it was created from and use that to find
5826			 * blocks that were born after it.
5827			 */
5828			zfs_bookmark_phys_t frombm;
5829
5830			error = dsl_bookmark_lookup(dp, fromname, tosnap,
5831			    &frombm);
5832			if (error != 0)
5833				goto out;
5834			error = dmu_send_estimate_from_txg(tosnap,
5835			    frombm.zbm_creation_txg, compressok, &space);
5836		} else {
5837			/*
5838			 * from is not properly formatted as a snapshot or
5839			 * bookmark
5840			 */
5841			error = SET_ERROR(EINVAL);
5842			goto out;
5843		}
5844	} else {
5845		// If estimating the size of a full send, use dmu_send_estimate
5846		error = dmu_send_estimate(tosnap, NULL, compressok, &space);
5847	}
5848
5849	fnvlist_add_uint64(outnvl, "space", space);
5850
5851out:
5852	dsl_dataset_rele(tosnap, FTAG);
5853	dsl_pool_rele(dp, FTAG);
5854	return (error);
5855}
5856
/*
 * Table of ioctl vectors, indexed by (ioc - ZFS_IOC_FIRST).  Entries are
 * filled in by zfs_ioctl_register() and zfs_ioctl_register_legacy().
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
5858
5859static void
5860zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5861    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5862    boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
5863{
5864	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5865
5866	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5867	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5868	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5869	ASSERT3P(vec->zvec_func, ==, NULL);
5870
5871	vec->zvec_legacy_func = func;
5872	vec->zvec_secpolicy = secpolicy;
5873	vec->zvec_namecheck = namecheck;
5874	vec->zvec_allow_log = log_history;
5875	vec->zvec_pool_check = pool_check;
5876}
5877
5878/*
5879 * See the block comment at the beginning of this file for details on
5880 * each argument to this function.
5881 */
5882static void
5883zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
5884    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5885    zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
5886    boolean_t allow_log)
5887{
5888	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5889
5890	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5891	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5892	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5893	ASSERT3P(vec->zvec_func, ==, NULL);
5894
5895	/* if we are logging, the name must be valid */
5896	ASSERT(!allow_log || namecheck != NO_NAME);
5897
5898	vec->zvec_name = name;
5899	vec->zvec_func = func;
5900	vec->zvec_secpolicy = secpolicy;
5901	vec->zvec_namecheck = namecheck;
5902	vec->zvec_pool_check = pool_check;
5903	vec->zvec_smush_outnvlist = smush_outnvlist;
5904	vec->zvec_allow_log = allow_log;
5905}
5906
5907static void
5908zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5909    zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
5910    zfs_ioc_poolcheck_t pool_check)
5911{
5912	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5913	    POOL_NAME, log_history, pool_check);
5914}
5915
5916static void
5917zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5918    zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
5919{
5920	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5921	    DATASET_NAME, B_FALSE, pool_check);
5922}
5923
5924static void
5925zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5926{
5927	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
5928	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5929}
5930
5931static void
5932zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5933    zfs_secpolicy_func_t *secpolicy)
5934{
5935	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5936	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
5937}
5938
5939static void
5940zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
5941    zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
5942{
5943	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5944	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
5945}
5946
5947static void
5948zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5949{
5950	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
5951	    zfs_secpolicy_read);
5952}
5953
5954static void
5955zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5956    zfs_secpolicy_func_t *secpolicy)
5957{
5958	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5959	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5960}
5961
/*
 * Populate the ioctl vector table.  Every ioctl is registered exactly once,
 * either with an nvlist-style handler (zfs_ioctl_register) or with a
 * legacy handler through one of the zfs_ioctl_register_*() helpers, which
 * supply the name check, logging flag and pool status checks.
 */
static void
zfs_ioctl_init(void)
{
	/* IOCTLS that use the nvlist-based function signature */

	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);

	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("create", ZFS_IOC_CREATE,
	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);

	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
	    POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	/* IOCTLS that use the legacy function signature */

	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);

	/* Pool-level operations gated by zfs_secpolicy_config. */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
	    zfs_ioc_pool_scan);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
	    zfs_ioc_pool_upgrade);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
	    zfs_ioc_vdev_add);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
	    zfs_ioc_vdev_remove);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
	    zfs_ioc_vdev_set_state);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
	    zfs_ioc_vdev_attach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
	    zfs_ioc_vdev_detach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
	    zfs_ioc_vdev_setpath);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
	    zfs_ioc_vdev_setfru);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
	    zfs_ioc_pool_set_props);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
	    zfs_ioc_vdev_split);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
	    zfs_ioc_pool_reguid);

	/* Operations registered with no name check at all. */
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
	    zfs_ioc_pool_configs, zfs_secpolicy_none);
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);

	/*
	 * Pool destroy and pool export are registered with logging disabled:
	 * their history is not logged as part of zfsdev_ioctl; rather,
	 * zfs_ioc_pool_export does the logging of those commands.
	 */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
	    zfs_ioc_dsobj_to_dsname,
	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
	    zfs_ioc_pool_get_history,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);

	/* Dataset reads gated by zfs_secpolicy_read. */
	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
	    zfs_ioc_space_written);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
	    zfs_ioc_objset_recvd_props);
	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
	    zfs_ioc_next_obj);
	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
	    zfs_ioc_get_fsacl);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
	    zfs_ioc_objset_stats);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
	    zfs_ioc_objset_zplprops);
	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
	    zfs_ioc_dataset_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
	    zfs_ioc_snapshot_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
	    zfs_ioc_send_progress);

	/* Dataset reads with their own security policy. */
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
	    zfs_ioc_diff, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
	    zfs_ioc_send, zfs_secpolicy_send);

	/* Dataset modifications (logged). */
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
	    zfs_secpolicy_none);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
	    zfs_secpolicy_destroy);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
	    zfs_secpolicy_rename);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
	    zfs_secpolicy_recv);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
	    zfs_secpolicy_promote);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
	    zfs_secpolicy_set_fsacl);

	/* Dataset operations registered with logging disabled. */
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
	    zfs_secpolicy_share, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);

	/* FreeBSD-only ioctls. */
#ifdef __FreeBSD__
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_JAIL, zfs_ioc_jail,
	    zfs_secpolicy_config, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_UNJAIL, zfs_ioc_unjail,
	    zfs_secpolicy_config, POOL_CHECK_NONE);
	zfs_ioctl_register("fbsd_nextboot", ZFS_IOC_NEXTBOOT,
	    zfs_ioc_nextboot, zfs_secpolicy_config, NO_NAME,
	    POOL_CHECK_NONE, B_FALSE, B_FALSE);
#endif
}
6168
6169int
6170pool_status_check(const char *name, zfs_ioc_namecheck_t type,
6171    zfs_ioc_poolcheck_t check)
6172{
6173	spa_t *spa;
6174	int error;
6175
6176	ASSERT(type == POOL_NAME || type == DATASET_NAME);
6177
6178	if (check & POOL_CHECK_NONE)
6179		return (0);
6180
6181	error = spa_open(name, &spa, FTAG);
6182	if (error == 0) {
6183		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
6184			error = SET_ERROR(EAGAIN);
6185		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
6186			error = SET_ERROR(EROFS);
6187		spa_close(spa, FTAG);
6188	}
6189	return (error);
6190}
6191
6192/*
6193 * Find a free minor number.
6194 */
6195minor_t
6196zfsdev_minor_alloc(void)
6197{
6198	static minor_t last_minor;
6199	minor_t m;
6200
6201	ASSERT(MUTEX_HELD(&spa_namespace_lock));
6202
6203	for (m = last_minor + 1; m != last_minor; m++) {
6204		if (m > ZFSDEV_MAX_MINOR)
6205			m = 1;
6206		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
6207			last_minor = m;
6208			return (m);
6209		}
6210	}
6211
6212	return (0);
6213}
6214
6215static int
6216zfs_ctldev_init(struct cdev *devp)
6217{
6218	minor_t minor;
6219	zfs_soft_state_t *zs;
6220
6221	ASSERT(MUTEX_HELD(&spa_namespace_lock));
6222
6223	minor = zfsdev_minor_alloc();
6224	if (minor == 0)
6225		return (SET_ERROR(ENXIO));
6226
6227	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
6228		return (SET_ERROR(EAGAIN));
6229
6230	devfs_set_cdevpriv((void *)(uintptr_t)minor, zfsdev_close);
6231
6232	zs = ddi_get_soft_state(zfsdev_state, minor);
6233	zs->zss_type = ZSST_CTLDEV;
6234	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
6235
6236	return (0);
6237}
6238
6239static void
6240zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
6241{
6242	ASSERT(MUTEX_HELD(&spa_namespace_lock));
6243
6244	zfs_onexit_destroy(zo);
6245	ddi_soft_state_free(zfsdev_state, minor);
6246}
6247
6248void *
6249zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
6250{
6251	zfs_soft_state_t *zp;
6252
6253	zp = ddi_get_soft_state(zfsdev_state, minor);
6254	if (zp == NULL || zp->zss_type != which)
6255		return (NULL);
6256
6257	return (zp->zss_data);
6258}
6259
6260static int
6261zfsdev_open(struct cdev *devp, int flag, int mode, struct thread *td)
6262{
6263	int error = 0;
6264
6265#ifdef illumos
6266	if (getminor(*devp) != 0)
6267		return (zvol_open(devp, flag, otyp, cr));
6268#endif
6269
6270	/* This is the control device. Allocate a new minor if requested. */
6271	if (flag & FEXCL) {
6272		mutex_enter(&spa_namespace_lock);
6273		error = zfs_ctldev_init(devp);
6274		mutex_exit(&spa_namespace_lock);
6275	}
6276
6277	return (error);
6278}
6279
6280static void
6281zfsdev_close(void *data)
6282{
6283	zfs_onexit_t *zo;
6284	minor_t minor = (minor_t)(uintptr_t)data;
6285
6286	if (minor == 0)
6287		return;
6288
6289	mutex_enter(&spa_namespace_lock);
6290	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
6291	if (zo == NULL) {
6292		mutex_exit(&spa_namespace_lock);
6293		return;
6294	}
6295	zfs_ctldev_destroy(zo, minor);
6296	mutex_exit(&spa_namespace_lock);
6297}
6298
6299static int
6300zfsdev_ioctl(struct cdev *dev, u_long zcmd, caddr_t arg, int flag,
6301    struct thread *td)
6302{
6303	zfs_cmd_t *zc;
6304	uint_t vecnum;
6305	int error, rc, len;
6306#ifdef illumos
6307	minor_t minor = getminor(dev);
6308#else
6309	zfs_iocparm_t *zc_iocparm;
6310	int cflag, cmd, oldvecnum;
6311	boolean_t newioc, compat;
6312	void *compat_zc = NULL;
6313	cred_t *cr = td->td_ucred;
6314#endif
6315	const zfs_ioc_vec_t *vec;
6316	char *saved_poolname = NULL;
6317	nvlist_t *innvl = NULL;
6318
6319	cflag = ZFS_CMD_COMPAT_NONE;
6320	compat = B_FALSE;
6321	newioc = B_TRUE;	/* "new" style (zfs_iocparm_t) ioctl */
6322
6323	len = IOCPARM_LEN(zcmd);
6324	vecnum = cmd = zcmd & 0xff;
6325
6326	/*
6327	 * Check if we are talking to supported older binaries
6328	 * and translate zfs_cmd if necessary
6329	 */
6330	if (len != sizeof(zfs_iocparm_t)) {
6331		newioc = B_FALSE;
6332		compat = B_TRUE;
6333
6334		vecnum = cmd;
6335
6336		switch (len) {
6337		case sizeof(zfs_cmd_zcmd_t):
6338			cflag = ZFS_CMD_COMPAT_LZC;
6339			break;
6340		case sizeof(zfs_cmd_deadman_t):
6341			cflag = ZFS_CMD_COMPAT_DEADMAN;
6342			break;
6343		case sizeof(zfs_cmd_v28_t):
6344			cflag = ZFS_CMD_COMPAT_V28;
6345			break;
6346		case sizeof(zfs_cmd_v15_t):
6347			cflag = ZFS_CMD_COMPAT_V15;
6348			vecnum = zfs_ioctl_v15_to_v28[cmd];
6349
6350			/*
6351			 * Return without further handling
6352			 * if the command is blacklisted.
6353			 */
6354			if (vecnum == ZFS_IOC_COMPAT_PASS)
6355				return (0);
6356			else if (vecnum == ZFS_IOC_COMPAT_FAIL)
6357				return (ENOTSUP);
6358			break;
6359		default:
6360			return (EINVAL);
6361		}
6362	}
6363
6364#ifdef illumos
6365	vecnum = cmd - ZFS_IOC_FIRST;
6366	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
6367#endif
6368
6369	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
6370		return (SET_ERROR(EINVAL));
6371	vec = &zfs_ioc_vec[vecnum];
6372
6373	zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);
6374
6375#ifdef illumos
6376	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
6377	if (error != 0) {
6378		error = SET_ERROR(EFAULT);
6379		goto out;
6380	}
6381#else	/* !illumos */
6382	bzero(zc, sizeof(zfs_cmd_t));
6383
6384	if (newioc) {
6385		zc_iocparm = (void *)arg;
6386
6387		switch (zc_iocparm->zfs_ioctl_version) {
6388		case ZFS_IOCVER_CURRENT:
6389			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_t)) {
6390				error = SET_ERROR(EINVAL);
6391				goto out;
6392			}
6393			break;
6394		case ZFS_IOCVER_INLANES:
6395			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_inlanes_t)) {
6396				error = SET_ERROR(EFAULT);
6397				goto out;
6398			}
6399			compat = B_TRUE;
6400			cflag = ZFS_CMD_COMPAT_INLANES;
6401			break;
6402		case ZFS_IOCVER_RESUME:
6403			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_resume_t)) {
6404				error = SET_ERROR(EFAULT);
6405				goto out;
6406			}
6407			compat = B_TRUE;
6408			cflag = ZFS_CMD_COMPAT_RESUME;
6409			break;
6410		case ZFS_IOCVER_EDBP:
6411			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_edbp_t)) {
6412				error = SET_ERROR(EFAULT);
6413				goto out;
6414			}
6415			compat = B_TRUE;
6416			cflag = ZFS_CMD_COMPAT_EDBP;
6417			break;
6418		case ZFS_IOCVER_ZCMD:
6419			if (zc_iocparm->zfs_cmd_size > sizeof(zfs_cmd_t) ||
6420			    zc_iocparm->zfs_cmd_size < sizeof(zfs_cmd_zcmd_t)) {
6421				error = SET_ERROR(EFAULT);
6422				goto out;
6423			}
6424			compat = B_TRUE;
6425			cflag = ZFS_CMD_COMPAT_ZCMD;
6426			break;
6427		default:
6428			error = SET_ERROR(EINVAL);
6429			goto out;
6430			/* NOTREACHED */
6431		}
6432
6433		if (compat) {
6434			ASSERT(sizeof(zfs_cmd_t) >= zc_iocparm->zfs_cmd_size);
6435			compat_zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);
6436			bzero(compat_zc, sizeof(zfs_cmd_t));
6437
6438			error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd,
6439			    compat_zc, zc_iocparm->zfs_cmd_size, flag);
6440			if (error != 0) {
6441				error = SET_ERROR(EFAULT);
6442				goto out;
6443			}
6444		} else {
6445			error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd,
6446			    zc, zc_iocparm->zfs_cmd_size, flag);
6447			if (error != 0) {
6448				error = SET_ERROR(EFAULT);
6449				goto out;
6450			}
6451		}
6452	}
6453
6454	if (compat) {
6455		if (newioc) {
6456			ASSERT(compat_zc != NULL);
6457			zfs_cmd_compat_get(zc, compat_zc, cflag);
6458		} else {
6459			ASSERT(compat_zc == NULL);
6460			zfs_cmd_compat_get(zc, arg, cflag);
6461		}
6462		oldvecnum = vecnum;
6463		error = zfs_ioctl_compat_pre(zc, &vecnum, cflag);
6464		if (error != 0)
6465			goto out;
6466		if (oldvecnum != vecnum)
6467			vec = &zfs_ioc_vec[vecnum];
6468	}
6469#endif	/* !illumos */
6470
6471	zc->zc_iflags = flag & FKIOCTL;
6472	if (zc->zc_nvlist_src_size != 0) {
6473		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
6474		    zc->zc_iflags, &innvl);
6475		if (error != 0)
6476			goto out;
6477	}
6478
6479	/* rewrite innvl for backwards compatibility */
6480	if (compat)
6481		innvl = zfs_ioctl_compat_innvl(zc, innvl, vecnum, cflag);
6482
6483	/*
6484	 * Ensure that all pool/dataset names are valid before we pass down to
6485	 * the lower layers.
6486	 */
6487	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
6488	switch (vec->zvec_namecheck) {
6489	case POOL_NAME:
6490		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
6491			error = SET_ERROR(EINVAL);
6492		else
6493			error = pool_status_check(zc->zc_name,
6494			    vec->zvec_namecheck, vec->zvec_pool_check);
6495		break;
6496
6497	case DATASET_NAME:
6498		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
6499			error = SET_ERROR(EINVAL);
6500		else
6501			error = pool_status_check(zc->zc_name,
6502			    vec->zvec_namecheck, vec->zvec_pool_check);
6503		break;
6504
6505	case NO_NAME:
6506		break;
6507	}
6508
6509	if (error == 0)
6510		error = vec->zvec_secpolicy(zc, innvl, cr);
6511
6512	if (error != 0)
6513		goto out;
6514
6515	/* legacy ioctls can modify zc_name */
6516	len = strcspn(zc->zc_name, "/@#") + 1;
6517	saved_poolname = kmem_alloc(len, KM_SLEEP);
6518	(void) strlcpy(saved_poolname, zc->zc_name, len);
6519
6520	if (vec->zvec_func != NULL) {
6521		nvlist_t *outnvl;
6522		int puterror = 0;
6523		spa_t *spa;
6524		nvlist_t *lognv = NULL;
6525
6526		ASSERT(vec->zvec_legacy_func == NULL);
6527
6528		/*
6529		 * Add the innvl to the lognv before calling the func,
6530		 * in case the func changes the innvl.
6531		 */
6532		if (vec->zvec_allow_log) {
6533			lognv = fnvlist_alloc();
6534			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
6535			    vec->zvec_name);
6536			if (!nvlist_empty(innvl)) {
6537				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
6538				    innvl);
6539			}
6540		}
6541
6542		outnvl = fnvlist_alloc();
6543		error = vec->zvec_func(zc->zc_name, innvl, outnvl);
6544
6545		if (error == 0 && vec->zvec_allow_log &&
6546		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
6547			if (!nvlist_empty(outnvl)) {
6548				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
6549				    outnvl);
6550			}
6551			(void) spa_history_log_nvl(spa, lognv);
6552			spa_close(spa, FTAG);
6553		}
6554		fnvlist_free(lognv);
6555
6556		/* rewrite outnvl for backwards compatibility */
6557		if (compat)
6558			outnvl = zfs_ioctl_compat_outnvl(zc, outnvl, vecnum,
6559			    cflag);
6560
6561		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
6562			int smusherror = 0;
6563			if (vec->zvec_smush_outnvlist) {
6564				smusherror = nvlist_smush(outnvl,
6565				    zc->zc_nvlist_dst_size);
6566			}
6567			if (smusherror == 0)
6568				puterror = put_nvlist(zc, outnvl);
6569		}
6570
6571		if (puterror != 0)
6572			error = puterror;
6573
6574		nvlist_free(outnvl);
6575	} else {
6576		error = vec->zvec_legacy_func(zc);
6577	}
6578
6579out:
6580	nvlist_free(innvl);
6581
6582#ifdef illumos
6583	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
6584	if (error == 0 && rc != 0)
6585		error = SET_ERROR(EFAULT);
6586#else
6587	if (compat) {
6588		zfs_ioctl_compat_post(zc, cmd, cflag);
6589		if (newioc) {
6590			ASSERT(compat_zc != NULL);
6591			ASSERT(sizeof(zfs_cmd_t) >= zc_iocparm->zfs_cmd_size);
6592
6593			zfs_cmd_compat_put(zc, compat_zc, vecnum, cflag);
6594			rc = ddi_copyout(compat_zc,
6595			    (void *)(uintptr_t)zc_iocparm->zfs_cmd,
6596			    zc_iocparm->zfs_cmd_size, flag);
6597			if (error == 0 && rc != 0)
6598				error = SET_ERROR(EFAULT);
6599			kmem_free(compat_zc, sizeof (zfs_cmd_t));
6600		} else {
6601			zfs_cmd_compat_put(zc, arg, vecnum, cflag);
6602		}
6603	} else {
6604		ASSERT(newioc);
6605
6606		rc = ddi_copyout(zc, (void *)(uintptr_t)zc_iocparm->zfs_cmd,
6607		    sizeof (zfs_cmd_t), flag);
6608		if (error == 0 && rc != 0)
6609			error = SET_ERROR(EFAULT);
6610	}
6611#endif
6612	if (error == 0 && vec->zvec_allow_log) {
6613		char *s = tsd_get(zfs_allow_log_key);
6614		if (s != NULL)
6615			strfree(s);
6616		(void) tsd_set(zfs_allow_log_key, saved_poolname);
6617	} else {
6618		if (saved_poolname != NULL)
6619			strfree(saved_poolname);
6620	}
6621
6622	kmem_free(zc, sizeof (zfs_cmd_t));
6623	return (error);
6624}
6625
6626#ifdef illumos
6627static int
6628zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
6629{
6630	if (cmd != DDI_ATTACH)
6631		return (DDI_FAILURE);
6632
6633	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
6634	    DDI_PSEUDO, 0) == DDI_FAILURE)
6635		return (DDI_FAILURE);
6636
6637	zfs_dip = dip;
6638
6639	ddi_report_dev(dip);
6640
6641	return (DDI_SUCCESS);
6642}
6643
6644static int
6645zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6646{
6647	if (spa_busy() || zfs_busy() || zvol_busy())
6648		return (DDI_FAILURE);
6649
6650	if (cmd != DDI_DETACH)
6651		return (DDI_FAILURE);
6652
6653	zfs_dip = NULL;
6654
6655	ddi_prop_remove_all(dip);
6656	ddi_remove_minor_node(dip, NULL);
6657
6658	return (DDI_SUCCESS);
6659}
6660
6661/*ARGSUSED*/
6662static int
6663zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
6664{
6665	switch (infocmd) {
6666	case DDI_INFO_DEVT2DEVINFO:
6667		*result = zfs_dip;
6668		return (DDI_SUCCESS);
6669
6670	case DDI_INFO_DEVT2INSTANCE:
6671		*result = (void *)0;
6672		return (DDI_SUCCESS);
6673	}
6674
6675	return (DDI_FAILURE);
6676}
6677#endif	/* illumos */
6678
6679/*
6680 * OK, so this is a little weird.
6681 *
6682 * /dev/zfs is the control node, i.e. minor 0.
6683 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
6684 *
6685 * /dev/zfs has basically nothing to do except serve up ioctls,
6686 * so most of the standard driver entry points are in zvol.c.
6687 */
6688#ifdef illumos
/*
 * Character/block entry points (illumos only).  Open, close and ioctl are
 * the zfsdev_* functions; strategy, dump, read and write are the zvol
 * routines.
 */
static struct cb_ops zfs_cb_ops = {
	zfsdev_open,	/* open */
	zfsdev_close,	/* close */
	zvol_strategy,	/* strategy */
	nodev,		/* print */
	zvol_dump,	/* dump */
	zvol_read,	/* read */
	zvol_write,	/* write */
	zfsdev_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
	CB_REV,		/* version */
	nodev,		/* async read */
	nodev,		/* async write */
};
6709
/*
 * Device operations (illumos only): zfs_info, zfs_attach and zfs_detach
 * handle autoconfiguration, and zfs_cb_ops supplies the driver entry
 * points.
 */
static struct dev_ops zfs_dev_ops = {
	DEVO_REV,	/* version */
	0,		/* refcnt */
	zfs_info,	/* info */
	nulldev,	/* identify */
	nulldev,	/* probe */
	zfs_attach,	/* attach */
	zfs_detach,	/* detach */
	nodev,		/* reset */
	&zfs_cb_ops,	/* driver operations */
	NULL,		/* no bus operations */
	NULL,		/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
6724
/*
 * Driver linkage (illumos only): ties the "ZFS storage pool" description
 * and zfs_dev_ops to mod_driverops.
 */
static struct modldrv zfs_modldrv = {
	&mod_driverops,
	"ZFS storage pool",
	&zfs_dev_ops
};
6730
/*
 * Module linkage (illumos only): a NULL-terminated list naming zfs_modlfs
 * and zfs_modldrv.  It is what _init(), _fini() and _info() pass to
 * mod_install(), mod_remove() and mod_info().
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&zfs_modlfs,
	(void *)&zfs_modldrv,
	NULL
};
6737#endif	/* illumos */
6738
6739static struct cdevsw zfs_cdevsw = {
6740	.d_version =	D_VERSION,
6741	.d_open =	zfsdev_open,
6742	.d_ioctl =	zfsdev_ioctl,
6743	.d_name =	ZFS_DEV_NAME
6744};
6745
/*
 * Free a pool name string; "arg" is the string to release.
 */
static void
zfs_allow_log_destroy(void *arg)
{
	char *name = (char *)arg;

	strfree(name);
}
6752
6753static void
6754zfsdev_init(void)
6755{
6756	zfsdev = make_dev(&zfs_cdevsw, 0x0, UID_ROOT, GID_OPERATOR, 0666,
6757	    ZFS_DEV_NAME);
6758}
6759
6760static void
6761zfsdev_fini(void)
6762{
6763	if (zfsdev != NULL)
6764		destroy_dev(zfsdev);
6765}
6766
/* Root-mount hold taken and released by zfs__init() around initialization. */
static struct root_hold_token *zfs_root_token;
/*
 * NOTE(review): not referenced in this part of the file; presumably the
 * process that owns ZFS kernel threads -- confirm against its users.
 */
struct proc *zfsproc;
6769
6770#ifdef illumos
6771int
6772_init(void)
6773{
6774	int error;
6775
6776	spa_init(FREAD | FWRITE);
6777	zfs_init();
6778	zvol_init();
6779	zfs_ioctl_init();
6780
6781	if ((error = mod_install(&modlinkage)) != 0) {
6782		zvol_fini();
6783		zfs_fini();
6784		spa_fini();
6785		return (error);
6786	}
6787
6788	tsd_create(&zfs_fsyncer_key, NULL);
6789	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
6790	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
6791
6792	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
6793	ASSERT(error == 0);
6794	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
6795
6796	return (0);
6797}
6798
6799int
6800_fini(void)
6801{
6802	int error;
6803
6804	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
6805		return (SET_ERROR(EBUSY));
6806
6807	if ((error = mod_remove(&modlinkage)) != 0)
6808		return (error);
6809
6810	zvol_fini();
6811	zfs_fini();
6812	spa_fini();
6813	if (zfs_nfsshare_inited)
6814		(void) ddi_modclose(nfs_mod);
6815	if (zfs_smbshare_inited)
6816		(void) ddi_modclose(smbsrv_mod);
6817	if (zfs_nfsshare_inited || zfs_smbshare_inited)
6818		(void) ddi_modclose(sharefs_mod);
6819
6820	tsd_destroy(&zfs_fsyncer_key);
6821	ldi_ident_release(zfs_li);
6822	zfs_li = NULL;
6823	mutex_destroy(&zfs_share_lock);
6824
6825	return (error);
6826}
6827
6828int
6829_info(struct modinfo *modinfop)
6830{
6831	return (mod_info(&modlinkage, modinfop));
6832}
6833#endif	/* illumos */
6834
/* Forward declarations of the load, unload and shutdown hooks defined below. */
static int zfs__init(void);
static int zfs__fini(void);
static void zfs_shutdown(void *, int);

/*
 * Tag returned by EVENTHANDLER_REGISTER() in zfs_modevent(); it is needed
 * to deregister zfs_shutdown() again on unload.
 */
static eventhandler_tag zfs_shutdown_event_tag;
6840
#ifdef __FreeBSD__
/*
 * Kernel stack size, in pages, below which zfs__init() prints a warning
 * about possible stack overflow.
 */
#define ZFS_MIN_KSTACK_PAGES 4
#endif
6844
6845int
6846zfs__init(void)
6847{
6848
6849#ifdef __FreeBSD__
6850#if KSTACK_PAGES < ZFS_MIN_KSTACK_PAGES
6851	printf("ZFS NOTICE: KSTACK_PAGES is %d which could result in stack "
6852	    "overflow panic!\nPlease consider adding "
6853	    "'options KSTACK_PAGES=%d' to your kernel config\n", KSTACK_PAGES,
6854	    ZFS_MIN_KSTACK_PAGES);
6855#endif
6856#endif
6857	zfs_root_token = root_mount_hold("ZFS");
6858
6859	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
6860
6861	spa_init(FREAD | FWRITE);
6862	zfs_init();
6863	zvol_init();
6864	zfs_ioctl_init();
6865
6866	tsd_create(&zfs_fsyncer_key, NULL);
6867	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
6868	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
6869	tsd_create(&zfs_geom_probe_vdev_key, NULL);
6870
6871	printf("ZFS storage pool version: features support (" SPA_VERSION_STRING ")\n");
6872	root_mount_rel(zfs_root_token);
6873
6874	zfsdev_init();
6875
6876	return (0);
6877}
6878
6879int
6880zfs__fini(void)
6881{
6882	if (spa_busy() || zfs_busy() || zvol_busy() ||
6883	    zio_injection_enabled) {
6884		return (EBUSY);
6885	}
6886
6887	zfsdev_fini();
6888	zvol_fini();
6889	zfs_fini();
6890	spa_fini();
6891
6892	tsd_destroy(&zfs_fsyncer_key);
6893	tsd_destroy(&rrw_tsd_key);
6894	tsd_destroy(&zfs_allow_log_key);
6895
6896	mutex_destroy(&zfs_share_lock);
6897
6898	return (0);
6899}
6900
6901static void
6902zfs_shutdown(void *arg __unused, int howto __unused)
6903{
6904
6905	/*
6906	 * ZFS fini routines can not properly work in a panic-ed system.
6907	 */
6908	if (panicstr == NULL)
6909		(void)zfs__fini();
6910}
6911
6912
6913static int
6914zfs_modevent(module_t mod, int type, void *unused __unused)
6915{
6916	int err;
6917
6918	switch (type) {
6919	case MOD_LOAD:
6920		err = zfs__init();
6921		if (err == 0)
6922			zfs_shutdown_event_tag = EVENTHANDLER_REGISTER(
6923			    shutdown_post_sync, zfs_shutdown, NULL,
6924			    SHUTDOWN_PRI_FIRST);
6925		return (err);
6926	case MOD_UNLOAD:
6927		err = zfs__fini();
6928		if (err == 0 && zfs_shutdown_event_tag != NULL)
6929			EVENTHANDLER_DEREGISTER(shutdown_post_sync,
6930			    zfs_shutdown_event_tag);
6931		return (err);
6932	case MOD_SHUTDOWN:
6933		return (0);
6934	default:
6935		break;
6936	}
6937	return (EOPNOTSUPP);
6938}
6939
6940static moduledata_t zfs_mod = {
6941	"zfsctrl",
6942	zfs_modevent,
6943	0
6944};
6945DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_VFS, SI_ORDER_ANY);
6946MODULE_VERSION(zfsctrl, 1);
6947MODULE_DEPEND(zfsctrl, opensolaris, 1, 1, 1);
6948MODULE_DEPEND(zfsctrl, krpc, 1, 1, 1);
6949MODULE_DEPEND(zfsctrl, acl_nfs4, 1, 1, 1);
6950