1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21168404Spjd/*
22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23226707Spjd * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
24226707Spjd * All rights reserved.
25332547Smav * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
26264835Sdelphij * Copyright (c) 2014 Joyent, Inc. All rights reserved.
27286575Smav * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
28282126Savg * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
29359722Sfreqlabs * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
30168404Spjd */
31168404Spjd
32168404Spjd#include <sys/dmu.h>
33185029Spjd#include <sys/dmu_objset.h>
34168404Spjd#include <sys/dmu_tx.h>
35168404Spjd#include <sys/dsl_dataset.h>
36168404Spjd#include <sys/dsl_dir.h>
37168404Spjd#include <sys/dsl_prop.h>
38168404Spjd#include <sys/dsl_synctask.h>
39185029Spjd#include <sys/dsl_deleg.h>
40259813Sdelphij#include <sys/dmu_impl.h>
41168404Spjd#include <sys/spa.h>
42219089Spjd#include <sys/metaslab.h>
43168404Spjd#include <sys/zap.h>
44168404Spjd#include <sys/zio.h>
45168404Spjd#include <sys/arc.h>
46185029Spjd#include <sys/sunddi.h>
47219317Spjd#include <sys/zvol.h>
48226678Spjd#ifdef _KERNEL
49226676Spjd#include <sys/zfs_vfsops.h>
50226678Spjd#endif
51264835Sdelphij#include <sys/zfeature.h>
52264835Sdelphij#include <sys/policy.h>
53264835Sdelphij#include <sys/zfs_znode.h>
54168404Spjd#include "zfs_namecheck.h"
55264835Sdelphij#include "zfs_prop.h"
56168404Spjd
57264835Sdelphij/*
58264835Sdelphij * Filesystem and Snapshot Limits
59264835Sdelphij * ------------------------------
60264835Sdelphij *
61264835Sdelphij * These limits are used to restrict the number of filesystems and/or snapshots
62264835Sdelphij * that can be created at a given level in the tree or below. A typical
63264835Sdelphij * use-case is with a delegated dataset where the administrator wants to ensure
64264835Sdelphij * that a user within the zone is not creating too many additional filesystems
65264835Sdelphij * or snapshots, even though they're not exceeding their space quota.
66264835Sdelphij *
67264835Sdelphij * The filesystem and snapshot counts are stored as extensible properties. This
68264835Sdelphij * capability is controlled by a feature flag and must be enabled to be used.
69264835Sdelphij * Once enabled, the feature is not active until the first limit is set. At
70264835Sdelphij * that point, future operations to create/destroy filesystems or snapshots
71264835Sdelphij * will validate and update the counts.
72264835Sdelphij *
73264835Sdelphij * Because the count properties will not exist before the feature is active,
74264835Sdelphij * the counts are updated when a limit is first set on an uninitialized
75264835Sdelphij * dsl_dir node in the tree (The filesystem/snapshot count on a node includes
76264835Sdelphij * all of the nested filesystems/snapshots. Thus, a new leaf node has a
77264835Sdelphij * filesystem count of 0 and a snapshot count of 0. Non-existent filesystem and
78264835Sdelphij * snapshot count properties on a node indicate uninitialized counts on that
79264835Sdelphij * node.) When first setting a limit on an uninitialized node, the code starts
80264835Sdelphij * at the filesystem with the new limit and descends into all sub-filesystems
81264835Sdelphij * to add the count properties.
82264835Sdelphij *
83264835Sdelphij * In practice this is lightweight since a limit is typically set when the
84264835Sdelphij * filesystem is created and thus has no children. Once valid, changing the
85264835Sdelphij * limit value won't require a re-traversal since the counts are already valid.
86264835Sdelphij * When recursively fixing the counts, if a node with a limit is encountered
87264835Sdelphij * during the descent, the counts are known to be valid and there is no need to
88264835Sdelphij * descend into that filesystem's children. The counts on filesystems above the
89264835Sdelphij * one with the new limit will still be uninitialized, unless a limit is
90264835Sdelphij * eventually set on one of those filesystems. The counts are always recursively
91264835Sdelphij * updated when a limit is set on a dataset, unless there is already a limit.
92264835Sdelphij * When a new limit value is set on a filesystem with an existing limit, it is
93264835Sdelphij * possible for the new limit to be less than the current count at that level
94264835Sdelphij * since a user who can change the limit is also allowed to exceed the limit.
95264835Sdelphij *
96264835Sdelphij * Once the feature is active, then whenever a filesystem or snapshot is
97264835Sdelphij * created, the code recurses up the tree, validating the new count against the
98264835Sdelphij * limit at each initialized level. In practice, most levels will not have a
99264835Sdelphij * limit set. If there is a limit at any initialized level up the tree, the
100264835Sdelphij * check must pass or the creation will fail. Likewise, when a filesystem or
101264835Sdelphij * snapshot is destroyed, the counts are recursively adjusted all the way up
102264835Sdelphij * the initialized nodes in the tree. Renaming a filesystem into a different
103264835Sdelphij * point in the tree will first validate, then update the counts on each branch
104264835Sdelphij * up to the common ancestor. A receive will also validate the counts and then
105264835Sdelphij * update them.
106264835Sdelphij *
107264835Sdelphij * An exception to the above behavior is that the limit is not enforced if the
108264835Sdelphij * user has permission to modify the limit. This is primarily so that
109264835Sdelphij * recursive snapshots in the global zone always work. We want to prevent a
110264835Sdelphij * denial-of-service in which a lower level delegated dataset could max out its
111264835Sdelphij * limit and thus block recursive snapshots from being taken in the global zone.
112264835Sdelphij * Because of this, it is possible for the snapshot count to be over the limit
113264835Sdelphij * and snapshots taken in the global zone could cause a lower level dataset to
114264835Sdelphij * hit or exceed its limit. The administrator taking the global zone recursive
115264835Sdelphij * snapshot should be aware of this side-effect and behave accordingly.
116264835Sdelphij * For consistency, the filesystem limit is also not enforced if the user can
117264835Sdelphij * modify the limit.
118264835Sdelphij *
119264835Sdelphij * The filesystem and snapshot limits are validated by dsl_fs_ss_limit_check()
120264835Sdelphij * and updated by dsl_fs_ss_count_adjust(). A new limit value is setup in
121264835Sdelphij * dsl_dir_activate_fs_ss_limit() and the counts are adjusted, if necessary, by
122264835Sdelphij * dsl_dir_init_fs_ss_count().
123264835Sdelphij *
124264835Sdelphij * There is a special case when we receive a filesystem that already exists. In
125264835Sdelphij * this case a temporary clone name of %X is created (see dmu_recv_begin). We
126264835Sdelphij * never update the filesystem counts for temporary clones.
127264835Sdelphij *
128264835Sdelphij * Likewise, we do not update the snapshot counts for temporary snapshots,
129264835Sdelphij * such as those created by zfs diff.
130264835Sdelphij */
131264835Sdelphij
132275782Sdelphijextern inline dsl_dir_phys_t *dsl_dir_phys(dsl_dir_t *dd);
133275782Sdelphij
134185029Spjdstatic uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
135168404Spjd
136332525Smavtypedef struct ddulrt_arg {
137332525Smav	dsl_dir_t	*ddulrta_dd;
138332525Smav	uint64_t	ddlrta_txg;
139332525Smav} ddulrt_arg_t;
140332525Smav
141168404Spjdstatic void
142321527Smavdsl_dir_evict_async(void *dbu)
143168404Spjd{
144286575Smav	dsl_dir_t *dd = dbu;
145168404Spjd	dsl_pool_t *dp = dd->dd_pool;
146168404Spjd	int t;
147168404Spjd
148286575Smav	dd->dd_dbuf = NULL;
149286575Smav
150168404Spjd	for (t = 0; t < TXG_SIZE; t++) {
151168404Spjd		ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
152168404Spjd		ASSERT(dd->dd_tempreserved[t] == 0);
153168404Spjd		ASSERT(dd->dd_space_towrite[t] == 0);
154168404Spjd	}
155168404Spjd
156168404Spjd	if (dd->dd_parent)
157286575Smav		dsl_dir_async_rele(dd->dd_parent, dd);
158168404Spjd
159286575Smav	spa_async_close(dd->dd_pool->dp_spa, dd);
160168404Spjd
161288204Sdelphij	dsl_prop_fini(dd);
162168404Spjd	mutex_destroy(&dd->dd_lock);
163168404Spjd	kmem_free(dd, sizeof (dsl_dir_t));
164168404Spjd}
165168404Spjd
166168404Spjdint
167248571Smmdsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
168168404Spjd    const char *tail, void *tag, dsl_dir_t **ddp)
169168404Spjd{
170168404Spjd	dmu_buf_t *dbuf;
171168404Spjd	dsl_dir_t *dd;
172168404Spjd	int err;
173168404Spjd
174248571Smm	ASSERT(dsl_pool_config_held(dp));
175168404Spjd
176168404Spjd	err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
177248571Smm	if (err != 0)
178168404Spjd		return (err);
179168404Spjd	dd = dmu_buf_get_user(dbuf);
180168404Spjd#ifdef ZFS_DEBUG
181168404Spjd	{
182168404Spjd		dmu_object_info_t doi;
183168404Spjd		dmu_object_info_from_db(dbuf, &doi);
184259813Sdelphij		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR);
185185029Spjd		ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
186168404Spjd	}
187168404Spjd#endif
188168404Spjd	if (dd == NULL) {
189168404Spjd		dsl_dir_t *winner;
190168404Spjd
191168404Spjd		dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
192168404Spjd		dd->dd_object = ddobj;
193168404Spjd		dd->dd_dbuf = dbuf;
194168404Spjd		dd->dd_pool = dp;
195168404Spjd		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
196288204Sdelphij		dsl_prop_init(dd);
197168404Spjd
198219089Spjd		dsl_dir_snap_cmtime_update(dd);
199219089Spjd
200275782Sdelphij		if (dsl_dir_phys(dd)->dd_parent_obj) {
201275782Sdelphij			err = dsl_dir_hold_obj(dp,
202275782Sdelphij			    dsl_dir_phys(dd)->dd_parent_obj, NULL, dd,
203275782Sdelphij			    &dd->dd_parent);
204248571Smm			if (err != 0)
205185029Spjd				goto errout;
206168404Spjd			if (tail) {
207168404Spjd#ifdef ZFS_DEBUG
208168404Spjd				uint64_t foundobj;
209168404Spjd
210168404Spjd				err = zap_lookup(dp->dp_meta_objset,
211275782Sdelphij				    dsl_dir_phys(dd->dd_parent)->
212275782Sdelphij				    dd_child_dir_zapobj, tail,
213275782Sdelphij				    sizeof (foundobj), 1, &foundobj);
214168404Spjd				ASSERT(err || foundobj == ddobj);
215168404Spjd#endif
216168404Spjd				(void) strcpy(dd->dd_myname, tail);
217168404Spjd			} else {
218168404Spjd				err = zap_value_search(dp->dp_meta_objset,
219275782Sdelphij				    dsl_dir_phys(dd->dd_parent)->
220275782Sdelphij				    dd_child_dir_zapobj,
221185029Spjd				    ddobj, 0, dd->dd_myname);
222168404Spjd			}
223248571Smm			if (err != 0)
224185029Spjd				goto errout;
225168404Spjd		} else {
226168404Spjd			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
227168404Spjd		}
228168404Spjd
229219089Spjd		if (dsl_dir_is_clone(dd)) {
230219089Spjd			dmu_buf_t *origin_bonus;
231219089Spjd			dsl_dataset_phys_t *origin_phys;
232219089Spjd
233219089Spjd			/*
234219089Spjd			 * We can't open the origin dataset, because
235219089Spjd			 * that would require opening this dsl_dir.
236219089Spjd			 * Just look at its phys directly instead.
237219089Spjd			 */
238219089Spjd			err = dmu_bonus_hold(dp->dp_meta_objset,
239275782Sdelphij			    dsl_dir_phys(dd)->dd_origin_obj, FTAG,
240275782Sdelphij			    &origin_bonus);
241248571Smm			if (err != 0)
242219089Spjd				goto errout;
243219089Spjd			origin_phys = origin_bonus->db_data;
244219089Spjd			dd->dd_origin_txg =
245219089Spjd			    origin_phys->ds_creation_txg;
246219089Spjd			dmu_buf_rele(origin_bonus, FTAG);
247219089Spjd		}
248219089Spjd
249321527Smav		dmu_buf_init_user(&dd->dd_dbu, NULL, dsl_dir_evict_async,
250321527Smav		    &dd->dd_dbuf);
251286575Smav		winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu);
252286575Smav		if (winner != NULL) {
253168404Spjd			if (dd->dd_parent)
254248571Smm				dsl_dir_rele(dd->dd_parent, dd);
255288204Sdelphij			dsl_prop_fini(dd);
256168404Spjd			mutex_destroy(&dd->dd_lock);
257168404Spjd			kmem_free(dd, sizeof (dsl_dir_t));
258168404Spjd			dd = winner;
259168404Spjd		} else {
260168404Spjd			spa_open_ref(dp->dp_spa, dd);
261168404Spjd		}
262168404Spjd	}
263168404Spjd
264168404Spjd	/*
265168404Spjd	 * The dsl_dir_t has both open-to-close and instantiate-to-evict
266168404Spjd	 * holds on the spa.  We need the open-to-close holds because
267168404Spjd	 * otherwise the spa_refcnt wouldn't change when we open a
268168404Spjd	 * dir which the spa also has open, so we could incorrectly
269168404Spjd	 * think it was OK to unload/export/destroy the pool.  We need
270168404Spjd	 * the instantiate-to-evict hold because the dsl_dir_t has a
271168404Spjd	 * pointer to the dd_pool, which has a pointer to the spa_t.
272168404Spjd	 */
273168404Spjd	spa_open_ref(dp->dp_spa, tag);
274168404Spjd	ASSERT3P(dd->dd_pool, ==, dp);
275168404Spjd	ASSERT3U(dd->dd_object, ==, ddobj);
276168404Spjd	ASSERT3P(dd->dd_dbuf, ==, dbuf);
277168404Spjd	*ddp = dd;
278168404Spjd	return (0);
279185029Spjd
280185029Spjderrout:
281185029Spjd	if (dd->dd_parent)
282248571Smm		dsl_dir_rele(dd->dd_parent, dd);
283288204Sdelphij	dsl_prop_fini(dd);
284185029Spjd	mutex_destroy(&dd->dd_lock);
285185029Spjd	kmem_free(dd, sizeof (dsl_dir_t));
286185029Spjd	dmu_buf_rele(dbuf, tag);
287185029Spjd	return (err);
288168404Spjd}
289168404Spjd
290168404Spjdvoid
291248571Smmdsl_dir_rele(dsl_dir_t *dd, void *tag)
292168404Spjd{
293168404Spjd	dprintf_dd(dd, "%s\n", "");
294168404Spjd	spa_close(dd->dd_pool->dp_spa, tag);
295168404Spjd	dmu_buf_rele(dd->dd_dbuf, tag);
296168404Spjd}
297168404Spjd
298286575Smav/*
299286575Smav * Remove a reference to the given dsl dir that is being asynchronously
300286575Smav * released.  Async releases occur from a taskq performing eviction of
301286575Smav * dsl datasets and dirs.  This process is identical to a normal release
302286575Smav * with the exception of using the async API for releasing the reference on
303286575Smav * the spa.
304286575Smav */
305286575Smavvoid
306286575Smavdsl_dir_async_rele(dsl_dir_t *dd, void *tag)
307286575Smav{
308286575Smav	dprintf_dd(dd, "%s\n", "");
309286575Smav	spa_async_close(dd->dd_pool->dp_spa, tag);
310286575Smav	dmu_buf_rele(dd->dd_dbuf, tag);
311286575Smav}
312286575Smav
313307108Smav/* buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes */
314168404Spjdvoid
315168404Spjddsl_dir_name(dsl_dir_t *dd, char *buf)
316168404Spjd{
317168404Spjd	if (dd->dd_parent) {
318168404Spjd		dsl_dir_name(dd->dd_parent, buf);
319307108Smav		VERIFY3U(strlcat(buf, "/", ZFS_MAX_DATASET_NAME_LEN), <,
320307108Smav		    ZFS_MAX_DATASET_NAME_LEN);
321168404Spjd	} else {
322168404Spjd		buf[0] = '\0';
323168404Spjd	}
324168404Spjd	if (!MUTEX_HELD(&dd->dd_lock)) {
325168404Spjd		/*
326168404Spjd		 * recursive mutex so that we can use
327168404Spjd		 * dprintf_dd() with dd_lock held
328168404Spjd		 */
329168404Spjd		mutex_enter(&dd->dd_lock);
330307108Smav		VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN),
331307108Smav		    <, ZFS_MAX_DATASET_NAME_LEN);
332168404Spjd		mutex_exit(&dd->dd_lock);
333168404Spjd	} else {
334307108Smav		VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN),
335307108Smav		    <, ZFS_MAX_DATASET_NAME_LEN);
336168404Spjd	}
337168404Spjd}
338168404Spjd
339239620Smm/* Calculate name length, avoiding all the strcat calls of dsl_dir_name */
340168404Spjdint
341168498Spjddsl_dir_namelen(dsl_dir_t *dd)
342168498Spjd{
343168498Spjd	int result = 0;
344168498Spjd
345168498Spjd	if (dd->dd_parent) {
346168498Spjd		/* parent's name + 1 for the "/" */
347168498Spjd		result = dsl_dir_namelen(dd->dd_parent) + 1;
348168498Spjd	}
349168498Spjd
350168498Spjd	if (!MUTEX_HELD(&dd->dd_lock)) {
351168498Spjd		/* see dsl_dir_name */
352168498Spjd		mutex_enter(&dd->dd_lock);
353168498Spjd		result += strlen(dd->dd_myname);
354168498Spjd		mutex_exit(&dd->dd_lock);
355168498Spjd	} else {
356168498Spjd		result += strlen(dd->dd_myname);
357168498Spjd	}
358168498Spjd
359168498Spjd	return (result);
360168498Spjd}
361168498Spjd
362168404Spjdstatic int
363168404Spjdgetcomponent(const char *path, char *component, const char **nextp)
364168404Spjd{
365168404Spjd	char *p;
366248571Smm
367209962Smm	if ((path == NULL) || (path[0] == '\0'))
368249195Smm		return (SET_ERROR(ENOENT));
369168404Spjd	/* This would be a good place to reserve some namespace... */
370168404Spjd	p = strpbrk(path, "/@");
371168404Spjd	if (p && (p[1] == '/' || p[1] == '@')) {
372168404Spjd		/* two separators in a row */
373249195Smm		return (SET_ERROR(EINVAL));
374168404Spjd	}
375168404Spjd	if (p == NULL || p == path) {
376168404Spjd		/*
377168404Spjd		 * if the first thing is an @ or /, it had better be an
378168404Spjd		 * @ and it had better not have any more ats or slashes,
379168404Spjd		 * and it had better have something after the @.
380168404Spjd		 */
381168404Spjd		if (p != NULL &&
382168404Spjd		    (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
383249195Smm			return (SET_ERROR(EINVAL));
384307108Smav		if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN)
385249195Smm			return (SET_ERROR(ENAMETOOLONG));
386168404Spjd		(void) strcpy(component, path);
387168404Spjd		p = NULL;
388168404Spjd	} else if (p[0] == '/') {
389307108Smav		if (p - path >= ZFS_MAX_DATASET_NAME_LEN)
390249195Smm			return (SET_ERROR(ENAMETOOLONG));
391168404Spjd		(void) strncpy(component, path, p - path);
392248571Smm		component[p - path] = '\0';
393168404Spjd		p++;
394168404Spjd	} else if (p[0] == '@') {
395168404Spjd		/*
396168404Spjd		 * if the next separator is an @, there better not be
397168404Spjd		 * any more slashes.
398168404Spjd		 */
399168404Spjd		if (strchr(path, '/'))
400249195Smm			return (SET_ERROR(EINVAL));
401307108Smav		if (p - path >= ZFS_MAX_DATASET_NAME_LEN)
402249195Smm			return (SET_ERROR(ENAMETOOLONG));
403168404Spjd		(void) strncpy(component, path, p - path);
404248571Smm		component[p - path] = '\0';
405168404Spjd	} else {
406248571Smm		panic("invalid p=%p", (void *)p);
407168404Spjd	}
408168404Spjd	*nextp = p;
409168404Spjd	return (0);
410168404Spjd}
411168404Spjd
412168404Spjd/*
413248571Smm * Return the dsl_dir_t, and possibly the last component which couldn't
414248571Smm * be found in *tail.  The name must be in the specified dsl_pool_t.  This
415248571Smm * thread must hold the dp_config_rwlock for the pool.  Returns NULL if the
416248571Smm * path is bogus, or if tail==NULL and we couldn't parse the whole name.
417248571Smm * (*tail)[0] == '@' means that the last component is a snapshot.
418168404Spjd */
419168404Spjdint
420248571Smmdsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
421168404Spjd    dsl_dir_t **ddp, const char **tailp)
422168404Spjd{
423307108Smav	char buf[ZFS_MAX_DATASET_NAME_LEN];
424248571Smm	const char *spaname, *next, *nextnext = NULL;
425168404Spjd	int err;
426168404Spjd	dsl_dir_t *dd;
427168404Spjd	uint64_t ddobj;
428168404Spjd
429168404Spjd	err = getcomponent(name, buf, &next);
430248571Smm	if (err != 0)
431168404Spjd		return (err);
432168404Spjd
433248571Smm	/* Make sure the name is in the specified pool. */
434248571Smm	spaname = spa_name(dp->dp_spa);
435248571Smm	if (strcmp(buf, spaname) != 0)
436282126Savg		return (SET_ERROR(EXDEV));
437168404Spjd
438248571Smm	ASSERT(dsl_pool_config_held(dp));
439168404Spjd
440248571Smm	err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
441248571Smm	if (err != 0) {
442168404Spjd		return (err);
443168404Spjd	}
444168404Spjd
445168404Spjd	while (next != NULL) {
446286575Smav		dsl_dir_t *child_dd;
447168404Spjd		err = getcomponent(next, buf, &nextnext);
448248571Smm		if (err != 0)
449168404Spjd			break;
450168404Spjd		ASSERT(next[0] != '\0');
451168404Spjd		if (next[0] == '@')
452168404Spjd			break;
453168404Spjd		dprintf("looking up %s in obj%lld\n",
454275782Sdelphij		    buf, dsl_dir_phys(dd)->dd_child_dir_zapobj);
455168404Spjd
456168404Spjd		err = zap_lookup(dp->dp_meta_objset,
457275782Sdelphij		    dsl_dir_phys(dd)->dd_child_dir_zapobj,
458168404Spjd		    buf, sizeof (ddobj), 1, &ddobj);
459248571Smm		if (err != 0) {
460168404Spjd			if (err == ENOENT)
461168404Spjd				err = 0;
462168404Spjd			break;
463168404Spjd		}
464168404Spjd
465286575Smav		err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_dd);
466248571Smm		if (err != 0)
467168404Spjd			break;
468248571Smm		dsl_dir_rele(dd, tag);
469286575Smav		dd = child_dd;
470168404Spjd		next = nextnext;
471168404Spjd	}
472168404Spjd
473248571Smm	if (err != 0) {
474248571Smm		dsl_dir_rele(dd, tag);
475168404Spjd		return (err);
476168404Spjd	}
477168404Spjd
478168404Spjd	/*
479168404Spjd	 * It's an error if there's more than one component left, or
480168404Spjd	 * tailp==NULL and there's any component left.
481168404Spjd	 */
482168404Spjd	if (next != NULL &&
483168404Spjd	    (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
484168404Spjd		/* bad path name */
485248571Smm		dsl_dir_rele(dd, tag);
486168404Spjd		dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
487249195Smm		err = SET_ERROR(ENOENT);
488168404Spjd	}
489248571Smm	if (tailp != NULL)
490168404Spjd		*tailp = next;
491168404Spjd	*ddp = dd;
492168404Spjd	return (err);
493168404Spjd}
494168404Spjd
495264835Sdelphij/*
496264835Sdelphij * If the counts are already initialized for this filesystem and its
497264835Sdelphij * descendants then do nothing, otherwise initialize the counts.
498264835Sdelphij *
499264835Sdelphij * The counts on this filesystem, and those below, may be uninitialized due to
500264835Sdelphij * either the use of a pre-existing pool which did not support the
501264835Sdelphij * filesystem/snapshot limit feature, or one in which the feature had not yet
502264835Sdelphij * been enabled.
503264835Sdelphij *
504264835Sdelphij * Recursively descend the filesystem tree and update the filesystem/snapshot
505264835Sdelphij * counts on each filesystem below, then update the cumulative count on the
506264835Sdelphij * current filesystem. If the filesystem already has a count set on it,
507264835Sdelphij * then we know that its counts, and the counts on the filesystems below it,
508264835Sdelphij * are already correct, so we don't have to update this filesystem.
509264835Sdelphij */
510264835Sdelphijstatic void
511264835Sdelphijdsl_dir_init_fs_ss_count(dsl_dir_t *dd, dmu_tx_t *tx)
512264835Sdelphij{
513264835Sdelphij	uint64_t my_fs_cnt = 0;
514264835Sdelphij	uint64_t my_ss_cnt = 0;
515264835Sdelphij	dsl_pool_t *dp = dd->dd_pool;
516264835Sdelphij	objset_t *os = dp->dp_meta_objset;
517264835Sdelphij	zap_cursor_t *zc;
518264835Sdelphij	zap_attribute_t *za;
519264835Sdelphij	dsl_dataset_t *ds;
520264835Sdelphij
521266915Sdelphij	ASSERT(spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT));
522264835Sdelphij	ASSERT(dsl_pool_config_held(dp));
523264835Sdelphij	ASSERT(dmu_tx_is_syncing(tx));
524264835Sdelphij
525264835Sdelphij	dsl_dir_zapify(dd, tx);
526264835Sdelphij
527264835Sdelphij	/*
528264835Sdelphij	 * If the filesystem count has already been initialized then we
529264835Sdelphij	 * don't need to recurse down any further.
530264835Sdelphij	 */
531264835Sdelphij	if (zap_contains(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT) == 0)
532264835Sdelphij		return;
533264835Sdelphij
534264835Sdelphij	zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
535264835Sdelphij	za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
536264835Sdelphij
537264835Sdelphij	/* Iterate my child dirs */
538275782Sdelphij	for (zap_cursor_init(zc, os, dsl_dir_phys(dd)->dd_child_dir_zapobj);
539264835Sdelphij	    zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) {
540264835Sdelphij		dsl_dir_t *chld_dd;
541264835Sdelphij		uint64_t count;
542264835Sdelphij
543264835Sdelphij		VERIFY0(dsl_dir_hold_obj(dp, za->za_first_integer, NULL, FTAG,
544264835Sdelphij		    &chld_dd));
545264835Sdelphij
546264835Sdelphij		/*
547264835Sdelphij		 * Ignore hidden ($FREE, $MOS & $ORIGIN) objsets and
548264835Sdelphij		 * temporary datasets.
549264835Sdelphij		 */
550264835Sdelphij		if (chld_dd->dd_myname[0] == '$' ||
551264835Sdelphij		    chld_dd->dd_myname[0] == '%') {
552264835Sdelphij			dsl_dir_rele(chld_dd, FTAG);
553264835Sdelphij			continue;
554264835Sdelphij		}
555264835Sdelphij
556264835Sdelphij		my_fs_cnt++;	/* count this child */
557264835Sdelphij
558264835Sdelphij		dsl_dir_init_fs_ss_count(chld_dd, tx);
559264835Sdelphij
560264835Sdelphij		VERIFY0(zap_lookup(os, chld_dd->dd_object,
561264835Sdelphij		    DD_FIELD_FILESYSTEM_COUNT, sizeof (count), 1, &count));
562264835Sdelphij		my_fs_cnt += count;
563264835Sdelphij		VERIFY0(zap_lookup(os, chld_dd->dd_object,
564264835Sdelphij		    DD_FIELD_SNAPSHOT_COUNT, sizeof (count), 1, &count));
565264835Sdelphij		my_ss_cnt += count;
566264835Sdelphij
567264835Sdelphij		dsl_dir_rele(chld_dd, FTAG);
568264835Sdelphij	}
569264835Sdelphij	zap_cursor_fini(zc);
570264835Sdelphij	/* Count my snapshots (we counted children's snapshots above) */
571264835Sdelphij	VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
572275782Sdelphij	    dsl_dir_phys(dd)->dd_head_dataset_obj, FTAG, &ds));
573264835Sdelphij
574275782Sdelphij	for (zap_cursor_init(zc, os, dsl_dataset_phys(ds)->ds_snapnames_zapobj);
575264835Sdelphij	    zap_cursor_retrieve(zc, za) == 0;
576264835Sdelphij	    zap_cursor_advance(zc)) {
577264835Sdelphij		/* Don't count temporary snapshots */
578264835Sdelphij		if (za->za_name[0] != '%')
579264835Sdelphij			my_ss_cnt++;
580264835Sdelphij	}
581266915Sdelphij	zap_cursor_fini(zc);
582264835Sdelphij
583264835Sdelphij	dsl_dataset_rele(ds, FTAG);
584264835Sdelphij
585264835Sdelphij	kmem_free(zc, sizeof (zap_cursor_t));
586264835Sdelphij	kmem_free(za, sizeof (zap_attribute_t));
587264835Sdelphij
588264835Sdelphij	/* we're in a sync task, update counts */
589264835Sdelphij	dmu_buf_will_dirty(dd->dd_dbuf, tx);
590264835Sdelphij	VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
591264835Sdelphij	    sizeof (my_fs_cnt), 1, &my_fs_cnt, tx));
592264835Sdelphij	VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
593264835Sdelphij	    sizeof (my_ss_cnt), 1, &my_ss_cnt, tx));
594264835Sdelphij}
595264835Sdelphij
596264835Sdelphijstatic int
597264835Sdelphijdsl_dir_actv_fs_ss_limit_check(void *arg, dmu_tx_t *tx)
598264835Sdelphij{
599264835Sdelphij	char *ddname = (char *)arg;
600264835Sdelphij	dsl_pool_t *dp = dmu_tx_pool(tx);
601264835Sdelphij	dsl_dataset_t *ds;
602264835Sdelphij	dsl_dir_t *dd;
603264835Sdelphij	int error;
604264835Sdelphij
605264835Sdelphij	error = dsl_dataset_hold(dp, ddname, FTAG, &ds);
606264835Sdelphij	if (error != 0)
607264835Sdelphij		return (error);
608264835Sdelphij
609264835Sdelphij	if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) {
610264835Sdelphij		dsl_dataset_rele(ds, FTAG);
611264835Sdelphij		return (SET_ERROR(ENOTSUP));
612264835Sdelphij	}
613264835Sdelphij
614264835Sdelphij	dd = ds->ds_dir;
615264835Sdelphij	if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT) &&
616264835Sdelphij	    dsl_dir_is_zapified(dd) &&
617264835Sdelphij	    zap_contains(dp->dp_meta_objset, dd->dd_object,
618264835Sdelphij	    DD_FIELD_FILESYSTEM_COUNT) == 0) {
619264835Sdelphij		dsl_dataset_rele(ds, FTAG);
620264835Sdelphij		return (SET_ERROR(EALREADY));
621264835Sdelphij	}
622264835Sdelphij
623264835Sdelphij	dsl_dataset_rele(ds, FTAG);
624264835Sdelphij	return (0);
625264835Sdelphij}
626264835Sdelphij
627264835Sdelphijstatic void
628264835Sdelphijdsl_dir_actv_fs_ss_limit_sync(void *arg, dmu_tx_t *tx)
629264835Sdelphij{
630264835Sdelphij	char *ddname = (char *)arg;
631264835Sdelphij	dsl_pool_t *dp = dmu_tx_pool(tx);
632264835Sdelphij	dsl_dataset_t *ds;
633264835Sdelphij	spa_t *spa;
634264835Sdelphij
635264835Sdelphij	VERIFY0(dsl_dataset_hold(dp, ddname, FTAG, &ds));
636264835Sdelphij
637264835Sdelphij	spa = dsl_dataset_get_spa(ds);
638264835Sdelphij
639264835Sdelphij	if (!spa_feature_is_active(spa, SPA_FEATURE_FS_SS_LIMIT)) {
640264835Sdelphij		/*
641264835Sdelphij		 * Since the feature was not active and we're now setting a
642264835Sdelphij		 * limit, increment the feature-active counter so that the
643264835Sdelphij		 * feature becomes active for the first time.
644264835Sdelphij		 *
645264835Sdelphij		 * We are already in a sync task so we can update the MOS.
646264835Sdelphij		 */
647264835Sdelphij		spa_feature_incr(spa, SPA_FEATURE_FS_SS_LIMIT, tx);
648264835Sdelphij	}
649264835Sdelphij
650264835Sdelphij	/*
651264835Sdelphij	 * Since we are now setting a non-UINT64_MAX limit on the filesystem,
652264835Sdelphij	 * we need to ensure the counts are correct. Descend down the tree from
653264835Sdelphij	 * this point and update all of the counts to be accurate.
654264835Sdelphij	 */
655264835Sdelphij	dsl_dir_init_fs_ss_count(ds->ds_dir, tx);
656264835Sdelphij
657264835Sdelphij	dsl_dataset_rele(ds, FTAG);
658264835Sdelphij}
659264835Sdelphij
660264835Sdelphij/*
661264835Sdelphij * Make sure the feature is enabled and activate it if necessary.
662264835Sdelphij * Since we're setting a limit, ensure the on-disk counts are valid.
663264835Sdelphij * This is only called by the ioctl path when setting a limit value.
664264835Sdelphij *
665264835Sdelphij * We do not need to validate the new limit, since users who can change the
666264835Sdelphij * limit are also allowed to exceed the limit.
667264835Sdelphij */
668264835Sdelphijint
669264835Sdelphijdsl_dir_activate_fs_ss_limit(const char *ddname)
670264835Sdelphij{
671264835Sdelphij	int error;
672264835Sdelphij
673264835Sdelphij	error = dsl_sync_task(ddname, dsl_dir_actv_fs_ss_limit_check,
674268473Sdelphij	    dsl_dir_actv_fs_ss_limit_sync, (void *)ddname, 0,
675268473Sdelphij	    ZFS_SPACE_CHECK_RESERVED);
676264835Sdelphij
677264835Sdelphij	if (error == EALREADY)
678264835Sdelphij		error = 0;
679264835Sdelphij
680264835Sdelphij	return (error);
681264835Sdelphij}
682264835Sdelphij
683264835Sdelphij/*
684264835Sdelphij * Used to determine if the filesystem_limit or snapshot_limit should be
685264835Sdelphij * enforced. We allow the limit to be exceeded if the user has permission to
686264835Sdelphij * write the property value. We pass in the creds that we got in the open
687264835Sdelphij * context since we will always be the GZ root in syncing context. We also have
688264835Sdelphij * to handle the case where we are allowed to change the limit on the current
689264835Sdelphij * dataset, but there may be another limit in the tree above.
690264835Sdelphij *
691264835Sdelphij * We can never modify these two properties within a non-global zone. In
692264835Sdelphij * addition, the other checks are modeled on zfs_secpolicy_write_perms. We
693264835Sdelphij * can't use that function since we are already holding the dp_config_rwlock.
694264835Sdelphij * In addition, we already have the dd and dealing with snapshots is simplified
695264835Sdelphij * in this code.
696264835Sdelphij */
697264835Sdelphij
/*
 * Result of dsl_enforce_ds_ss_limits(), consumed by dsl_fs_ss_limit_check().
 */
typedef enum {
	/* Compare the count against the limit on this dir and above. */
	ENFORCE_ALWAYS,
	/* Skip limit checking entirely. */
	ENFORCE_NEVER,
	/* Skip the comparison on this dir, but still check its ancestors. */
	ENFORCE_ABOVE
} enforce_res_t;
703264835Sdelphij
704264835Sdelphijstatic enforce_res_t
705264835Sdelphijdsl_enforce_ds_ss_limits(dsl_dir_t *dd, zfs_prop_t prop, cred_t *cr)
706264835Sdelphij{
707264835Sdelphij	enforce_res_t enforce = ENFORCE_ALWAYS;
708264835Sdelphij	uint64_t obj;
709264835Sdelphij	dsl_dataset_t *ds;
710264835Sdelphij	uint64_t zoned;
711264835Sdelphij
712264835Sdelphij	ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT ||
713264835Sdelphij	    prop == ZFS_PROP_SNAPSHOT_LIMIT);
714264835Sdelphij
715264835Sdelphij#ifdef _KERNEL
716264835Sdelphij#ifdef __FreeBSD__
717264835Sdelphij	if (jailed(cr))
718264835Sdelphij#else
719264835Sdelphij	if (crgetzoneid(cr) != GLOBAL_ZONEID)
720264835Sdelphij#endif
721264835Sdelphij		return (ENFORCE_ALWAYS);
722264835Sdelphij
723264835Sdelphij	if (secpolicy_zfs(cr) == 0)
724264835Sdelphij		return (ENFORCE_NEVER);
725264835Sdelphij#endif
726264835Sdelphij
727275782Sdelphij	if ((obj = dsl_dir_phys(dd)->dd_head_dataset_obj) == 0)
728264835Sdelphij		return (ENFORCE_ALWAYS);
729264835Sdelphij
730264835Sdelphij	ASSERT(dsl_pool_config_held(dd->dd_pool));
731264835Sdelphij
732264835Sdelphij	if (dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds) != 0)
733264835Sdelphij		return (ENFORCE_ALWAYS);
734264835Sdelphij
735264835Sdelphij	if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL) || zoned) {
736264835Sdelphij		/* Only root can access zoned fs's from the GZ */
737264835Sdelphij		enforce = ENFORCE_ALWAYS;
738264835Sdelphij	} else {
739264835Sdelphij		if (dsl_deleg_access_impl(ds, zfs_prop_to_name(prop), cr) == 0)
740264835Sdelphij			enforce = ENFORCE_ABOVE;
741264835Sdelphij	}
742264835Sdelphij
743264835Sdelphij	dsl_dataset_rele(ds, FTAG);
744264835Sdelphij	return (enforce);
745264835Sdelphij}
746264835Sdelphij
747332525Smavstatic void
748332525Smavdsl_dir_update_last_remap_txg_sync(void *varg, dmu_tx_t *tx)
749332525Smav{
750332525Smav	ddulrt_arg_t *arg = varg;
751332525Smav	uint64_t last_remap_txg;
752332525Smav	dsl_dir_t *dd = arg->ddulrta_dd;
753332525Smav	objset_t *mos = dd->dd_pool->dp_meta_objset;
754332525Smav
755332525Smav	dsl_dir_zapify(dd, tx);
756332525Smav	if (zap_lookup(mos, dd->dd_object, DD_FIELD_LAST_REMAP_TXG,
757332525Smav	    sizeof (last_remap_txg), 1, &last_remap_txg) != 0 ||
758332525Smav	    last_remap_txg < arg->ddlrta_txg) {
759332525Smav		VERIFY0(zap_update(mos, dd->dd_object, DD_FIELD_LAST_REMAP_TXG,
760332525Smav		    sizeof (arg->ddlrta_txg), 1, &arg->ddlrta_txg, tx));
761332525Smav	}
762332525Smav}
763332525Smav
764332525Smavint
765332525Smavdsl_dir_update_last_remap_txg(dsl_dir_t *dd, uint64_t txg)
766332525Smav{
767332525Smav	ddulrt_arg_t arg;
768332525Smav	arg.ddulrta_dd = dd;
769332525Smav	arg.ddlrta_txg = txg;
770332525Smav
771332525Smav	return (dsl_sync_task(spa_name(dd->dd_pool->dp_spa),
772332525Smav	    NULL, dsl_dir_update_last_remap_txg_sync, &arg,
773332525Smav	    1, ZFS_SPACE_CHECK_RESERVED));
774332525Smav}
775332525Smav
776264835Sdelphij/*
777264835Sdelphij * Check if adding additional child filesystem(s) would exceed any filesystem
778264835Sdelphij * limits or adding additional snapshot(s) would exceed any snapshot limits.
779264835Sdelphij * The prop argument indicates which limit to check.
780264835Sdelphij *
781264835Sdelphij * Note that all filesystem limits up to the root (or the highest
782264835Sdelphij * initialized) filesystem or the given ancestor must be satisfied.
783264835Sdelphij */
784264835Sdelphijint
785264835Sdelphijdsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop,
786264835Sdelphij    dsl_dir_t *ancestor, cred_t *cr)
787264835Sdelphij{
788264835Sdelphij	objset_t *os = dd->dd_pool->dp_meta_objset;
789264835Sdelphij	uint64_t limit, count;
790264835Sdelphij	char *count_prop;
791264835Sdelphij	enforce_res_t enforce;
792264835Sdelphij	int err = 0;
793264835Sdelphij
794264835Sdelphij	ASSERT(dsl_pool_config_held(dd->dd_pool));
795264835Sdelphij	ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT ||
796264835Sdelphij	    prop == ZFS_PROP_SNAPSHOT_LIMIT);
797264835Sdelphij
798264835Sdelphij	/*
799264835Sdelphij	 * If we're allowed to change the limit, don't enforce the limit
800264835Sdelphij	 * e.g. this can happen if a snapshot is taken by an administrative
801264835Sdelphij	 * user in the global zone (i.e. a recursive snapshot by root).
802264835Sdelphij	 * However, we must handle the case of delegated permissions where we
803264835Sdelphij	 * are allowed to change the limit on the current dataset, but there
804264835Sdelphij	 * is another limit in the tree above.
805264835Sdelphij	 */
806264835Sdelphij	enforce = dsl_enforce_ds_ss_limits(dd, prop, cr);
807264835Sdelphij	if (enforce == ENFORCE_NEVER)
808264835Sdelphij		return (0);
809264835Sdelphij
810264835Sdelphij	/*
811264835Sdelphij	 * e.g. if renaming a dataset with no snapshots, count adjustment
812264835Sdelphij	 * is 0.
813264835Sdelphij	 */
814264835Sdelphij	if (delta == 0)
815264835Sdelphij		return (0);
816264835Sdelphij
817264835Sdelphij	if (prop == ZFS_PROP_SNAPSHOT_LIMIT) {
818264835Sdelphij		/*
819264835Sdelphij		 * We don't enforce the limit for temporary snapshots. This is
820264835Sdelphij		 * indicated by a NULL cred_t argument.
821264835Sdelphij		 */
822264835Sdelphij		if (cr == NULL)
823264835Sdelphij			return (0);
824264835Sdelphij
825264835Sdelphij		count_prop = DD_FIELD_SNAPSHOT_COUNT;
826264835Sdelphij	} else {
827264835Sdelphij		count_prop = DD_FIELD_FILESYSTEM_COUNT;
828264835Sdelphij	}
829264835Sdelphij
830264835Sdelphij	/*
831264835Sdelphij	 * If an ancestor has been provided, stop checking the limit once we
832264835Sdelphij	 * hit that dir. We need this during rename so that we don't overcount
833264835Sdelphij	 * the check once we recurse up to the common ancestor.
834264835Sdelphij	 */
835264835Sdelphij	if (ancestor == dd)
836264835Sdelphij		return (0);
837264835Sdelphij
838264835Sdelphij	/*
839264835Sdelphij	 * If we hit an uninitialized node while recursing up the tree, we can
840264835Sdelphij	 * stop since we know there is no limit here (or above). The counts are
841264835Sdelphij	 * not valid on this node and we know we won't touch this node's counts.
842264835Sdelphij	 */
843264835Sdelphij	if (!dsl_dir_is_zapified(dd) || zap_lookup(os, dd->dd_object,
844264835Sdelphij	    count_prop, sizeof (count), 1, &count) == ENOENT)
845264835Sdelphij		return (0);
846264835Sdelphij
847264835Sdelphij	err = dsl_prop_get_dd(dd, zfs_prop_to_name(prop), 8, 1, &limit, NULL,
848264835Sdelphij	    B_FALSE);
849264835Sdelphij	if (err != 0)
850264835Sdelphij		return (err);
851264835Sdelphij
852264835Sdelphij	/* Is there a limit which we've hit? */
853264835Sdelphij	if (enforce == ENFORCE_ALWAYS && (count + delta) > limit)
854264835Sdelphij		return (SET_ERROR(EDQUOT));
855264835Sdelphij
856264835Sdelphij	if (dd->dd_parent != NULL)
857264835Sdelphij		err = dsl_fs_ss_limit_check(dd->dd_parent, delta, prop,
858264835Sdelphij		    ancestor, cr);
859264835Sdelphij
860264835Sdelphij	return (err);
861264835Sdelphij}
862264835Sdelphij
863264835Sdelphij/*
864264835Sdelphij * Adjust the filesystem or snapshot count for the specified dsl_dir_t and all
865264835Sdelphij * parents. When a new filesystem/snapshot is created, increment the count on
866264835Sdelphij * all parents, and when a filesystem/snapshot is destroyed, decrement the
867264835Sdelphij * count.
868264835Sdelphij */
869264835Sdelphijvoid
870264835Sdelphijdsl_fs_ss_count_adjust(dsl_dir_t *dd, int64_t delta, const char *prop,
871264835Sdelphij    dmu_tx_t *tx)
872264835Sdelphij{
873264835Sdelphij	int err;
874264835Sdelphij	objset_t *os = dd->dd_pool->dp_meta_objset;
875264835Sdelphij	uint64_t count;
876264835Sdelphij
877264835Sdelphij	ASSERT(dsl_pool_config_held(dd->dd_pool));
878264835Sdelphij	ASSERT(dmu_tx_is_syncing(tx));
879264835Sdelphij	ASSERT(strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0 ||
880264835Sdelphij	    strcmp(prop, DD_FIELD_SNAPSHOT_COUNT) == 0);
881264835Sdelphij
882264835Sdelphij	/*
883264835Sdelphij	 * When we receive an incremental stream into a filesystem that already
884264835Sdelphij	 * exists, a temporary clone is created.  We don't count this temporary
885264835Sdelphij	 * clone, whose name begins with a '%'. We also ignore hidden ($FREE,
886264835Sdelphij	 * $MOS & $ORIGIN) objsets.
887264835Sdelphij	 */
888264835Sdelphij	if ((dd->dd_myname[0] == '%' || dd->dd_myname[0] == '$') &&
889264835Sdelphij	    strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0)
890264835Sdelphij		return;
891264835Sdelphij
892264835Sdelphij	/*
893264835Sdelphij	 * e.g. if renaming a dataset with no snapshots, count adjustment is 0
894264835Sdelphij	 */
895264835Sdelphij	if (delta == 0)
896264835Sdelphij		return;
897264835Sdelphij
898264835Sdelphij	/*
899264835Sdelphij	 * If we hit an uninitialized node while recursing up the tree, we can
900264835Sdelphij	 * stop since we know the counts are not valid on this node and we
901264835Sdelphij	 * know we shouldn't touch this node's counts. An uninitialized count
902264835Sdelphij	 * on the node indicates that either the feature has not yet been
903264835Sdelphij	 * activated or there are no limits on this part of the tree.
904264835Sdelphij	 */
905264835Sdelphij	if (!dsl_dir_is_zapified(dd) || (err = zap_lookup(os, dd->dd_object,
906264835Sdelphij	    prop, sizeof (count), 1, &count)) == ENOENT)
907264835Sdelphij		return;
908264835Sdelphij	VERIFY0(err);
909264835Sdelphij
910264835Sdelphij	count += delta;
911264835Sdelphij	/* Use a signed verify to make sure we're not neg. */
912264835Sdelphij	VERIFY3S(count, >=, 0);
913264835Sdelphij
914264835Sdelphij	VERIFY0(zap_update(os, dd->dd_object, prop, sizeof (count), 1, &count,
915264835Sdelphij	    tx));
916264835Sdelphij
917264835Sdelphij	/* Roll up this additional count into our ancestors */
918264835Sdelphij	if (dd->dd_parent != NULL)
919264835Sdelphij		dsl_fs_ss_count_adjust(dd->dd_parent, delta, prop, tx);
920264835Sdelphij}
921264835Sdelphij
922168404Spjduint64_t
923185029Spjddsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
924185029Spjd    dmu_tx_t *tx)
925168404Spjd{
926185029Spjd	objset_t *mos = dp->dp_meta_objset;
927168404Spjd	uint64_t ddobj;
928219089Spjd	dsl_dir_phys_t *ddphys;
929168404Spjd	dmu_buf_t *dbuf;
930168404Spjd
931168404Spjd	ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
932168404Spjd	    DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
933185029Spjd	if (pds) {
934332547Smav		VERIFY0(zap_add(mos, dsl_dir_phys(pds)->dd_child_dir_zapobj,
935185029Spjd		    name, sizeof (uint64_t), 1, &ddobj, tx));
936185029Spjd	} else {
937185029Spjd		/* it's the root dir */
938332547Smav		VERIFY0(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
939185029Spjd		    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx));
940185029Spjd	}
941332547Smav	VERIFY0(dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
942168404Spjd	dmu_buf_will_dirty(dbuf, tx);
943219089Spjd	ddphys = dbuf->db_data;
944168404Spjd
945219089Spjd	ddphys->dd_creation_time = gethrestime_sec();
946264835Sdelphij	if (pds) {
947219089Spjd		ddphys->dd_parent_obj = pds->dd_object;
948264835Sdelphij
949264835Sdelphij		/* update the filesystem counts */
950264835Sdelphij		dsl_fs_ss_count_adjust(pds, 1, DD_FIELD_FILESYSTEM_COUNT, tx);
951264835Sdelphij	}
952219089Spjd	ddphys->dd_props_zapobj = zap_create(mos,
953168404Spjd	    DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
954219089Spjd	ddphys->dd_child_dir_zapobj = zap_create(mos,
955168404Spjd	    DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
956185029Spjd	if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
957219089Spjd		ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
958168404Spjd	dmu_buf_rele(dbuf, FTAG);
959168404Spjd
960168404Spjd	return (ddobj);
961168404Spjd}
962168404Spjd
963185029Spjdboolean_t
964185029Spjddsl_dir_is_clone(dsl_dir_t *dd)
965168404Spjd{
966275782Sdelphij	return (dsl_dir_phys(dd)->dd_origin_obj &&
967185029Spjd	    (dd->dd_pool->dp_origin_snap == NULL ||
968275782Sdelphij	    dsl_dir_phys(dd)->dd_origin_obj !=
969185029Spjd	    dd->dd_pool->dp_origin_snap->ds_object));
970168404Spjd}
971168404Spjd
972325534Savg
973325534Savguint64_t
974325534Savgdsl_dir_get_used(dsl_dir_t *dd)
975325534Savg{
976325534Savg	return (dsl_dir_phys(dd)->dd_used_bytes);
977325534Savg}
978325534Savg
979325534Savguint64_t
980332547Smavdsl_dir_get_compressed(dsl_dir_t *dd)
981332547Smav{
982332547Smav	return (dsl_dir_phys(dd)->dd_compressed_bytes);
983332547Smav}
984332547Smav
985332547Smavuint64_t
986325534Savgdsl_dir_get_quota(dsl_dir_t *dd)
987325534Savg{
988325534Savg	return (dsl_dir_phys(dd)->dd_quota);
989325534Savg}
990325534Savg
991325534Savguint64_t
992325534Savgdsl_dir_get_reservation(dsl_dir_t *dd)
993325534Savg{
994325534Savg	return (dsl_dir_phys(dd)->dd_reserved);
995325534Savg}
996325534Savg
997325534Savguint64_t
998325534Savgdsl_dir_get_compressratio(dsl_dir_t *dd)
999325534Savg{
1000325534Savg	/* a fixed point number, 100x the ratio */
1001325534Savg	return (dsl_dir_phys(dd)->dd_compressed_bytes == 0 ? 100 :
1002325534Savg	    (dsl_dir_phys(dd)->dd_uncompressed_bytes * 100 /
1003325534Savg	    dsl_dir_phys(dd)->dd_compressed_bytes));
1004325534Savg}
1005325534Savg
1006325534Savguint64_t
1007325534Savgdsl_dir_get_logicalused(dsl_dir_t *dd)
1008325534Savg{
1009325534Savg	return (dsl_dir_phys(dd)->dd_uncompressed_bytes);
1010325534Savg}
1011325534Savg
1012325534Savguint64_t
1013325534Savgdsl_dir_get_usedsnap(dsl_dir_t *dd)
1014325534Savg{
1015325534Savg	return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]);
1016325534Savg}
1017325534Savg
1018325534Savguint64_t
1019325534Savgdsl_dir_get_usedds(dsl_dir_t *dd)
1020325534Savg{
1021325534Savg	return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_HEAD]);
1022325534Savg}
1023325534Savg
1024325534Savguint64_t
1025325534Savgdsl_dir_get_usedrefreserv(dsl_dir_t *dd)
1026325534Savg{
1027325534Savg	return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_REFRSRV]);
1028325534Savg}
1029325534Savg
1030325534Savguint64_t
1031325534Savgdsl_dir_get_usedchild(dsl_dir_t *dd)
1032325534Savg{
1033325534Savg	return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD] +
1034325534Savg	    dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD_RSRV]);
1035325534Savg}
1036325534Savg
1037168404Spjdvoid
1038325534Savgdsl_dir_get_origin(dsl_dir_t *dd, char *buf)
1039325534Savg{
1040325534Savg	dsl_dataset_t *ds;
1041325534Savg	VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
1042325534Savg	    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &ds));
1043325534Savg
1044325534Savg	dsl_dataset_name(ds, buf);
1045325534Savg
1046325534Savg	dsl_dataset_rele(ds, FTAG);
1047325534Savg}
1048325534Savg
1049325534Savgint
1050325534Savgdsl_dir_get_filesystem_count(dsl_dir_t *dd, uint64_t *count)
1051325534Savg{
1052325534Savg	if (dsl_dir_is_zapified(dd)) {
1053325534Savg		objset_t *os = dd->dd_pool->dp_meta_objset;
1054325534Savg		return (zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
1055325534Savg		    sizeof (*count), 1, count));
1056325534Savg	} else {
1057325534Savg		return (ENOENT);
1058325534Savg	}
1059325534Savg}
1060325534Savg
1061325534Savgint
1062325534Savgdsl_dir_get_snapshot_count(dsl_dir_t *dd, uint64_t *count)
1063325534Savg{
1064325534Savg	if (dsl_dir_is_zapified(dd)) {
1065325534Savg		objset_t *os = dd->dd_pool->dp_meta_objset;
1066325534Savg		return (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
1067325534Savg		    sizeof (*count), 1, count));
1068325534Savg	} else {
1069325534Savg		return (ENOENT);
1070325534Savg	}
1071325534Savg}
1072325534Savg
1073332525Smavint
1074332525Smavdsl_dir_get_remaptxg(dsl_dir_t *dd, uint64_t *count)
1075332525Smav{
1076332525Smav	if (dsl_dir_is_zapified(dd)) {
1077332525Smav		objset_t *os = dd->dd_pool->dp_meta_objset;
1078332525Smav		return (zap_lookup(os, dd->dd_object, DD_FIELD_LAST_REMAP_TXG,
1079332525Smav		    sizeof (*count), 1, count));
1080332525Smav	} else {
1081332525Smav		return (ENOENT);
1082332525Smav	}
1083332525Smav}
1084332525Smav
1085325534Savgvoid
1086168404Spjddsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
1087168404Spjd{
1088168404Spjd	mutex_enter(&dd->dd_lock);
1089275782Sdelphij	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA,
1090325534Savg	    dsl_dir_get_quota(dd));
1091168404Spjd	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
1092325534Savg	    dsl_dir_get_reservation(dd));
1093247585Smm	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALUSED,
1094325534Savg	    dsl_dir_get_logicalused(dd));
1095275782Sdelphij	if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) {
1096185029Spjd		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP,
1097325534Savg		    dsl_dir_get_usedsnap(dd));
1098185029Spjd		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS,
1099325534Savg		    dsl_dir_get_usedds(dd));
1100185029Spjd		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV,
1101325534Savg		    dsl_dir_get_usedrefreserv(dd));
1102185029Spjd		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD,
1103325534Savg		    dsl_dir_get_usedchild(dd));
1104185029Spjd	}
1105168404Spjd	mutex_exit(&dd->dd_lock);
1106168404Spjd
1107325534Savg	uint64_t count;
1108325534Savg	if (dsl_dir_get_filesystem_count(dd, &count) == 0) {
1109325534Savg		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_FILESYSTEM_COUNT,
1110325534Savg		    count);
1111264835Sdelphij	}
1112325534Savg	if (dsl_dir_get_snapshot_count(dd, &count) == 0) {
1113325534Savg		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_SNAPSHOT_COUNT,
1114325534Savg		    count);
1115325534Savg	}
1116332525Smav	if (dsl_dir_get_remaptxg(dd, &count) == 0) {
1117332525Smav		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REMAPTXG,
1118332525Smav		    count);
1119332525Smav	}
1120264835Sdelphij
1121185029Spjd	if (dsl_dir_is_clone(dd)) {
1122307108Smav		char buf[ZFS_MAX_DATASET_NAME_LEN];
1123325534Savg		dsl_dir_get_origin(dd, buf);
1124168404Spjd		dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
1125168404Spjd	}
1126325534Savg
1127168404Spjd}
1128168404Spjd
1129168404Spjdvoid
1130168404Spjddsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
1131168404Spjd{
1132168404Spjd	dsl_pool_t *dp = dd->dd_pool;
1133168404Spjd
1134275782Sdelphij	ASSERT(dsl_dir_phys(dd));
1135168404Spjd
1136248571Smm	if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) {
1137168404Spjd		/* up the hold count until we can be written out */
1138168404Spjd		dmu_buf_add_ref(dd->dd_dbuf, dd);
1139168404Spjd	}
1140168404Spjd}
1141168404Spjd
1142168404Spjdstatic int64_t
1143168404Spjdparent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta)
1144168404Spjd{
1145275782Sdelphij	uint64_t old_accounted = MAX(used, dsl_dir_phys(dd)->dd_reserved);
1146275782Sdelphij	uint64_t new_accounted =
1147275782Sdelphij	    MAX(used + delta, dsl_dir_phys(dd)->dd_reserved);
1148168404Spjd	return (new_accounted - old_accounted);
1149168404Spjd}
1150168404Spjd
1151168404Spjdvoid
1152168404Spjddsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
1153168404Spjd{
1154168404Spjd	ASSERT(dmu_tx_is_syncing(tx));
1155168404Spjd
1156168404Spjd	mutex_enter(&dd->dd_lock);
1157240415Smm	ASSERT0(dd->dd_tempreserved[tx->tx_txg&TXG_MASK]);
1158168404Spjd	dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
1159168404Spjd	    dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
1160168404Spjd	dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
1161168404Spjd	mutex_exit(&dd->dd_lock);
1162168404Spjd
1163168404Spjd	/* release the hold from dsl_dir_dirty */
1164168404Spjd	dmu_buf_rele(dd->dd_dbuf, dd);
1165168404Spjd}
1166168404Spjd
1167168404Spjdstatic uint64_t
1168185029Spjddsl_dir_space_towrite(dsl_dir_t *dd)
1169168404Spjd{
1170185029Spjd	uint64_t space = 0;
1171168404Spjd
1172168404Spjd	ASSERT(MUTEX_HELD(&dd->dd_lock));
1173168404Spjd
1174321547Smav	for (int i = 0; i < TXG_SIZE; i++) {
1175321547Smav		space += dd->dd_space_towrite[i & TXG_MASK];
1176321547Smav		ASSERT3U(dd->dd_space_towrite[i & TXG_MASK], >=, 0);
1177168404Spjd	}
1178168404Spjd	return (space);
1179168404Spjd}
1180168404Spjd
1181168404Spjd/*
1182168404Spjd * How much space would dd have available if ancestor had delta applied
1183168404Spjd * to it?  If ondiskonly is set, we're only interested in what's
1184168404Spjd * on-disk, not estimated pending changes.
1185168404Spjd */
1186168404Spjduint64_t
1187168404Spjddsl_dir_space_available(dsl_dir_t *dd,
1188168404Spjd    dsl_dir_t *ancestor, int64_t delta, int ondiskonly)
1189168404Spjd{
1190168404Spjd	uint64_t parentspace, myspace, quota, used;
1191168404Spjd
1192168404Spjd	/*
1193168404Spjd	 * If there are no restrictions otherwise, assume we have
1194168404Spjd	 * unlimited space available.
1195168404Spjd	 */
1196168404Spjd	quota = UINT64_MAX;
1197168404Spjd	parentspace = UINT64_MAX;
1198168404Spjd
1199168404Spjd	if (dd->dd_parent != NULL) {
1200168404Spjd		parentspace = dsl_dir_space_available(dd->dd_parent,
1201168404Spjd		    ancestor, delta, ondiskonly);
1202168404Spjd	}
1203168404Spjd
1204168404Spjd	mutex_enter(&dd->dd_lock);
1205275782Sdelphij	if (dsl_dir_phys(dd)->dd_quota != 0)
1206275782Sdelphij		quota = dsl_dir_phys(dd)->dd_quota;
1207275782Sdelphij	used = dsl_dir_phys(dd)->dd_used_bytes;
1208185029Spjd	if (!ondiskonly)
1209185029Spjd		used += dsl_dir_space_towrite(dd);
1210168404Spjd
1211168404Spjd	if (dd->dd_parent == NULL) {
1212332547Smav		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool,
1213332547Smav		    ZFS_SPACE_CHECK_NORMAL);
1214168404Spjd		quota = MIN(quota, poolsize);
1215168404Spjd	}
1216168404Spjd
1217275782Sdelphij	if (dsl_dir_phys(dd)->dd_reserved > used && parentspace != UINT64_MAX) {
1218168404Spjd		/*
1219168404Spjd		 * We have some space reserved, in addition to what our
1220168404Spjd		 * parent gave us.
1221168404Spjd		 */
1222275782Sdelphij		parentspace += dsl_dir_phys(dd)->dd_reserved - used;
1223168404Spjd	}
1224168404Spjd
1225185029Spjd	if (dd == ancestor) {
1226185029Spjd		ASSERT(delta <= 0);
1227185029Spjd		ASSERT(used >= -delta);
1228185029Spjd		used += delta;
1229185029Spjd		if (parentspace != UINT64_MAX)
1230185029Spjd			parentspace -= delta;
1231185029Spjd	}
1232185029Spjd
1233168404Spjd	if (used > quota) {
1234168404Spjd		/* over quota */
1235168404Spjd		myspace = 0;
1236168404Spjd	} else {
1237168404Spjd		/*
1238168404Spjd		 * the lesser of the space provided by our parent and
1239168404Spjd		 * the space left in our quota
1240168404Spjd		 */
1241168404Spjd		myspace = MIN(parentspace, quota - used);
1242168404Spjd	}
1243168404Spjd
1244168404Spjd	mutex_exit(&dd->dd_lock);
1245168404Spjd
1246168404Spjd	return (myspace);
1247168404Spjd}
1248168404Spjd
1249168404Spjdstruct tempreserve {
1250168404Spjd	list_node_t tr_node;
1251168404Spjd	dsl_dir_t *tr_ds;
1252168404Spjd	uint64_t tr_size;
1253168404Spjd};
1254168404Spjd
1255168404Spjdstatic int
1256185029Spjddsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
1257321547Smav    boolean_t ignorequota, list_t *tr_list,
1258185029Spjd    dmu_tx_t *tx, boolean_t first)
1259168404Spjd{
1260168404Spjd	uint64_t txg = tx->tx_txg;
1261321547Smav	uint64_t quota;
1262185029Spjd	struct tempreserve *tr;
1263219089Spjd	int retval = EDQUOT;
1264185029Spjd	uint64_t ref_rsrv = 0;
1265168404Spjd
1266168404Spjd	ASSERT3U(txg, !=, 0);
1267185029Spjd	ASSERT3S(asize, >, 0);
1268168404Spjd
1269168404Spjd	mutex_enter(&dd->dd_lock);
1270185029Spjd
1271168404Spjd	/*
1272168404Spjd	 * Check against the dsl_dir's quota.  We don't add in the delta
1273168404Spjd	 * when checking for over-quota because they get one free hit.
1274168404Spjd	 */
1275321547Smav	uint64_t est_inflight = dsl_dir_space_towrite(dd);
1276321547Smav	for (int i = 0; i < TXG_SIZE; i++)
1277185029Spjd		est_inflight += dd->dd_tempreserved[i];
1278321547Smav	uint64_t used_on_disk = dsl_dir_phys(dd)->dd_used_bytes;
1279168404Spjd
1280185029Spjd	/*
1281185029Spjd	 * On the first iteration, fetch the dataset's used-on-disk and
1282185029Spjd	 * refreservation values. Also, if checkrefquota is set, test if
1283185029Spjd	 * allocating this space would exceed the dataset's refquota.
1284185029Spjd	 */
1285185029Spjd	if (first && tx->tx_objset) {
1286185029Spjd		int error;
1287219089Spjd		dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;
1288168404Spjd
1289321547Smav		error = dsl_dataset_check_quota(ds, !netfree,
1290185029Spjd		    asize, est_inflight, &used_on_disk, &ref_rsrv);
1291321547Smav		if (error != 0) {
1292185029Spjd			mutex_exit(&dd->dd_lock);
1293185029Spjd			return (error);
1294185029Spjd		}
1295185029Spjd	}
1296185029Spjd
1297185029Spjd	/*
1298185029Spjd	 * If this transaction will result in a net free of space,
1299185029Spjd	 * we want to let it through.
1300185029Spjd	 */
1301275782Sdelphij	if (ignorequota || netfree || dsl_dir_phys(dd)->dd_quota == 0)
1302185029Spjd		quota = UINT64_MAX;
1303185029Spjd	else
1304275782Sdelphij		quota = dsl_dir_phys(dd)->dd_quota;
1305168404Spjd
1306168404Spjd	/*
1307219089Spjd	 * Adjust the quota against the actual pool size at the root
1308219089Spjd	 * minus any outstanding deferred frees.
1309185029Spjd	 * To ensure that it's possible to remove files from a full
1310185029Spjd	 * pool without inducing transient overcommits, we throttle
1311168404Spjd	 * netfree transactions against a quota that is slightly larger,
1312168404Spjd	 * but still within the pool's allocation slop.  In cases where
1313168404Spjd	 * we're very close to full, this will allow a steady trickle of
1314168404Spjd	 * removes to get through.
1315168404Spjd	 */
1316321547Smav	uint64_t deferred = 0;
1317168404Spjd	if (dd->dd_parent == NULL) {
1318332547Smav		uint64_t avail = dsl_pool_unreserved_space(dd->dd_pool,
1319332547Smav		    (netfree) ?
1320332547Smav		    ZFS_SPACE_CHECK_RESERVED : ZFS_SPACE_CHECK_NORMAL);
1321332547Smav
1322332547Smav		if (avail < quota) {
1323332547Smav			quota = avail;
1324219089Spjd			retval = ENOSPC;
1325168404Spjd		}
1326168404Spjd	}
1327168404Spjd
1328168404Spjd	/*
1329168404Spjd	 * If they are requesting more space, and our current estimate
1330185029Spjd	 * is over quota, they get to try again unless the actual
1331168404Spjd	 * on-disk is over quota and there are no pending changes (which
1332168404Spjd	 * may free up space for us).
1333168404Spjd	 */
1334219089Spjd	if (used_on_disk + est_inflight >= quota) {
1335219089Spjd		if (est_inflight > 0 || used_on_disk < quota ||
1336219089Spjd		    (retval == ENOSPC && used_on_disk < quota + deferred))
1337219089Spjd			retval = ERESTART;
1338185029Spjd		dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
1339168404Spjd		    "quota=%lluK tr=%lluK err=%d\n",
1340185029Spjd		    used_on_disk>>10, est_inflight>>10,
1341219089Spjd		    quota>>10, asize>>10, retval);
1342168404Spjd		mutex_exit(&dd->dd_lock);
1343249195Smm		return (SET_ERROR(retval));
1344168404Spjd	}
1345168404Spjd
1346168404Spjd	/* We need to up our estimated delta before dropping dd_lock */
1347321547Smav	dd->dd_tempreserved[txg & TXG_MASK] += asize;
1348168404Spjd
1349321547Smav	uint64_t parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
1350185029Spjd	    asize - ref_rsrv);
1351168404Spjd	mutex_exit(&dd->dd_lock);
1352168404Spjd
1353185029Spjd	tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
1354168404Spjd	tr->tr_ds = dd;
1355168404Spjd	tr->tr_size = asize;
1356168404Spjd	list_insert_tail(tr_list, tr);
1357168404Spjd
1358168404Spjd	/* see if it's OK with our parent */
1359321547Smav	if (dd->dd_parent != NULL && parent_rsrv != 0) {
1360275782Sdelphij		boolean_t ismos = (dsl_dir_phys(dd)->dd_head_dataset_obj == 0);
1361185029Spjd
1362168404Spjd		return (dsl_dir_tempreserve_impl(dd->dd_parent,
1363321547Smav		    parent_rsrv, netfree, ismos, tr_list, tx, B_FALSE));
1364168404Spjd	} else {
1365168404Spjd		return (0);
1366168404Spjd	}
1367168404Spjd}
1368168404Spjd
1369168404Spjd/*
1370168404Spjd * Reserve space in this dsl_dir, to be used in this tx's txg.
1371185029Spjd * After the space has been dirtied (and dsl_dir_willuse_space()
1372185029Spjd * has been called), the reservation should be canceled, using
1373185029Spjd * dsl_dir_tempreserve_clear().
1374168404Spjd */
1375168404Spjdint
1376185029Spjddsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
1377321547Smav    boolean_t netfree, void **tr_cookiep, dmu_tx_t *tx)
1378168404Spjd{
1379185029Spjd	int err;
1380168404Spjd	list_t *tr_list;
1381168404Spjd
1382185029Spjd	if (asize == 0) {
1383185029Spjd		*tr_cookiep = NULL;
1384185029Spjd		return (0);
1385185029Spjd	}
1386185029Spjd
1387168404Spjd	tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
1388168404Spjd	list_create(tr_list, sizeof (struct tempreserve),
1389168404Spjd	    offsetof(struct tempreserve, tr_node));
1390185029Spjd	ASSERT3S(asize, >, 0);
1391168404Spjd
1392339141Smav	err = arc_tempreserve_space(dd->dd_pool->dp_spa, lsize, tx->tx_txg);
1393168404Spjd	if (err == 0) {
1394168404Spjd		struct tempreserve *tr;
1395168404Spjd
1396185029Spjd		tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
1397185029Spjd		tr->tr_size = lsize;
1398185029Spjd		list_insert_tail(tr_list, tr);
1399185029Spjd	} else {
1400185029Spjd		if (err == EAGAIN) {
1401258632Savg			/*
1402258632Savg			 * If arc_memory_throttle() detected that pageout
1403258632Savg			 * is running and we are low on memory, we delay new
1404258632Savg			 * non-pageout transactions to give pageout an
1405258632Savg			 * advantage.
1406258632Savg			 *
1407258632Savg			 * It is unfortunate to be delaying while the caller's
1408258632Savg			 * locks are held.
1409258632Savg			 */
1410255437Sdelphij			txg_delay(dd->dd_pool, tx->tx_txg,
1411255437Sdelphij			    MSEC2NSEC(10), MSEC2NSEC(10));
1412249195Smm			err = SET_ERROR(ERESTART);
1413168404Spjd		}
1414168404Spjd	}
1415168404Spjd
1416185029Spjd	if (err == 0) {
1417321547Smav		err = dsl_dir_tempreserve_impl(dd, asize, netfree,
1418321547Smav		    B_FALSE, tr_list, tx, B_TRUE);
1419185029Spjd	}
1420185029Spjd
1421248571Smm	if (err != 0)
1422168404Spjd		dsl_dir_tempreserve_clear(tr_list, tx);
1423168404Spjd	else
1424168404Spjd		*tr_cookiep = tr_list;
1425185029Spjd
1426168404Spjd	return (err);
1427168404Spjd}
1428168404Spjd
1429168404Spjd/*
1430168404Spjd * Clear a temporary reservation that we previously made with
1431168404Spjd * dsl_dir_tempreserve_space().
1432168404Spjd */
1433168404Spjdvoid
1434168404Spjddsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
1435168404Spjd{
1436168404Spjd	int txgidx = tx->tx_txg & TXG_MASK;
1437168404Spjd	list_t *tr_list = tr_cookie;
1438168404Spjd	struct tempreserve *tr;
1439168404Spjd
1440168404Spjd	ASSERT3U(tx->tx_txg, !=, 0);
1441168404Spjd
1442185029Spjd	if (tr_cookie == NULL)
1443185029Spjd		return;
1444185029Spjd
1445258632Savg	while ((tr = list_head(tr_list)) != NULL) {
1446258632Savg		if (tr->tr_ds) {
1447168404Spjd			mutex_enter(&tr->tr_ds->dd_lock);
1448168404Spjd			ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
1449168404Spjd			    tr->tr_size);
1450168404Spjd			tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size;
1451168404Spjd			mutex_exit(&tr->tr_ds->dd_lock);
1452185029Spjd		} else {
1453185029Spjd			arc_tempreserve_clear(tr->tr_size);
1454168404Spjd		}
1455168404Spjd		list_remove(tr_list, tr);
1456168404Spjd		kmem_free(tr, sizeof (struct tempreserve));
1457168404Spjd	}
1458168404Spjd
1459168404Spjd	kmem_free(tr_list, sizeof (list_t));
1460168404Spjd}
1461168404Spjd
1462258632Savg/*
1463258632Savg * This should be called from open context when we think we're going to write
1464258632Savg * or free space, for example when dirtying data. Be conservative; it's okay
1465258632Savg * to write less space or free more, but we don't want to write more or free
1466258632Savg * less than the amount specified.
1467258632Savg */
1468258632Savgvoid
1469258632Savgdsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
1470168404Spjd{
1471168404Spjd	int64_t parent_space;
1472168404Spjd	uint64_t est_used;
1473168404Spjd
1474168404Spjd	mutex_enter(&dd->dd_lock);
1475168404Spjd	if (space > 0)
1476168404Spjd		dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space;
1477168404Spjd
1478275782Sdelphij	est_used = dsl_dir_space_towrite(dd) + dsl_dir_phys(dd)->dd_used_bytes;
1479168404Spjd	parent_space = parent_delta(dd, est_used, space);
1480168404Spjd	mutex_exit(&dd->dd_lock);
1481168404Spjd
1482168404Spjd	/* Make sure that we clean up dd_space_to* */
1483168404Spjd	dsl_dir_dirty(dd, tx);
1484168404Spjd
1485168404Spjd	/* XXX this is potentially expensive and unnecessary... */
1486168404Spjd	if (parent_space && dd->dd_parent)
1487258632Savg		dsl_dir_willuse_space(dd->dd_parent, parent_space, tx);
1488168404Spjd}
1489168404Spjd
1490168404Spjd/* call from syncing context when we actually write/free space for this dd */
1491168404Spjdvoid
1492185029Spjddsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
1493168404Spjd    int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
1494168404Spjd{
1495168404Spjd	int64_t accounted_delta;
1496254757Sdelphij
1497254757Sdelphij	/*
1498254757Sdelphij	 * dsl_dataset_set_refreservation_sync_impl() calls this with
1499254757Sdelphij	 * dd_lock held, so that it can atomically update
1500254757Sdelphij	 * ds->ds_reserved and the dsl_dir accounting, so that
1501254757Sdelphij	 * dsl_dataset_check_quota() can see dataset and dir accounting
1502254757Sdelphij	 * consistently.
1503254757Sdelphij	 */
1504185029Spjd	boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
1505168404Spjd
1506168404Spjd	ASSERT(dmu_tx_is_syncing(tx));
1507185029Spjd	ASSERT(type < DD_USED_NUM);
1508168404Spjd
1509254757Sdelphij	dmu_buf_will_dirty(dd->dd_dbuf, tx);
1510254757Sdelphij
1511185029Spjd	if (needlock)
1512185029Spjd		mutex_enter(&dd->dd_lock);
1513275782Sdelphij	accounted_delta =
1514275782Sdelphij	    parent_delta(dd, dsl_dir_phys(dd)->dd_used_bytes, used);
1515275782Sdelphij	ASSERT(used >= 0 || dsl_dir_phys(dd)->dd_used_bytes >= -used);
1516168404Spjd	ASSERT(compressed >= 0 ||
1517275782Sdelphij	    dsl_dir_phys(dd)->dd_compressed_bytes >= -compressed);
1518168404Spjd	ASSERT(uncompressed >= 0 ||
1519275782Sdelphij	    dsl_dir_phys(dd)->dd_uncompressed_bytes >= -uncompressed);
1520275782Sdelphij	dsl_dir_phys(dd)->dd_used_bytes += used;
1521275782Sdelphij	dsl_dir_phys(dd)->dd_uncompressed_bytes += uncompressed;
1522275782Sdelphij	dsl_dir_phys(dd)->dd_compressed_bytes += compressed;
1523168404Spjd
1524275782Sdelphij	if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) {
1525185029Spjd		ASSERT(used > 0 ||
1526275782Sdelphij		    dsl_dir_phys(dd)->dd_used_breakdown[type] >= -used);
1527275782Sdelphij		dsl_dir_phys(dd)->dd_used_breakdown[type] += used;
1528185029Spjd#ifdef DEBUG
1529185029Spjd		dd_used_t t;
1530185029Spjd		uint64_t u = 0;
1531185029Spjd		for (t = 0; t < DD_USED_NUM; t++)
1532275782Sdelphij			u += dsl_dir_phys(dd)->dd_used_breakdown[t];
1533275782Sdelphij		ASSERT3U(u, ==, dsl_dir_phys(dd)->dd_used_bytes);
1534185029Spjd#endif
1535185029Spjd	}
1536185029Spjd	if (needlock)
1537185029Spjd		mutex_exit(&dd->dd_lock);
1538185029Spjd
1539168404Spjd	if (dd->dd_parent != NULL) {
1540185029Spjd		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
1541168404Spjd		    accounted_delta, compressed, uncompressed, tx);
1542185029Spjd		dsl_dir_transfer_space(dd->dd_parent,
1543185029Spjd		    used - accounted_delta,
1544277419Smav		    DD_USED_CHILD_RSRV, DD_USED_CHILD, NULL);
1545168404Spjd	}
1546168404Spjd}
1547168404Spjd
1548185029Spjdvoid
1549185029Spjddsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
1550185029Spjd    dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
1551185029Spjd{
1552277419Smav	ASSERT(tx == NULL || dmu_tx_is_syncing(tx));
1553185029Spjd	ASSERT(oldtype < DD_USED_NUM);
1554185029Spjd	ASSERT(newtype < DD_USED_NUM);
1555185029Spjd
1556275782Sdelphij	if (delta == 0 ||
1557275782Sdelphij	    !(dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN))
1558185029Spjd		return;
1559185029Spjd
1560277419Smav	if (tx != NULL)
1561277419Smav		dmu_buf_will_dirty(dd->dd_dbuf, tx);
1562254757Sdelphij	mutex_enter(&dd->dd_lock);
1563185029Spjd	ASSERT(delta > 0 ?
1564275782Sdelphij	    dsl_dir_phys(dd)->dd_used_breakdown[oldtype] >= delta :
1565275782Sdelphij	    dsl_dir_phys(dd)->dd_used_breakdown[newtype] >= -delta);
1566275782Sdelphij	ASSERT(dsl_dir_phys(dd)->dd_used_bytes >= ABS(delta));
1567275782Sdelphij	dsl_dir_phys(dd)->dd_used_breakdown[oldtype] -= delta;
1568275782Sdelphij	dsl_dir_phys(dd)->dd_used_breakdown[newtype] += delta;
1569254757Sdelphij	mutex_exit(&dd->dd_lock);
1570185029Spjd}
1571185029Spjd
/*
 * Argument bundle handed to the quota and reservation sync tasks
 * (their check and sync callbacks).
 */
typedef struct dsl_dir_set_qr_arg {
	/* name of the dataset to hold and operate on */
	const char *ddsqra_name;
	/* property source passed through to the dsl_prop_* routines */
	zprop_source_t ddsqra_source;
	/* requested quota or reservation value */
	uint64_t ddsqra_value;
} dsl_dir_set_qr_arg_t;
1577248571Smm
1578168404Spjdstatic int
1579248571Smmdsl_dir_set_quota_check(void *arg, dmu_tx_t *tx)
1580168404Spjd{
1581248571Smm	dsl_dir_set_qr_arg_t *ddsqra = arg;
1582248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
1583248571Smm	dsl_dataset_t *ds;
1584248571Smm	int error;
1585248571Smm	uint64_t towrite, newval;
1586168404Spjd
1587248571Smm	error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
1588248571Smm	if (error != 0)
1589248571Smm		return (error);
1590219089Spjd
1591248571Smm	error = dsl_prop_predict(ds->ds_dir, "quota",
1592248571Smm	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
1593248571Smm	if (error != 0) {
1594248571Smm		dsl_dataset_rele(ds, FTAG);
1595248571Smm		return (error);
1596248571Smm	}
1597248571Smm
1598248571Smm	if (newval == 0) {
1599248571Smm		dsl_dataset_rele(ds, FTAG);
1600168404Spjd		return (0);
1601248571Smm	}
1602168404Spjd
1603248571Smm	mutex_enter(&ds->ds_dir->dd_lock);
1604168404Spjd	/*
1605168404Spjd	 * If we are doing the preliminary check in open context, and
1606168404Spjd	 * there are pending changes, then don't fail it, since the
1607185029Spjd	 * pending changes could under-estimate the amount of space to be
1608168404Spjd	 * freed up.
1609168404Spjd	 */
1610248571Smm	towrite = dsl_dir_space_towrite(ds->ds_dir);
1611168404Spjd	if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
1612275782Sdelphij	    (newval < dsl_dir_phys(ds->ds_dir)->dd_reserved ||
1613275782Sdelphij	    newval < dsl_dir_phys(ds->ds_dir)->dd_used_bytes + towrite)) {
1614249195Smm		error = SET_ERROR(ENOSPC);
1615168404Spjd	}
1616248571Smm	mutex_exit(&ds->ds_dir->dd_lock);
1617248571Smm	dsl_dataset_rele(ds, FTAG);
1618248571Smm	return (error);
1619168404Spjd}
1620168404Spjd
1621168404Spjdstatic void
1622248571Smmdsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx)
1623168404Spjd{
1624248571Smm	dsl_dir_set_qr_arg_t *ddsqra = arg;
1625248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
1626248571Smm	dsl_dataset_t *ds;
1627248571Smm	uint64_t newval;
1628168404Spjd
1629248571Smm	VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
1630219089Spjd
1631249787Smm	if (spa_version(dp->dp_spa) >= SPA_VERSION_RECVD_PROPS) {
1632249787Smm		dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA),
1633249787Smm		    ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
1634249787Smm		    &ddsqra->ddsqra_value, tx);
1635168404Spjd
1636249787Smm		VERIFY0(dsl_prop_get_int_ds(ds,
1637249787Smm		    zfs_prop_to_name(ZFS_PROP_QUOTA), &newval));
1638249787Smm	} else {
1639249787Smm		newval = ddsqra->ddsqra_value;
1640249787Smm		spa_history_log_internal_ds(ds, "set", tx, "%s=%lld",
1641249787Smm		    zfs_prop_to_name(ZFS_PROP_QUOTA), (longlong_t)newval);
1642249787Smm	}
1643248571Smm
1644248571Smm	dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
1645248571Smm	mutex_enter(&ds->ds_dir->dd_lock);
1646275782Sdelphij	dsl_dir_phys(ds->ds_dir)->dd_quota = newval;
1647248571Smm	mutex_exit(&ds->ds_dir->dd_lock);
1648248571Smm	dsl_dataset_rele(ds, FTAG);
1649168404Spjd}
1650168404Spjd
1651168404Spjdint
1652219089Spjddsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
1653168404Spjd{
1654248571Smm	dsl_dir_set_qr_arg_t ddsqra;
1655168404Spjd
1656248571Smm	ddsqra.ddsqra_name = ddname;
1657248571Smm	ddsqra.ddsqra_source = source;
1658248571Smm	ddsqra.ddsqra_value = quota;
1659219089Spjd
1660248571Smm	return (dsl_sync_task(ddname, dsl_dir_set_quota_check,
1661332547Smav	    dsl_dir_set_quota_sync, &ddsqra, 0,
1662332547Smav	    ZFS_SPACE_CHECK_EXTRA_RESERVED));
1663168404Spjd}
1664168404Spjd
1665185029Spjdint
1666248571Smmdsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx)
1667168404Spjd{
1668248571Smm	dsl_dir_set_qr_arg_t *ddsqra = arg;
1669248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
1670248571Smm	dsl_dataset_t *ds;
1671248571Smm	dsl_dir_t *dd;
1672248571Smm	uint64_t newval, used, avail;
1673248571Smm	int error;
1674168404Spjd
1675248571Smm	error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
1676248571Smm	if (error != 0)
1677248571Smm		return (error);
1678248571Smm	dd = ds->ds_dir;
1679219089Spjd
1680168404Spjd	/*
1681168404Spjd	 * If we are doing the preliminary check in open context, the
1682168404Spjd	 * space estimates may be inaccurate.
1683168404Spjd	 */
1684248571Smm	if (!dmu_tx_is_syncing(tx)) {
1685248571Smm		dsl_dataset_rele(ds, FTAG);
1686168404Spjd		return (0);
1687248571Smm	}
1688168404Spjd
1689248571Smm	error = dsl_prop_predict(ds->ds_dir,
1690248571Smm	    zfs_prop_to_name(ZFS_PROP_RESERVATION),
1691248571Smm	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
1692248571Smm	if (error != 0) {
1693248571Smm		dsl_dataset_rele(ds, FTAG);
1694248571Smm		return (error);
1695248571Smm	}
1696248571Smm
1697168404Spjd	mutex_enter(&dd->dd_lock);
1698275782Sdelphij	used = dsl_dir_phys(dd)->dd_used_bytes;
1699168404Spjd	mutex_exit(&dd->dd_lock);
1700168404Spjd
1701168404Spjd	if (dd->dd_parent) {
1702168404Spjd		avail = dsl_dir_space_available(dd->dd_parent,
1703168404Spjd		    NULL, 0, FALSE);
1704168404Spjd	} else {
1705332547Smav		avail = dsl_pool_adjustedsize(dd->dd_pool,
1706332547Smav		    ZFS_SPACE_CHECK_NORMAL) - used;
1707168404Spjd	}
1708168404Spjd
1709275782Sdelphij	if (MAX(used, newval) > MAX(used, dsl_dir_phys(dd)->dd_reserved)) {
1710248571Smm		uint64_t delta = MAX(used, newval) -
1711275782Sdelphij		    MAX(used, dsl_dir_phys(dd)->dd_reserved);
1712209962Smm
1713248571Smm		if (delta > avail ||
1714275782Sdelphij		    (dsl_dir_phys(dd)->dd_quota > 0 &&
1715275782Sdelphij		    newval > dsl_dir_phys(dd)->dd_quota))
1716249195Smm			error = SET_ERROR(ENOSPC);
1717209962Smm	}
1718209962Smm
1719248571Smm	dsl_dataset_rele(ds, FTAG);
1720248571Smm	return (error);
1721168404Spjd}
1722168404Spjd
1723248571Smmvoid
1724248571Smmdsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
1725168404Spjd{
1726168404Spjd	uint64_t used;
1727168404Spjd	int64_t delta;
1728168404Spjd
1729185029Spjd	dmu_buf_will_dirty(dd->dd_dbuf, tx);
1730185029Spjd
1731168404Spjd	mutex_enter(&dd->dd_lock);
1732275782Sdelphij	used = dsl_dir_phys(dd)->dd_used_bytes;
1733275782Sdelphij	delta = MAX(used, value) - MAX(used, dsl_dir_phys(dd)->dd_reserved);
1734275782Sdelphij	dsl_dir_phys(dd)->dd_reserved = value;
1735168404Spjd
1736168404Spjd	if (dd->dd_parent != NULL) {
1737168404Spjd		/* Roll up this additional usage into our ancestors */
1738185029Spjd		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
1739185029Spjd		    delta, 0, 0, tx);
1740168404Spjd	}
1741185029Spjd	mutex_exit(&dd->dd_lock);
1742168404Spjd}
1743168404Spjd
1744248571Smmstatic void
1745248571Smmdsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx)
1746168404Spjd{
1747248571Smm	dsl_dir_set_qr_arg_t *ddsqra = arg;
1748248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
1749219089Spjd	dsl_dataset_t *ds;
1750248571Smm	uint64_t newval;
1751168404Spjd
1752248571Smm	VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
1753219089Spjd
1754249787Smm	if (spa_version(dp->dp_spa) >= SPA_VERSION_RECVD_PROPS) {
1755249787Smm		dsl_prop_set_sync_impl(ds,
1756249787Smm		    zfs_prop_to_name(ZFS_PROP_RESERVATION),
1757249787Smm		    ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
1758249787Smm		    &ddsqra->ddsqra_value, tx);
1759219089Spjd
1760249787Smm		VERIFY0(dsl_prop_get_int_ds(ds,
1761249787Smm		    zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval));
1762249787Smm	} else {
1763249787Smm		newval = ddsqra->ddsqra_value;
1764249787Smm		spa_history_log_internal_ds(ds, "set", tx, "%s=%lld",
1765249787Smm		    zfs_prop_to_name(ZFS_PROP_RESERVATION),
1766249787Smm		    (longlong_t)newval);
1767249787Smm	}
1768219089Spjd
1769248571Smm	dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx);
1770248571Smm	dsl_dataset_rele(ds, FTAG);
1771248571Smm}
1772219089Spjd
1773248571Smmint
1774248571Smmdsl_dir_set_reservation(const char *ddname, zprop_source_t source,
1775248571Smm    uint64_t reservation)
1776248571Smm{
1777248571Smm	dsl_dir_set_qr_arg_t ddsqra;
1778219089Spjd
1779248571Smm	ddsqra.ddsqra_name = ddname;
1780248571Smm	ddsqra.ddsqra_source = source;
1781248571Smm	ddsqra.ddsqra_value = reservation;
1782248571Smm
1783248571Smm	return (dsl_sync_task(ddname, dsl_dir_set_reservation_check,
1784332547Smav	    dsl_dir_set_reservation_sync, &ddsqra, 0,
1785332547Smav	    ZFS_SPACE_CHECK_EXTRA_RESERVED));
1786168404Spjd}
1787168404Spjd
1788168404Spjdstatic dsl_dir_t *
1789168404Spjdclosest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2)
1790168404Spjd{
1791168404Spjd	for (; ds1; ds1 = ds1->dd_parent) {
1792168404Spjd		dsl_dir_t *dd;
1793168404Spjd		for (dd = ds2; dd; dd = dd->dd_parent) {
1794168404Spjd			if (ds1 == dd)
1795168404Spjd				return (dd);
1796168404Spjd		}
1797168404Spjd	}
1798168404Spjd	return (NULL);
1799168404Spjd}
1800168404Spjd
1801168404Spjd/*
1802168404Spjd * If delta is applied to dd, how much of that delta would be applied to
1803168404Spjd * ancestor?  Syncing context only.
1804168404Spjd */
1805168404Spjdstatic int64_t
1806168404Spjdwould_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
1807168404Spjd{
1808168404Spjd	if (dd == ancestor)
1809168404Spjd		return (delta);
1810168404Spjd
1811168404Spjd	mutex_enter(&dd->dd_lock);
1812275782Sdelphij	delta = parent_delta(dd, dsl_dir_phys(dd)->dd_used_bytes, delta);
1813168404Spjd	mutex_exit(&dd->dd_lock);
1814168404Spjd	return (would_change(dd->dd_parent, delta, ancestor));
1815168404Spjd}
1816168404Spjd
/* Argument bundle handed to the rename sync task's check/sync callbacks. */
typedef struct dsl_dir_rename_arg {
	/* current full name of the directory being renamed */
	const char *ddra_oldname;
	/* requested full name (new parent path plus new last component) */
	const char *ddra_newname;
	/* credentials passed to the filesystem/snapshot limit checks */
	cred_t *ddra_cred;
} dsl_dir_rename_arg_t;
1822168404Spjd
/*
 * Per-rename deltas applied to every affected dataset name by
 * dsl_valid_rename().
 */
typedef struct dsl_valid_rename_arg {
	/* strlen(new name) - strlen(old name); may be negative */
	int char_delta;
	/* depth(new name) - depth(old name); may be negative */
	int nest_delta;
} dsl_valid_rename_arg_t;
1827339129Smav
1828248571Smm/* ARGSUSED */
1829168404Spjdstatic int
1830248571Smmdsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
1831168404Spjd{
1832339129Smav	dsl_valid_rename_arg_t *dvra = arg;
1833307108Smav	char namebuf[ZFS_MAX_DATASET_NAME_LEN];
1834168404Spjd
1835248571Smm	dsl_dataset_name(ds, namebuf);
1836248571Smm
1837339129Smav	ASSERT3U(strnlen(namebuf, ZFS_MAX_DATASET_NAME_LEN),
1838339129Smav	    <, ZFS_MAX_DATASET_NAME_LEN);
1839339129Smav	int namelen = strlen(namebuf) + dvra->char_delta;
1840339129Smav	int depth = get_dataset_depth(namebuf) + dvra->nest_delta;
1841339129Smav
1842339129Smav	if (namelen >= ZFS_MAX_DATASET_NAME_LEN)
1843249195Smm		return (SET_ERROR(ENAMETOOLONG));
1844339129Smav	if (dvra->nest_delta > 0 && depth >= zfs_max_dataset_nesting)
1845339129Smav		return (SET_ERROR(ENAMETOOLONG));
1846248571Smm	return (0);
1847248571Smm}
1848248571Smm
1849248571Smmstatic int
1850248571Smmdsl_dir_rename_check(void *arg, dmu_tx_t *tx)
1851248571Smm{
1852248571Smm	dsl_dir_rename_arg_t *ddra = arg;
1853248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
1854248571Smm	dsl_dir_t *dd, *newparent;
1855339129Smav	dsl_valid_rename_arg_t dvra;
1856359722Sfreqlabs	dsl_dataset_t *parentds;
1857359722Sfreqlabs	objset_t *parentos;
1858248571Smm	const char *mynewname;
1859248571Smm	int error;
1860248571Smm
1861248571Smm	/* target dir should exist */
1862248571Smm	error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL);
1863248571Smm	if (error != 0)
1864248571Smm		return (error);
1865248571Smm
1866248571Smm	/* new parent should exist */
1867248571Smm	error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG,
1868248571Smm	    &newparent, &mynewname);
1869248571Smm	if (error != 0) {
1870248571Smm		dsl_dir_rele(dd, FTAG);
1871248571Smm		return (error);
1872226676Spjd	}
1873168404Spjd
1874248571Smm	/* can't rename to different pool */
1875248571Smm	if (dd->dd_pool != newparent->dd_pool) {
1876248571Smm		dsl_dir_rele(newparent, FTAG);
1877248571Smm		dsl_dir_rele(dd, FTAG);
1878282127Savg		return (SET_ERROR(EXDEV));
1879248571Smm	}
1880248571Smm
1881248571Smm	/* new name should not already exist */
1882248571Smm	if (mynewname == NULL) {
1883248571Smm		dsl_dir_rele(newparent, FTAG);
1884248571Smm		dsl_dir_rele(dd, FTAG);
1885249195Smm		return (SET_ERROR(EEXIST));
1886248571Smm	}
1887168404Spjd
1888359722Sfreqlabs	/* can't rename below anything but filesystems (eg. no ZVOLs) */
1889359722Sfreqlabs	error = dsl_dataset_hold_obj(newparent->dd_pool,
1890359722Sfreqlabs	    dsl_dir_phys(newparent)->dd_head_dataset_obj, FTAG, &parentds);
1891359722Sfreqlabs	if (error != 0) {
1892359722Sfreqlabs		dsl_dir_rele(newparent, FTAG);
1893359722Sfreqlabs		dsl_dir_rele(dd, FTAG);
1894359722Sfreqlabs		return (error);
1895359722Sfreqlabs	}
1896359722Sfreqlabs	error = dmu_objset_from_ds(parentds, &parentos);
1897359722Sfreqlabs	if (error != 0) {
1898359722Sfreqlabs		dsl_dataset_rele(parentds, FTAG);
1899359722Sfreqlabs		dsl_dir_rele(newparent, FTAG);
1900359722Sfreqlabs		dsl_dir_rele(dd, FTAG);
1901359722Sfreqlabs		return (error);
1902359722Sfreqlabs	}
1903359722Sfreqlabs	if (dmu_objset_type(parentos) != DMU_OST_ZFS) {
1904359722Sfreqlabs		dsl_dataset_rele(parentds, FTAG);
1905359722Sfreqlabs		dsl_dir_rele(newparent, FTAG);
1906359722Sfreqlabs		dsl_dir_rele(dd, FTAG);
1907359722Sfreqlabs		return (error);
1908359722Sfreqlabs	}
1909359722Sfreqlabs	dsl_dataset_rele(parentds, FTAG);
1910359722Sfreqlabs
1911339129Smav	ASSERT3U(strnlen(ddra->ddra_newname, ZFS_MAX_DATASET_NAME_LEN),
1912339129Smav	    <, ZFS_MAX_DATASET_NAME_LEN);
1913339129Smav	ASSERT3U(strnlen(ddra->ddra_oldname, ZFS_MAX_DATASET_NAME_LEN),
1914339129Smav	    <, ZFS_MAX_DATASET_NAME_LEN);
1915339129Smav	dvra.char_delta = strlen(ddra->ddra_newname)
1916339129Smav	    - strlen(ddra->ddra_oldname);
1917339129Smav	dvra.nest_delta = get_dataset_depth(ddra->ddra_newname)
1918339129Smav	    - get_dataset_depth(ddra->ddra_oldname);
1919339129Smav
1920248571Smm	/* if the name length is growing, validate child name lengths */
1921339129Smav	if (dvra.char_delta > 0 || dvra.nest_delta > 0) {
1922248571Smm		error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename,
1923339129Smav		    &dvra, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
1924248571Smm		if (error != 0) {
1925248571Smm			dsl_dir_rele(newparent, FTAG);
1926248571Smm			dsl_dir_rele(dd, FTAG);
1927248571Smm			return (error);
1928248571Smm		}
1929248571Smm	}
1930248571Smm
1931264835Sdelphij	if (dmu_tx_is_syncing(tx)) {
1932266915Sdelphij		if (spa_feature_is_active(dp->dp_spa,
1933264835Sdelphij		    SPA_FEATURE_FS_SS_LIMIT)) {
1934264835Sdelphij			/*
1935264835Sdelphij			 * Although this is the check function and we don't
1936264835Sdelphij			 * normally make on-disk changes in check functions,
1937264835Sdelphij			 * we need to do that here.
1938264835Sdelphij			 *
1939264835Sdelphij			 * Ensure this portion of the tree's counts have been
1940264835Sdelphij			 * initialized in case the new parent has limits set.
1941264835Sdelphij			 */
1942264835Sdelphij			dsl_dir_init_fs_ss_count(dd, tx);
1943264835Sdelphij		}
1944264835Sdelphij	}
1945264835Sdelphij
1946248571Smm	if (newparent != dd->dd_parent) {
1947168404Spjd		/* is there enough space? */
1948168404Spjd		uint64_t myspace =
1949275782Sdelphij		    MAX(dsl_dir_phys(dd)->dd_used_bytes,
1950275782Sdelphij		    dsl_dir_phys(dd)->dd_reserved);
1951264835Sdelphij		objset_t *os = dd->dd_pool->dp_meta_objset;
1952264835Sdelphij		uint64_t fs_cnt = 0;
1953264835Sdelphij		uint64_t ss_cnt = 0;
1954168404Spjd
1955264835Sdelphij		if (dsl_dir_is_zapified(dd)) {
1956264835Sdelphij			int err;
1957264835Sdelphij
1958264835Sdelphij			err = zap_lookup(os, dd->dd_object,
1959264835Sdelphij			    DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1,
1960264835Sdelphij			    &fs_cnt);
1961266915Sdelphij			if (err != ENOENT && err != 0) {
1962266915Sdelphij				dsl_dir_rele(newparent, FTAG);
1963266915Sdelphij				dsl_dir_rele(dd, FTAG);
1964264835Sdelphij				return (err);
1965266915Sdelphij			}
1966264835Sdelphij
1967264835Sdelphij			/*
1968264835Sdelphij			 * have to add 1 for the filesystem itself that we're
1969264835Sdelphij			 * moving
1970264835Sdelphij			 */
1971264835Sdelphij			fs_cnt++;
1972264835Sdelphij
1973264835Sdelphij			err = zap_lookup(os, dd->dd_object,
1974264835Sdelphij			    DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1,
1975264835Sdelphij			    &ss_cnt);
1976266915Sdelphij			if (err != ENOENT && err != 0) {
1977266915Sdelphij				dsl_dir_rele(newparent, FTAG);
1978266915Sdelphij				dsl_dir_rele(dd, FTAG);
1979264835Sdelphij				return (err);
1980266915Sdelphij			}
1981264835Sdelphij		}
1982264835Sdelphij
1983168404Spjd		/* no rename into our descendant */
1984248571Smm		if (closest_common_ancestor(dd, newparent) == dd) {
1985248571Smm			dsl_dir_rele(newparent, FTAG);
1986248571Smm			dsl_dir_rele(dd, FTAG);
1987249195Smm			return (SET_ERROR(EINVAL));
1988248571Smm		}
1989168404Spjd
1990248571Smm		error = dsl_dir_transfer_possible(dd->dd_parent,
1991264835Sdelphij		    newparent, fs_cnt, ss_cnt, myspace, ddra->ddra_cred);
1992248571Smm		if (error != 0) {
1993248571Smm			dsl_dir_rele(newparent, FTAG);
1994248571Smm			dsl_dir_rele(dd, FTAG);
1995248571Smm			return (error);
1996248571Smm		}
1997168404Spjd	}
1998168404Spjd
1999248571Smm	dsl_dir_rele(newparent, FTAG);
2000248571Smm	dsl_dir_rele(dd, FTAG);
2001168404Spjd	return (0);
2002168404Spjd}
2003168404Spjd
2004168404Spjdstatic void
2005248571Smmdsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
2006168404Spjd{
2007248571Smm	dsl_dir_rename_arg_t *ddra = arg;
2008248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
2009248571Smm	dsl_dir_t *dd, *newparent;
2010248571Smm	const char *mynewname;
2011248571Smm	int error;
2012168404Spjd	objset_t *mos = dp->dp_meta_objset;
2013168404Spjd
2014248571Smm	VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL));
2015248571Smm	VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent,
2016248571Smm	    &mynewname));
2017248571Smm
2018248571Smm	/* Log this before we change the name. */
2019248571Smm	spa_history_log_internal_dd(dd, "rename", tx,
2020248571Smm	    "-> %s", ddra->ddra_newname);
2021248571Smm
2022248571Smm	if (newparent != dd->dd_parent) {
2023264835Sdelphij		objset_t *os = dd->dd_pool->dp_meta_objset;
2024264835Sdelphij		uint64_t fs_cnt = 0;
2025264835Sdelphij		uint64_t ss_cnt = 0;
2026264835Sdelphij
2027264835Sdelphij		/*
2028264835Sdelphij		 * We already made sure the dd counts were initialized in the
2029264835Sdelphij		 * check function.
2030264835Sdelphij		 */
2031266915Sdelphij		if (spa_feature_is_active(dp->dp_spa,
2032264835Sdelphij		    SPA_FEATURE_FS_SS_LIMIT)) {
2033264835Sdelphij			VERIFY0(zap_lookup(os, dd->dd_object,
2034264835Sdelphij			    DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1,
2035264835Sdelphij			    &fs_cnt));
2036264835Sdelphij			/* add 1 for the filesystem itself that we're moving */
2037264835Sdelphij			fs_cnt++;
2038264835Sdelphij
2039264835Sdelphij			VERIFY0(zap_lookup(os, dd->dd_object,
2040264835Sdelphij			    DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1,
2041264835Sdelphij			    &ss_cnt));
2042264835Sdelphij		}
2043264835Sdelphij
2044264835Sdelphij		dsl_fs_ss_count_adjust(dd->dd_parent, -fs_cnt,
2045264835Sdelphij		    DD_FIELD_FILESYSTEM_COUNT, tx);
2046264835Sdelphij		dsl_fs_ss_count_adjust(newparent, fs_cnt,
2047264835Sdelphij		    DD_FIELD_FILESYSTEM_COUNT, tx);
2048264835Sdelphij
2049264835Sdelphij		dsl_fs_ss_count_adjust(dd->dd_parent, -ss_cnt,
2050264835Sdelphij		    DD_FIELD_SNAPSHOT_COUNT, tx);
2051264835Sdelphij		dsl_fs_ss_count_adjust(newparent, ss_cnt,
2052264835Sdelphij		    DD_FIELD_SNAPSHOT_COUNT, tx);
2053264835Sdelphij
2054185029Spjd		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
2055275782Sdelphij		    -dsl_dir_phys(dd)->dd_used_bytes,
2056275782Sdelphij		    -dsl_dir_phys(dd)->dd_compressed_bytes,
2057275782Sdelphij		    -dsl_dir_phys(dd)->dd_uncompressed_bytes, tx);
2058248571Smm		dsl_dir_diduse_space(newparent, DD_USED_CHILD,
2059275782Sdelphij		    dsl_dir_phys(dd)->dd_used_bytes,
2060275782Sdelphij		    dsl_dir_phys(dd)->dd_compressed_bytes,
2061275782Sdelphij		    dsl_dir_phys(dd)->dd_uncompressed_bytes, tx);
2062185029Spjd
2063275782Sdelphij		if (dsl_dir_phys(dd)->dd_reserved >
2064275782Sdelphij		    dsl_dir_phys(dd)->dd_used_bytes) {
2065275782Sdelphij			uint64_t unused_rsrv = dsl_dir_phys(dd)->dd_reserved -
2066275782Sdelphij			    dsl_dir_phys(dd)->dd_used_bytes;
2067185029Spjd
2068185029Spjd			dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
2069185029Spjd			    -unused_rsrv, 0, 0, tx);
2070248571Smm			dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV,
2071185029Spjd			    unused_rsrv, 0, 0, tx);
2072185029Spjd		}
2073168404Spjd	}
2074168404Spjd
2075168404Spjd	dmu_buf_will_dirty(dd->dd_dbuf, tx);
2076168404Spjd
2077168404Spjd	/* remove from old parent zapobj */
2078275782Sdelphij	error = zap_remove(mos,
2079275782Sdelphij	    dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj,
2080168404Spjd	    dd->dd_myname, tx);
2081248571Smm	ASSERT0(error);
2082168404Spjd
2083248571Smm	(void) strcpy(dd->dd_myname, mynewname);
2084248571Smm	dsl_dir_rele(dd->dd_parent, dd);
2085275782Sdelphij	dsl_dir_phys(dd)->dd_parent_obj = newparent->dd_object;
2086248571Smm	VERIFY0(dsl_dir_hold_obj(dp,
2087248571Smm	    newparent->dd_object, NULL, dd, &dd->dd_parent));
2088168404Spjd
2089168404Spjd	/* add to new parent zapobj */
2090275782Sdelphij	VERIFY0(zap_add(mos, dsl_dir_phys(newparent)->dd_child_dir_zapobj,
2091248571Smm	    dd->dd_myname, 8, 1, &dd->dd_object, tx));
2092248571Smm
2093248571Smm#ifdef __FreeBSD__
2094219320Spjd#ifdef _KERNEL
2095248571Smm	zfsvfs_update_fromname(ddra->ddra_oldname, ddra->ddra_newname);
2096248571Smm	zvol_rename_minors(ddra->ddra_oldname, ddra->ddra_newname);
2097219320Spjd#endif
2098248571Smm#endif
2099185029Spjd
2100248571Smm	dsl_prop_notify_all(dd);
2101248571Smm
2102248571Smm	dsl_dir_rele(newparent, FTAG);
2103248571Smm	dsl_dir_rele(dd, FTAG);
2104168404Spjd}
2105168404Spjd
2106168404Spjdint
2107248571Smmdsl_dir_rename(const char *oldname, const char *newname)
2108168404Spjd{
2109248571Smm	dsl_dir_rename_arg_t ddra;
2110168404Spjd
2111248571Smm	ddra.ddra_oldname = oldname;
2112248571Smm	ddra.ddra_newname = newname;
2113264835Sdelphij	ddra.ddra_cred = CRED();
2114168404Spjd
2115248571Smm	return (dsl_sync_task(oldname,
2116268473Sdelphij	    dsl_dir_rename_check, dsl_dir_rename_sync, &ddra,
2117268473Sdelphij	    3, ZFS_SPACE_CHECK_RESERVED));
2118168404Spjd}
2119168404Spjd
2120168404Spjdint
2121264835Sdelphijdsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd,
2122264835Sdelphij    uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space, cred_t *cr)
2123168404Spjd{
2124168404Spjd	dsl_dir_t *ancestor;
2125168404Spjd	int64_t adelta;
2126168404Spjd	uint64_t avail;
2127264835Sdelphij	int err;
2128168404Spjd
2129168404Spjd	ancestor = closest_common_ancestor(sdd, tdd);
2130168404Spjd	adelta = would_change(sdd, -space, ancestor);
2131168404Spjd	avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE);
2132168404Spjd	if (avail < space)
2133249195Smm		return (SET_ERROR(ENOSPC));
2134168404Spjd
2135264835Sdelphij	err = dsl_fs_ss_limit_check(tdd, fs_cnt, ZFS_PROP_FILESYSTEM_LIMIT,
2136264835Sdelphij	    ancestor, cr);
2137264835Sdelphij	if (err != 0)
2138264835Sdelphij		return (err);
2139264835Sdelphij	err = dsl_fs_ss_limit_check(tdd, ss_cnt, ZFS_PROP_SNAPSHOT_LIMIT,
2140264835Sdelphij	    ancestor, cr);
2141264835Sdelphij	if (err != 0)
2142264835Sdelphij		return (err);
2143264835Sdelphij
2144168404Spjd	return (0);
2145168404Spjd}
2146219089Spjd
2147219089Spjdtimestruc_t
2148219089Spjddsl_dir_snap_cmtime(dsl_dir_t *dd)
2149219089Spjd{
2150219089Spjd	timestruc_t t;
2151219089Spjd
2152219089Spjd	mutex_enter(&dd->dd_lock);
2153219089Spjd	t = dd->dd_snap_cmtime;
2154219089Spjd	mutex_exit(&dd->dd_lock);
2155219089Spjd
2156219089Spjd	return (t);
2157219089Spjd}
2158219089Spjd
2159219089Spjdvoid
2160219089Spjddsl_dir_snap_cmtime_update(dsl_dir_t *dd)
2161219089Spjd{
2162219089Spjd	timestruc_t t;
2163219089Spjd
2164219089Spjd	gethrestime(&t);
2165219089Spjd	mutex_enter(&dd->dd_lock);
2166219089Spjd	dd->dd_snap_cmtime = t;
2167219089Spjd	mutex_exit(&dd->dd_lock);
2168219089Spjd}
2169259813Sdelphij
2170259813Sdelphijvoid
2171259813Sdelphijdsl_dir_zapify(dsl_dir_t *dd, dmu_tx_t *tx)
2172259813Sdelphij{
2173259813Sdelphij	objset_t *mos = dd->dd_pool->dp_meta_objset;
2174259813Sdelphij	dmu_object_zapify(mos, dd->dd_object, DMU_OT_DSL_DIR, tx);
2175259813Sdelphij}
2176264835Sdelphij
2177264835Sdelphijboolean_t
2178264835Sdelphijdsl_dir_is_zapified(dsl_dir_t *dd)
2179264835Sdelphij{
2180264835Sdelphij	dmu_object_info_t doi;
2181264835Sdelphij
2182264835Sdelphij	dmu_object_info_from_db(dd->dd_dbuf, &doi);
2183264835Sdelphij	return (doi.doi_type == DMU_OTN_ZAP_METADATA);
2184264835Sdelphij}
2185