1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21168404Spjd/*
22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23249643Smm * Copyright (c) 2013 by Delphix. All rights reserved.
24252140Sdelphij * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
25265754Sdelphij * Copyright (c) 2013, Joyent, Inc. All rights reserved.
26168404Spjd */
27168404Spjd
28219089Spjd/* Portions Copyright 2010 Robert Milkowski */
29219089Spjd
30185029Spjd#include <sys/cred.h>
31168404Spjd#include <sys/zfs_context.h>
32168404Spjd#include <sys/dmu_objset.h>
33168404Spjd#include <sys/dsl_dir.h>
34168404Spjd#include <sys/dsl_dataset.h>
35168404Spjd#include <sys/dsl_prop.h>
36168404Spjd#include <sys/dsl_pool.h>
37168404Spjd#include <sys/dsl_synctask.h>
38185029Spjd#include <sys/dsl_deleg.h>
39168404Spjd#include <sys/dnode.h>
40168404Spjd#include <sys/dbuf.h>
41168404Spjd#include <sys/zvol.h>
42168404Spjd#include <sys/dmu_tx.h>
43168404Spjd#include <sys/zap.h>
44168404Spjd#include <sys/zil.h>
45168404Spjd#include <sys/dmu_impl.h>
46185029Spjd#include <sys/zfs_ioctl.h>
47219089Spjd#include <sys/sa.h>
48219089Spjd#include <sys/zfs_onexit.h>
49249643Smm#include <sys/dsl_destroy.h>
50168404Spjd
51219089Spjd/*
52219089Spjd * Needed to close a window in dnode_move() that allows the objset to be freed
53219089Spjd * before it can be safely accessed.
54219089Spjd */
55219089Spjdkrwlock_t os_lock;
56219089Spjd
57219089Spjdvoid
58219089Spjddmu_objset_init(void)
59219089Spjd{
60219089Spjd	rw_init(&os_lock, NULL, RW_DEFAULT, NULL);
61219089Spjd}
62219089Spjd
63219089Spjdvoid
64219089Spjddmu_objset_fini(void)
65219089Spjd{
66219089Spjd	rw_destroy(&os_lock);
67219089Spjd}
68219089Spjd
69168404Spjdspa_t *
70168404Spjddmu_objset_spa(objset_t *os)
71168404Spjd{
72219089Spjd	return (os->os_spa);
73168404Spjd}
74168404Spjd
75168404Spjdzilog_t *
76168404Spjddmu_objset_zil(objset_t *os)
77168404Spjd{
78219089Spjd	return (os->os_zil);
79168404Spjd}
80168404Spjd
81168404Spjddsl_pool_t *
82168404Spjddmu_objset_pool(objset_t *os)
83168404Spjd{
84168404Spjd	dsl_dataset_t *ds;
85168404Spjd
86219089Spjd	if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
87168404Spjd		return (ds->ds_dir->dd_pool);
88168404Spjd	else
89219089Spjd		return (spa_get_dsl(os->os_spa));
90168404Spjd}
91168404Spjd
92168404Spjddsl_dataset_t *
93168404Spjddmu_objset_ds(objset_t *os)
94168404Spjd{
95219089Spjd	return (os->os_dsl_dataset);
96168404Spjd}
97168404Spjd
98168404Spjddmu_objset_type_t
99168404Spjddmu_objset_type(objset_t *os)
100168404Spjd{
101219089Spjd	return (os->os_phys->os_type);
102168404Spjd}
103168404Spjd
104168404Spjdvoid
105168404Spjddmu_objset_name(objset_t *os, char *buf)
106168404Spjd{
107219089Spjd	dsl_dataset_name(os->os_dsl_dataset, buf);
108168404Spjd}
109168404Spjd
110168404Spjduint64_t
111168404Spjddmu_objset_id(objset_t *os)
112168404Spjd{
113219089Spjd	dsl_dataset_t *ds = os->os_dsl_dataset;
114168404Spjd
115168404Spjd	return (ds ? ds->ds_object : 0);
116168404Spjd}
117168404Spjd
118219089Spjduint64_t
119219089Spjddmu_objset_syncprop(objset_t *os)
120219089Spjd{
121219089Spjd	return (os->os_sync);
122219089Spjd}
123219089Spjd
124219089Spjduint64_t
125219089Spjddmu_objset_logbias(objset_t *os)
126219089Spjd{
127219089Spjd	return (os->os_logbias);
128219089Spjd}
129219089Spjd
130168404Spjdstatic void
131168404Spjdchecksum_changed_cb(void *arg, uint64_t newval)
132168404Spjd{
133219089Spjd	objset_t *os = arg;
134168404Spjd
135168404Spjd	/*
136168404Spjd	 * Inheritance should have been done by now.
137168404Spjd	 */
138168404Spjd	ASSERT(newval != ZIO_CHECKSUM_INHERIT);
139168404Spjd
140219089Spjd	os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
141168404Spjd}
142168404Spjd
143168404Spjdstatic void
144168404Spjdcompression_changed_cb(void *arg, uint64_t newval)
145168404Spjd{
146219089Spjd	objset_t *os = arg;
147168404Spjd
148168404Spjd	/*
149168404Spjd	 * Inheritance and range checking should have been done by now.
150168404Spjd	 */
151168404Spjd	ASSERT(newval != ZIO_COMPRESS_INHERIT);
152168404Spjd
153219089Spjd	os->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE);
154168404Spjd}
155168404Spjd
156168404Spjdstatic void
157168404Spjdcopies_changed_cb(void *arg, uint64_t newval)
158168404Spjd{
159219089Spjd	objset_t *os = arg;
160168404Spjd
161168404Spjd	/*
162168404Spjd	 * Inheritance and range checking should have been done by now.
163168404Spjd	 */
164168404Spjd	ASSERT(newval > 0);
165219089Spjd	ASSERT(newval <= spa_max_replication(os->os_spa));
166168404Spjd
167219089Spjd	os->os_copies = newval;
168168404Spjd}
169168404Spjd
170185029Spjdstatic void
171219089Spjddedup_changed_cb(void *arg, uint64_t newval)
172219089Spjd{
173219089Spjd	objset_t *os = arg;
174219089Spjd	spa_t *spa = os->os_spa;
175219089Spjd	enum zio_checksum checksum;
176219089Spjd
177219089Spjd	/*
178219089Spjd	 * Inheritance should have been done by now.
179219089Spjd	 */
180219089Spjd	ASSERT(newval != ZIO_CHECKSUM_INHERIT);
181219089Spjd
182219089Spjd	checksum = zio_checksum_dedup_select(spa, newval, ZIO_CHECKSUM_OFF);
183219089Spjd
184219089Spjd	os->os_dedup_checksum = checksum & ZIO_CHECKSUM_MASK;
185219089Spjd	os->os_dedup_verify = !!(checksum & ZIO_CHECKSUM_VERIFY);
186219089Spjd}
187219089Spjd
188219089Spjdstatic void
189185029Spjdprimary_cache_changed_cb(void *arg, uint64_t newval)
190185029Spjd{
191219089Spjd	objset_t *os = arg;
192185029Spjd
193185029Spjd	/*
194185029Spjd	 * Inheritance and range checking should have been done by now.
195185029Spjd	 */
196185029Spjd	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
197185029Spjd	    newval == ZFS_CACHE_METADATA);
198185029Spjd
199219089Spjd	os->os_primary_cache = newval;
200185029Spjd}
201185029Spjd
202185029Spjdstatic void
203185029Spjdsecondary_cache_changed_cb(void *arg, uint64_t newval)
204185029Spjd{
205219089Spjd	objset_t *os = arg;
206185029Spjd
207185029Spjd	/*
208185029Spjd	 * Inheritance and range checking should have been done by now.
209185029Spjd	 */
210185029Spjd	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
211185029Spjd	    newval == ZFS_CACHE_METADATA);
212185029Spjd
213219089Spjd	os->os_secondary_cache = newval;
214185029Spjd}
215185029Spjd
216219089Spjdstatic void
217219089Spjdsync_changed_cb(void *arg, uint64_t newval)
218219089Spjd{
219219089Spjd	objset_t *os = arg;
220219089Spjd
221219089Spjd	/*
222219089Spjd	 * Inheritance and range checking should have been done by now.
223219089Spjd	 */
224219089Spjd	ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS ||
225219089Spjd	    newval == ZFS_SYNC_DISABLED);
226219089Spjd
227219089Spjd	os->os_sync = newval;
228219089Spjd	if (os->os_zil)
229219089Spjd		zil_set_sync(os->os_zil, newval);
230219089Spjd}
231219089Spjd
232219089Spjdstatic void
233219089Spjdlogbias_changed_cb(void *arg, uint64_t newval)
234219089Spjd{
235219089Spjd	objset_t *os = arg;
236219089Spjd
237219089Spjd	ASSERT(newval == ZFS_LOGBIAS_LATENCY ||
238219089Spjd	    newval == ZFS_LOGBIAS_THROUGHPUT);
239219089Spjd	os->os_logbias = newval;
240219089Spjd	if (os->os_zil)
241219089Spjd		zil_set_logbias(os->os_zil, newval);
242219089Spjd}
243219089Spjd
244168404Spjdvoid
245168404Spjddmu_objset_byteswap(void *buf, size_t size)
246168404Spjd{
247168404Spjd	objset_phys_t *osp = buf;
248168404Spjd
249209962Smm	ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t));
250168404Spjd	dnode_byteswap(&osp->os_meta_dnode);
251168404Spjd	byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
252168404Spjd	osp->os_type = BSWAP_64(osp->os_type);
253209962Smm	osp->os_flags = BSWAP_64(osp->os_flags);
254209962Smm	if (size == sizeof (objset_phys_t)) {
255209962Smm		dnode_byteswap(&osp->os_userused_dnode);
256209962Smm		dnode_byteswap(&osp->os_groupused_dnode);
257209962Smm	}
258168404Spjd}
259168404Spjd
260168404Spjdint
261168404Spjddmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
262219089Spjd    objset_t **osp)
263168404Spjd{
264219089Spjd	objset_t *os;
265185029Spjd	int i, err;
266168404Spjd
267185029Spjd	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));
268185029Spjd
269219089Spjd	os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
270219089Spjd	os->os_dsl_dataset = ds;
271219089Spjd	os->os_spa = spa;
272219089Spjd	os->os_rootbp = bp;
273219089Spjd	if (!BP_IS_HOLE(os->os_rootbp)) {
274168404Spjd		uint32_t aflags = ARC_WAIT;
275168404Spjd		zbookmark_t zb;
276219089Spjd		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
277219089Spjd		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
278219089Spjd
279219089Spjd		if (DMU_OS_IS_L2CACHEABLE(os))
280185029Spjd			aflags |= ARC_L2CACHE;
281252140Sdelphij		if (DMU_OS_IS_L2COMPRESSIBLE(os))
282252140Sdelphij			aflags |= ARC_L2COMPRESS;
283168404Spjd
284219089Spjd		dprintf_bp(os->os_rootbp, "reading %s", "");
285247406Smm		err = arc_read(NULL, spa, os->os_rootbp,
286219089Spjd		    arc_getbuf_func, &os->os_phys_buf,
287168404Spjd		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
288249643Smm		if (err != 0) {
289219089Spjd			kmem_free(os, sizeof (objset_t));
290185029Spjd			/* convert checksum errors into IO errors */
291185029Spjd			if (err == ECKSUM)
292249643Smm				err = SET_ERROR(EIO);
293168404Spjd			return (err);
294168404Spjd		}
295209962Smm
296209962Smm		/* Increase the blocksize if we are permitted. */
297209962Smm		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
298219089Spjd		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
299209962Smm			arc_buf_t *buf = arc_buf_alloc(spa,
300219089Spjd			    sizeof (objset_phys_t), &os->os_phys_buf,
301209962Smm			    ARC_BUFC_METADATA);
302209962Smm			bzero(buf->b_data, sizeof (objset_phys_t));
303219089Spjd			bcopy(os->os_phys_buf->b_data, buf->b_data,
304219089Spjd			    arc_buf_size(os->os_phys_buf));
305219089Spjd			(void) arc_buf_remove_ref(os->os_phys_buf,
306219089Spjd			    &os->os_phys_buf);
307219089Spjd			os->os_phys_buf = buf;
308209962Smm		}
309209962Smm
310219089Spjd		os->os_phys = os->os_phys_buf->b_data;
311219089Spjd		os->os_flags = os->os_phys->os_flags;
312168404Spjd	} else {
313209962Smm		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
314209962Smm		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
315219089Spjd		os->os_phys_buf = arc_buf_alloc(spa, size,
316219089Spjd		    &os->os_phys_buf, ARC_BUFC_METADATA);
317219089Spjd		os->os_phys = os->os_phys_buf->b_data;
318219089Spjd		bzero(os->os_phys, size);
319168404Spjd	}
320168404Spjd
321168404Spjd	/*
322168404Spjd	 * Note: the changed_cb will be called once before the register
323168404Spjd	 * func returns, thus changing the checksum/compression from the
324185029Spjd	 * default (fletcher2/off).  Snapshots don't need to know about
325185029Spjd	 * checksum/compression/copies.
326168404Spjd	 */
327185029Spjd	if (ds) {
328249643Smm		err = dsl_prop_register(ds,
329249643Smm		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
330219089Spjd		    primary_cache_changed_cb, os);
331249643Smm		if (err == 0) {
332249643Smm			err = dsl_prop_register(ds,
333249643Smm			    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
334219089Spjd			    secondary_cache_changed_cb, os);
335249643Smm		}
336185029Spjd		if (!dsl_dataset_is_snapshot(ds)) {
337249643Smm			if (err == 0) {
338249643Smm				err = dsl_prop_register(ds,
339249643Smm				    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
340219089Spjd				    checksum_changed_cb, os);
341249643Smm			}
342249643Smm			if (err == 0) {
343249643Smm				err = dsl_prop_register(ds,
344249643Smm				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
345219089Spjd				    compression_changed_cb, os);
346249643Smm			}
347249643Smm			if (err == 0) {
348249643Smm				err = dsl_prop_register(ds,
349249643Smm				    zfs_prop_to_name(ZFS_PROP_COPIES),
350219089Spjd				    copies_changed_cb, os);
351249643Smm			}
352249643Smm			if (err == 0) {
353249643Smm				err = dsl_prop_register(ds,
354249643Smm				    zfs_prop_to_name(ZFS_PROP_DEDUP),
355219089Spjd				    dedup_changed_cb, os);
356249643Smm			}
357249643Smm			if (err == 0) {
358249643Smm				err = dsl_prop_register(ds,
359249643Smm				    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
360219089Spjd				    logbias_changed_cb, os);
361249643Smm			}
362249643Smm			if (err == 0) {
363249643Smm				err = dsl_prop_register(ds,
364249643Smm				    zfs_prop_to_name(ZFS_PROP_SYNC),
365219089Spjd				    sync_changed_cb, os);
366249643Smm			}
367185029Spjd		}
368249643Smm		if (err != 0) {
369219089Spjd			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
370249643Smm			    &os->os_phys_buf));
371219089Spjd			kmem_free(os, sizeof (objset_t));
372168404Spjd			return (err);
373168404Spjd		}
374168404Spjd	} else if (ds == NULL) {
375168404Spjd		/* It's the meta-objset. */
376219089Spjd		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
377219089Spjd		os->os_compress = ZIO_COMPRESS_LZJB;
378219089Spjd		os->os_copies = spa_max_replication(spa);
379219089Spjd		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
380219089Spjd		os->os_dedup_verify = 0;
381219089Spjd		os->os_logbias = 0;
382219089Spjd		os->os_sync = 0;
383219089Spjd		os->os_primary_cache = ZFS_CACHE_ALL;
384219089Spjd		os->os_secondary_cache = ZFS_CACHE_ALL;
385168404Spjd	}
386168404Spjd
387219089Spjd	if (ds == NULL || !dsl_dataset_is_snapshot(ds))
388219089Spjd		os->os_zil_header = os->os_phys->os_zil_header;
389219089Spjd	os->os_zil = zil_alloc(os, &os->os_zil_header);
390168404Spjd
391168404Spjd	for (i = 0; i < TXG_SIZE; i++) {
392219089Spjd		list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
393168404Spjd		    offsetof(dnode_t, dn_dirty_link[i]));
394219089Spjd		list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
395168404Spjd		    offsetof(dnode_t, dn_dirty_link[i]));
396168404Spjd	}
397219089Spjd	list_create(&os->os_dnodes, sizeof (dnode_t),
398168404Spjd	    offsetof(dnode_t, dn_link));
399219089Spjd	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
400168404Spjd	    offsetof(dmu_buf_impl_t, db_link));
401168404Spjd
402219089Spjd	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
403219089Spjd	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
404219089Spjd	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
405168404Spjd
406219089Spjd	DMU_META_DNODE(os) = dnode_special_open(os,
407219089Spjd	    &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT,
408219089Spjd	    &os->os_meta_dnode);
409219089Spjd	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
410219089Spjd		DMU_USERUSED_DNODE(os) = dnode_special_open(os,
411219089Spjd		    &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT,
412219089Spjd		    &os->os_userused_dnode);
413219089Spjd		DMU_GROUPUSED_DNODE(os) = dnode_special_open(os,
414219089Spjd		    &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT,
415219089Spjd		    &os->os_groupused_dnode);
416209962Smm	}
417168404Spjd
418185029Spjd	/*
419185029Spjd	 * We should be the only thread trying to do this because we
420185029Spjd	 * have ds_opening_lock
421185029Spjd	 */
422185029Spjd	if (ds) {
423219089Spjd		mutex_enter(&ds->ds_lock);
424219089Spjd		ASSERT(ds->ds_objset == NULL);
425219089Spjd		ds->ds_objset = os;
426219089Spjd		mutex_exit(&ds->ds_lock);
427168404Spjd	}
428168404Spjd
429219089Spjd	*osp = os;
430168404Spjd	return (0);
431168404Spjd}
432168404Spjd
433219089Spjdint
434219089Spjddmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
435168404Spjd{
436219089Spjd	int err = 0;
437168404Spjd
438185029Spjd	mutex_enter(&ds->ds_opening_lock);
439219089Spjd	*osp = ds->ds_objset;
440219089Spjd	if (*osp == NULL) {
441168404Spjd		err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
442219089Spjd		    ds, dsl_dataset_get_blkptr(ds), osp);
443168404Spjd	}
444185029Spjd	mutex_exit(&ds->ds_opening_lock);
445219089Spjd	return (err);
446168404Spjd}
447168404Spjd
448249643Smm/*
449249643Smm * Holds the pool while the objset is held.  Therefore only one objset
450249643Smm * can be held at a time.
451249643Smm */
452185029Spjdint
453219089Spjddmu_objset_hold(const char *name, void *tag, objset_t **osp)
454185029Spjd{
455249643Smm	dsl_pool_t *dp;
456219089Spjd	dsl_dataset_t *ds;
457185029Spjd	int err;
458185029Spjd
459249643Smm	err = dsl_pool_hold(name, tag, &dp);
460249643Smm	if (err != 0)
461219089Spjd		return (err);
462249643Smm	err = dsl_dataset_hold(dp, name, tag, &ds);
463249643Smm	if (err != 0) {
464249643Smm		dsl_pool_rele(dp, tag);
465249643Smm		return (err);
466249643Smm	}
467219089Spjd
468219089Spjd	err = dmu_objset_from_ds(ds, osp);
469249643Smm	if (err != 0) {
470219089Spjd		dsl_dataset_rele(ds, tag);
471249643Smm		dsl_pool_rele(dp, tag);
472249643Smm	}
473219089Spjd
474185029Spjd	return (err);
475185029Spjd}
476185029Spjd
477249643Smm/*
478249643Smm * dsl_pool must not be held when this is called.
479249643Smm * Upon successful return, there will be a longhold on the dataset,
480249643Smm * and the dsl_pool will not be held.
481249643Smm */
482185029Spjdint
483219089Spjddmu_objset_own(const char *name, dmu_objset_type_t type,
484219089Spjd    boolean_t readonly, void *tag, objset_t **osp)
485185029Spjd{
486249643Smm	dsl_pool_t *dp;
487185029Spjd	dsl_dataset_t *ds;
488185029Spjd	int err;
489185029Spjd
490249643Smm	err = dsl_pool_hold(name, FTAG, &dp);
491249643Smm	if (err != 0)
492185029Spjd		return (err);
493249643Smm	err = dsl_dataset_own(dp, name, tag, &ds);
494249643Smm	if (err != 0) {
495249643Smm		dsl_pool_rele(dp, FTAG);
496249643Smm		return (err);
497249643Smm	}
498185029Spjd
499219089Spjd	err = dmu_objset_from_ds(ds, osp);
500249643Smm	dsl_pool_rele(dp, FTAG);
501249643Smm	if (err != 0) {
502219089Spjd		dsl_dataset_disown(ds, tag);
503219089Spjd	} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
504249643Smm		dsl_dataset_disown(ds, tag);
505249643Smm		return (SET_ERROR(EINVAL));
506219089Spjd	} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
507249643Smm		dsl_dataset_disown(ds, tag);
508249643Smm		return (SET_ERROR(EROFS));
509185029Spjd	}
510185029Spjd	return (err);
511185029Spjd}
512185029Spjd
513168404Spjdvoid
514219089Spjddmu_objset_rele(objset_t *os, void *tag)
515168404Spjd{
516249643Smm	dsl_pool_t *dp = dmu_objset_pool(os);
517219089Spjd	dsl_dataset_rele(os->os_dsl_dataset, tag);
518249643Smm	dsl_pool_rele(dp, tag);
519219089Spjd}
520185029Spjd
521257119Sdelphij/*
522257119Sdelphij * When we are called, os MUST refer to an objset associated with a dataset
523257119Sdelphij * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
524257119Sdelphij * == tag.  We will then release and reacquire ownership of the dataset while
525257119Sdelphij * holding the pool config_rwlock to avoid intervening namespace or ownership
526257119Sdelphij * changes may occur.
527257119Sdelphij *
528257119Sdelphij * This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to
529257119Sdelphij * release the hold on its dataset and acquire a new one on the dataset of the
530257119Sdelphij * same name so that it can be partially torn down and reconstructed.
531257119Sdelphij */
532219089Spjdvoid
533257119Sdelphijdmu_objset_refresh_ownership(objset_t *os, void *tag)
534257119Sdelphij{
535257119Sdelphij	dsl_pool_t *dp;
536257119Sdelphij	dsl_dataset_t *ds, *newds;
537257119Sdelphij	char name[MAXNAMELEN];
538257119Sdelphij
539257119Sdelphij	ds = os->os_dsl_dataset;
540257119Sdelphij	VERIFY3P(ds, !=, NULL);
541257119Sdelphij	VERIFY3P(ds->ds_owner, ==, tag);
542257119Sdelphij	VERIFY(dsl_dataset_long_held(ds));
543257119Sdelphij
544257119Sdelphij	dsl_dataset_name(ds, name);
545257119Sdelphij	dp = dmu_objset_pool(os);
546257119Sdelphij	dsl_pool_config_enter(dp, FTAG);
547257119Sdelphij	dmu_objset_disown(os, tag);
548257119Sdelphij	VERIFY0(dsl_dataset_own(dp, name, tag, &newds));
549257119Sdelphij	VERIFY3P(newds, ==, os->os_dsl_dataset);
550257119Sdelphij	dsl_pool_config_exit(dp, FTAG);
551257119Sdelphij}
552257119Sdelphij
553257119Sdelphijvoid
554219089Spjddmu_objset_disown(objset_t *os, void *tag)
555219089Spjd{
556219089Spjd	dsl_dataset_disown(os->os_dsl_dataset, tag);
557168404Spjd}
558168404Spjd
559249643Smmvoid
560185029Spjddmu_objset_evict_dbufs(objset_t *os)
561168404Spjd{
562168404Spjd	dnode_t *dn;
563168404Spjd
564219089Spjd	mutex_enter(&os->os_lock);
565168404Spjd
566168404Spjd	/* process the mdn last, since the other dnodes have holds on it */
567219089Spjd	list_remove(&os->os_dnodes, DMU_META_DNODE(os));
568219089Spjd	list_insert_tail(&os->os_dnodes, DMU_META_DNODE(os));
569168404Spjd
570168404Spjd	/*
571168404Spjd	 * Find the first dnode with holds.  We have to do this dance
572168404Spjd	 * because dnode_add_ref() only works if you already have a
573168404Spjd	 * hold.  If there are no holds then it has no dbufs so OK to
574168404Spjd	 * skip.
575168404Spjd	 */
576219089Spjd	for (dn = list_head(&os->os_dnodes);
577185029Spjd	    dn && !dnode_add_ref(dn, FTAG);
578219089Spjd	    dn = list_next(&os->os_dnodes, dn))
579168404Spjd		continue;
580168404Spjd
581168404Spjd	while (dn) {
582168404Spjd		dnode_t *next_dn = dn;
583168404Spjd
584168404Spjd		do {
585219089Spjd			next_dn = list_next(&os->os_dnodes, next_dn);
586185029Spjd		} while (next_dn && !dnode_add_ref(next_dn, FTAG));
587168404Spjd
588219089Spjd		mutex_exit(&os->os_lock);
589185029Spjd		dnode_evict_dbufs(dn);
590168404Spjd		dnode_rele(dn, FTAG);
591219089Spjd		mutex_enter(&os->os_lock);
592168404Spjd		dn = next_dn;
593168404Spjd	}
594219089Spjd	mutex_exit(&os->os_lock);
595168404Spjd}
596168404Spjd
597168404Spjdvoid
598219089Spjddmu_objset_evict(objset_t *os)
599168404Spjd{
600219089Spjd	dsl_dataset_t *ds = os->os_dsl_dataset;
601168404Spjd
602219089Spjd	for (int t = 0; t < TXG_SIZE; t++)
603219089Spjd		ASSERT(!dmu_objset_is_dirty(os, t));
604168404Spjd
605185029Spjd	if (ds) {
606185029Spjd		if (!dsl_dataset_is_snapshot(ds)) {
607249643Smm			VERIFY0(dsl_prop_unregister(ds,
608249643Smm			    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
609219089Spjd			    checksum_changed_cb, os));
610249643Smm			VERIFY0(dsl_prop_unregister(ds,
611249643Smm			    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
612219089Spjd			    compression_changed_cb, os));
613249643Smm			VERIFY0(dsl_prop_unregister(ds,
614249643Smm			    zfs_prop_to_name(ZFS_PROP_COPIES),
615219089Spjd			    copies_changed_cb, os));
616249643Smm			VERIFY0(dsl_prop_unregister(ds,
617249643Smm			    zfs_prop_to_name(ZFS_PROP_DEDUP),
618219089Spjd			    dedup_changed_cb, os));
619249643Smm			VERIFY0(dsl_prop_unregister(ds,
620249643Smm			    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
621219089Spjd			    logbias_changed_cb, os));
622249643Smm			VERIFY0(dsl_prop_unregister(ds,
623249643Smm			    zfs_prop_to_name(ZFS_PROP_SYNC),
624219089Spjd			    sync_changed_cb, os));
625185029Spjd		}
626249643Smm		VERIFY0(dsl_prop_unregister(ds,
627249643Smm		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
628219089Spjd		    primary_cache_changed_cb, os));
629249643Smm		VERIFY0(dsl_prop_unregister(ds,
630249643Smm		    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
631219089Spjd		    secondary_cache_changed_cb, os));
632168404Spjd	}
633168404Spjd
634219089Spjd	if (os->os_sa)
635219089Spjd		sa_tear_down(os);
636219089Spjd
637249643Smm	dmu_objset_evict_dbufs(os);
638168404Spjd
639219089Spjd	dnode_special_close(&os->os_meta_dnode);
640219089Spjd	if (DMU_USERUSED_DNODE(os)) {
641219089Spjd		dnode_special_close(&os->os_userused_dnode);
642219089Spjd		dnode_special_close(&os->os_groupused_dnode);
643209962Smm	}
644219089Spjd	zil_free(os->os_zil);
645168404Spjd
646219089Spjd	ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
647209962Smm
648249643Smm	VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));
649219089Spjd
650219089Spjd	/*
651219089Spjd	 * This is a barrier to prevent the objset from going away in
652219089Spjd	 * dnode_move() until we can safely ensure that the objset is still in
653219089Spjd	 * use. We consider the objset valid before the barrier and invalid
654219089Spjd	 * after the barrier.
655219089Spjd	 */
656219089Spjd	rw_enter(&os_lock, RW_READER);
657219089Spjd	rw_exit(&os_lock);
658219089Spjd
659219089Spjd	mutex_destroy(&os->os_lock);
660219089Spjd	mutex_destroy(&os->os_obj_lock);
661219089Spjd	mutex_destroy(&os->os_user_ptr_lock);
662219089Spjd	kmem_free(os, sizeof (objset_t));
663168404Spjd}
664168404Spjd
665219089Spjdtimestruc_t
666219089Spjddmu_objset_snap_cmtime(objset_t *os)
667219089Spjd{
668219089Spjd	return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
669219089Spjd}
670219089Spjd
671168404Spjd/* called from dsl for meta-objset */
672219089Spjdobjset_t *
673168404Spjddmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
674168404Spjd    dmu_objset_type_t type, dmu_tx_t *tx)
675168404Spjd{
676219089Spjd	objset_t *os;
677168404Spjd	dnode_t *mdn;
678168404Spjd
679168404Spjd	ASSERT(dmu_tx_is_syncing(tx));
680249643Smm
681219089Spjd	if (ds != NULL)
682249643Smm		VERIFY0(dmu_objset_from_ds(ds, &os));
683219089Spjd	else
684249643Smm		VERIFY0(dmu_objset_open_impl(spa, NULL, bp, &os));
685168404Spjd
686219089Spjd	mdn = DMU_META_DNODE(os);
687219089Spjd
688168404Spjd	dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
689168404Spjd	    DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);
690168404Spjd
691168404Spjd	/*
692168404Spjd	 * We don't want to have to increase the meta-dnode's nlevels
693168404Spjd	 * later, because then we could do it in quescing context while
694168404Spjd	 * we are also accessing it in open context.
695168404Spjd	 *
696168404Spjd	 * This precaution is not necessary for the MOS (ds == NULL),
697168404Spjd	 * because the MOS is only updated in syncing context.
698168404Spjd	 * This is most fortunate: the MOS is the only objset that
699168404Spjd	 * needs to be synced multiple times as spa_sync() iterates
700168404Spjd	 * to convergence, so minimizing its dn_nlevels matters.
701168404Spjd	 */
702168404Spjd	if (ds != NULL) {
703168404Spjd		int levels = 1;
704168404Spjd
705168404Spjd		/*
706168404Spjd		 * Determine the number of levels necessary for the meta-dnode
707168404Spjd		 * to contain DN_MAX_OBJECT dnodes.
708168404Spjd		 */
709168404Spjd		while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift +
710168404Spjd		    (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
711168404Spjd		    DN_MAX_OBJECT * sizeof (dnode_phys_t))
712168404Spjd			levels++;
713168404Spjd
714168404Spjd		mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
715168404Spjd		    mdn->dn_nlevels = levels;
716168404Spjd	}
717168404Spjd
718168404Spjd	ASSERT(type != DMU_OST_NONE);
719168404Spjd	ASSERT(type != DMU_OST_ANY);
720168404Spjd	ASSERT(type < DMU_OST_NUMTYPES);
721219089Spjd	os->os_phys->os_type = type;
722219089Spjd	if (dmu_objset_userused_enabled(os)) {
723219089Spjd		os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
724219089Spjd		os->os_flags = os->os_phys->os_flags;
725209962Smm	}
726168404Spjd
727168404Spjd	dsl_dataset_dirty(ds, tx);
728168404Spjd
729219089Spjd	return (os);
730168404Spjd}
731168404Spjd
732249643Smmtypedef struct dmu_objset_create_arg {
733249643Smm	const char *doca_name;
734249643Smm	cred_t *doca_cred;
735249643Smm	void (*doca_userfunc)(objset_t *os, void *arg,
736249643Smm	    cred_t *cr, dmu_tx_t *tx);
737249643Smm	void *doca_userarg;
738249643Smm	dmu_objset_type_t doca_type;
739249643Smm	uint64_t doca_flags;
740249643Smm} dmu_objset_create_arg_t;
741168404Spjd
742185029Spjd/*ARGSUSED*/
743168404Spjdstatic int
744249643Smmdmu_objset_create_check(void *arg, dmu_tx_t *tx)
745168404Spjd{
746249643Smm	dmu_objset_create_arg_t *doca = arg;
747249643Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
748249643Smm	dsl_dir_t *pdd;
749249643Smm	const char *tail;
750249643Smm	int error;
751168404Spjd
752249643Smm	if (strchr(doca->doca_name, '@') != NULL)
753249643Smm		return (SET_ERROR(EINVAL));
754168404Spjd
755249643Smm	error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail);
756249643Smm	if (error != 0)
757249643Smm		return (error);
758249643Smm	if (tail == NULL) {
759249643Smm		dsl_dir_rele(pdd, FTAG);
760249643Smm		return (SET_ERROR(EEXIST));
761168404Spjd	}
762265754Sdelphij	error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
763265754Sdelphij	    doca->doca_cred);
764249643Smm	dsl_dir_rele(pdd, FTAG);
765185029Spjd
766265754Sdelphij	return (error);
767168404Spjd}
768168404Spjd
769168404Spjdstatic void
770249643Smmdmu_objset_create_sync(void *arg, dmu_tx_t *tx)
771168404Spjd{
772249643Smm	dmu_objset_create_arg_t *doca = arg;
773249643Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
774249643Smm	dsl_dir_t *pdd;
775249643Smm	const char *tail;
776249643Smm	dsl_dataset_t *ds;
777219089Spjd	uint64_t obj;
778249643Smm	blkptr_t *bp;
779249643Smm	objset_t *os;
780168404Spjd
781249643Smm	VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail));
782168404Spjd
783249643Smm	obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags,
784249643Smm	    doca->doca_cred, tx);
785168404Spjd
786249643Smm	VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
787249643Smm	bp = dsl_dataset_get_blkptr(ds);
788249643Smm	os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
789249643Smm	    ds, bp, doca->doca_type, tx);
790168404Spjd
791249643Smm	if (doca->doca_userfunc != NULL) {
792249643Smm		doca->doca_userfunc(os, doca->doca_userarg,
793249643Smm		    doca->doca_cred, tx);
794168404Spjd	}
795185029Spjd
796249643Smm	spa_history_log_internal_ds(ds, "create", tx, "");
797249643Smm	dsl_dataset_rele(ds, FTAG);
798249643Smm	dsl_dir_rele(pdd, FTAG);
799168404Spjd}
800168404Spjd
801168404Spjdint
802219089Spjddmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
803185029Spjd    void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
804168404Spjd{
805249643Smm	dmu_objset_create_arg_t doca;
806168404Spjd
807249643Smm	doca.doca_name = name;
808249643Smm	doca.doca_cred = CRED();
809249643Smm	doca.doca_flags = flags;
810249643Smm	doca.doca_userfunc = func;
811249643Smm	doca.doca_userarg = arg;
812249643Smm	doca.doca_type = type;
813168404Spjd
814249643Smm	return (dsl_sync_task(name,
815249643Smm	    dmu_objset_create_check, dmu_objset_create_sync, &doca, 5));
816168404Spjd}
817168404Spjd
818249643Smmtypedef struct dmu_objset_clone_arg {
819249643Smm	const char *doca_clone;
820249643Smm	const char *doca_origin;
821249643Smm	cred_t *doca_cred;
822249643Smm} dmu_objset_clone_arg_t;
823249643Smm
824249643Smm/*ARGSUSED*/
825249643Smmstatic int
826249643Smmdmu_objset_clone_check(void *arg, dmu_tx_t *tx)
827168404Spjd{
828249643Smm	dmu_objset_clone_arg_t *doca = arg;
829219089Spjd	dsl_dir_t *pdd;
830219089Spjd	const char *tail;
831249643Smm	int error;
832249643Smm	dsl_dataset_t *origin;
833249643Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
834168404Spjd
835249643Smm	if (strchr(doca->doca_clone, '@') != NULL)
836249643Smm		return (SET_ERROR(EINVAL));
837249643Smm
838249643Smm	error = dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail);
839249643Smm	if (error != 0)
840249643Smm		return (error);
841219089Spjd	if (tail == NULL) {
842249643Smm		dsl_dir_rele(pdd, FTAG);
843249643Smm		return (SET_ERROR(EEXIST));
844168404Spjd	}
845249643Smm	/* You can't clone across pools. */
846249643Smm	if (pdd->dd_pool != dp) {
847249643Smm		dsl_dir_rele(pdd, FTAG);
848249643Smm		return (SET_ERROR(EXDEV));
849168404Spjd	}
850265754Sdelphij	error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
851265754Sdelphij	    doca->doca_cred);
852265754Sdelphij	if (error != 0) {
853265754Sdelphij		dsl_dir_rele(pdd, FTAG);
854265754Sdelphij		return (SET_ERROR(EDQUOT));
855265754Sdelphij	}
856249643Smm	dsl_dir_rele(pdd, FTAG);
857185029Spjd
858249643Smm	error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin);
859249643Smm	if (error != 0)
860219089Spjd		return (error);
861219089Spjd
862249643Smm	/* You can't clone across pools. */
863249643Smm	if (origin->ds_dir->dd_pool != dp) {
864249643Smm		dsl_dataset_rele(origin, FTAG);
865249643Smm		return (SET_ERROR(EXDEV));
866249643Smm	}
867219089Spjd
868249643Smm	/* You can only clone snapshots, not the head datasets. */
869249643Smm	if (!dsl_dataset_is_snapshot(origin)) {
870249643Smm		dsl_dataset_rele(origin, FTAG);
871249643Smm		return (SET_ERROR(EINVAL));
872219089Spjd	}
873249643Smm	dsl_dataset_rele(origin, FTAG);
874249643Smm
875249643Smm	return (0);
876209962Smm}
877209962Smm
878209962Smmstatic void
879249643Smmdmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
880209962Smm{
881249643Smm	dmu_objset_clone_arg_t *doca = arg;
882249643Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
883249643Smm	dsl_dir_t *pdd;
884249643Smm	const char *tail;
885249643Smm	dsl_dataset_t *origin, *ds;
886249643Smm	uint64_t obj;
887249643Smm	char namebuf[MAXNAMELEN];
888209962Smm
889249643Smm	VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail));
890249643Smm	VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin));
891209962Smm
892249643Smm	obj = dsl_dataset_create_sync(pdd, tail, origin, 0,
893249643Smm	    doca->doca_cred, tx);
894219089Spjd
895249643Smm	VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
896249643Smm	dsl_dataset_name(origin, namebuf);
897249643Smm	spa_history_log_internal_ds(ds, "clone", tx,
898249643Smm	    "origin=%s (%llu)", namebuf, origin->ds_object);
899249643Smm	dsl_dataset_rele(ds, FTAG);
900249643Smm	dsl_dataset_rele(origin, FTAG);
901249643Smm	dsl_dir_rele(pdd, FTAG);
902209962Smm}
903209962Smm
904249643Smmint
905249643Smmdmu_objset_clone(const char *clone, const char *origin)
906168404Spjd{
907249643Smm	dmu_objset_clone_arg_t doca;
908168404Spjd
909249643Smm	doca.doca_clone = clone;
910249643Smm	doca.doca_origin = origin;
911249643Smm	doca.doca_cred = CRED();
912219089Spjd
913249643Smm	return (dsl_sync_task(clone,
914249643Smm	    dmu_objset_clone_check, dmu_objset_clone_sync, &doca, 5));
915168404Spjd}
916168404Spjd
917168404Spjdint
918249643Smmdmu_objset_snapshot_one(const char *fsname, const char *snapname)
919168404Spjd{
920168404Spjd	int err;
921249643Smm	char *longsnap = kmem_asprintf("%s@%s", fsname, snapname);
922249643Smm	nvlist_t *snaps = fnvlist_alloc();
923168404Spjd
924249643Smm	fnvlist_add_boolean(snaps, longsnap);
925249643Smm	strfree(longsnap);
926249643Smm	err = dsl_dataset_snapshot(snaps, NULL, NULL);
927249643Smm	fnvlist_free(snaps);
928168404Spjd	return (err);
929168404Spjd}
930168404Spjd
931168404Spjdstatic void
932209962Smmdmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
933168404Spjd{
934168404Spjd	dnode_t *dn;
935168404Spjd
936168404Spjd	while (dn = list_head(list)) {
937168404Spjd		ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
938168404Spjd		ASSERT(dn->dn_dbuf->db_data_pending);
939168404Spjd		/*
940209962Smm		 * Initialize dn_zio outside dnode_sync() because the
941209962Smm		 * meta-dnode needs to set it ouside dnode_sync().
942168404Spjd		 */
943168404Spjd		dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
944168404Spjd		ASSERT(dn->dn_zio);
945168404Spjd
946168404Spjd		ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
947168404Spjd		list_remove(list, dn);
948209962Smm
949209962Smm		if (newlist) {
950209962Smm			(void) dnode_add_ref(dn, newlist);
951209962Smm			list_insert_tail(newlist, dn);
952209962Smm		}
953209962Smm
954168404Spjd		dnode_sync(dn, tx);
955168404Spjd	}
956168404Spjd}
957168404Spjd
958168404Spjd/* ARGSUSED */
959168404Spjdstatic void
960219089Spjddmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
961168404Spjd{
962185029Spjd	blkptr_t *bp = zio->io_bp;
963219089Spjd	objset_t *os = arg;
964168404Spjd	dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
965168404Spjd
966249643Smm	ASSERT3P(bp, ==, os->os_rootbp);
967249643Smm	ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
968249643Smm	ASSERT0(BP_GET_LEVEL(bp));
969185029Spjd
970168404Spjd	/*
971209962Smm	 * Update rootbp fill count: it should be the number of objects
972209962Smm	 * allocated in the object set (not counting the "special"
973209962Smm	 * objects that are stored in the objset_phys_t -- the meta
974209962Smm	 * dnode and user/group accounting objects).
975168404Spjd	 */
976209962Smm	bp->blk_fill = 0;
977185029Spjd	for (int i = 0; i < dnp->dn_nblkptr; i++)
978168404Spjd		bp->blk_fill += dnp->dn_blkptr[i].blk_fill;
979219089Spjd}
980168404Spjd
981219089Spjd/* ARGSUSED */
982219089Spjdstatic void
983219089Spjddmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
984219089Spjd{
985219089Spjd	blkptr_t *bp = zio->io_bp;
986219089Spjd	blkptr_t *bp_orig = &zio->io_bp_orig;
987219089Spjd	objset_t *os = arg;
988219089Spjd
989185029Spjd	if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
990219089Spjd		ASSERT(BP_EQUAL(bp, bp_orig));
991185029Spjd	} else {
992219089Spjd		dsl_dataset_t *ds = os->os_dsl_dataset;
993219089Spjd		dmu_tx_t *tx = os->os_synctx;
994219089Spjd
995219089Spjd		(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
996219089Spjd		dsl_dataset_block_born(ds, bp, tx);
997168404Spjd	}
998168404Spjd}
999168404Spjd
1000168404Spjd/* called from dsl */
1001168404Spjdvoid
1002219089Spjddmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
1003168404Spjd{
1004168404Spjd	int txgoff;
1005168404Spjd	zbookmark_t zb;
1006219089Spjd	zio_prop_t zp;
1007168404Spjd	zio_t *zio;
1008168404Spjd	list_t *list;
1009209962Smm	list_t *newlist = NULL;
1010168404Spjd	dbuf_dirty_record_t *dr;
1011168404Spjd
1012168404Spjd	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);
1013168404Spjd
1014168404Spjd	ASSERT(dmu_tx_is_syncing(tx));
1015168404Spjd	/* XXX the write_done callback should really give us the tx... */
1016168404Spjd	os->os_synctx = tx;
1017168404Spjd
1018168404Spjd	if (os->os_dsl_dataset == NULL) {
1019168404Spjd		/*
1020168404Spjd		 * This is the MOS.  If we have upgraded,
1021168404Spjd		 * spa_max_replication() could change, so reset
1022168404Spjd		 * os_copies here.
1023168404Spjd		 */
1024168404Spjd		os->os_copies = spa_max_replication(os->os_spa);
1025168404Spjd	}
1026168404Spjd
1027168404Spjd	/*
1028168404Spjd	 * Create the root block IO
1029168404Spjd	 */
1030219089Spjd	SET_BOOKMARK(&zb, os->os_dsl_dataset ?
1031219089Spjd	    os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
1032219089Spjd	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
1033247406Smm	arc_release(os->os_phys_buf, &os->os_phys_buf);
1034185029Spjd
1035219089Spjd	dmu_write_policy(os, NULL, 0, 0, &zp);
1036185029Spjd
1037219089Spjd	zio = arc_write(pio, os->os_spa, tx->tx_txg,
1038252140Sdelphij	    os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
1039252140Sdelphij	    DMU_OS_IS_L2COMPRESSIBLE(os), &zp, dmu_objset_write_ready,
1040260764Savg	    NULL, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE,
1041252140Sdelphij	    ZIO_FLAG_MUSTSUCCEED, &zb);
1042185029Spjd
1043168404Spjd	/*
1044209962Smm	 * Sync special dnodes - the parent IO for the sync is the root block
1045168404Spjd	 */
1046219089Spjd	DMU_META_DNODE(os)->dn_zio = zio;
1047219089Spjd	dnode_sync(DMU_META_DNODE(os), tx);
1048168404Spjd
1049209962Smm	os->os_phys->os_flags = os->os_flags;
1050209962Smm
1051219089Spjd	if (DMU_USERUSED_DNODE(os) &&
1052219089Spjd	    DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) {
1053219089Spjd		DMU_USERUSED_DNODE(os)->dn_zio = zio;
1054219089Spjd		dnode_sync(DMU_USERUSED_DNODE(os), tx);
1055219089Spjd		DMU_GROUPUSED_DNODE(os)->dn_zio = zio;
1056219089Spjd		dnode_sync(DMU_GROUPUSED_DNODE(os), tx);
1057209962Smm	}
1058209962Smm
1059168404Spjd	txgoff = tx->tx_txg & TXG_MASK;
1060168404Spjd
1061209962Smm	if (dmu_objset_userused_enabled(os)) {
1062209962Smm		newlist = &os->os_synced_dnodes;
1063209962Smm		/*
1064209962Smm		 * We must create the list here because it uses the
1065209962Smm		 * dn_dirty_link[] of this txg.
1066209962Smm		 */
1067209962Smm		list_create(newlist, sizeof (dnode_t),
1068209962Smm		    offsetof(dnode_t, dn_dirty_link[txgoff]));
1069209962Smm	}
1070168404Spjd
1071209962Smm	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
1072209962Smm	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);
1073209962Smm
1074219089Spjd	list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
1075168404Spjd	while (dr = list_head(list)) {
1076249643Smm		ASSERT0(dr->dr_dbuf->db_level);
1077168404Spjd		list_remove(list, dr);
1078168404Spjd		if (dr->dr_zio)
1079168404Spjd			zio_nowait(dr->dr_zio);
1080168404Spjd	}
1081168404Spjd	/*
1082168404Spjd	 * Free intent log blocks up to this tx.
1083168404Spjd	 */
1084168404Spjd	zil_sync(os->os_zil, tx);
1085185029Spjd	os->os_phys->os_zil_header = os->os_zil_header;
1086168404Spjd	zio_nowait(zio);
1087168404Spjd}
1088168404Spjd
1089219089Spjdboolean_t
1090219089Spjddmu_objset_is_dirty(objset_t *os, uint64_t txg)
1091219089Spjd{
1092219089Spjd	return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) ||
1093219089Spjd	    !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK]));
1094219089Spjd}
1095219089Spjd
/*
 * Space-accounting callbacks, one slot per objset type.  Slots are filled
 * by dmu_objset_register_type(); a NULL slot makes
 * dmu_objset_userused_enabled() report accounting as unavailable for
 * objsets of that type.
 */
1096209962Smmstatic objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
1097209962Smm
1098168404Spjdvoid
1099209962Smmdmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
1100209962Smm{
1101209962Smm	used_cbs[ost] = cb;
1102209962Smm}
1103209962Smm
1104209962Smmboolean_t
1105219089Spjddmu_objset_userused_enabled(objset_t *os)
1106209962Smm{
1107209962Smm	return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
1108219089Spjd	    used_cbs[os->os_phys->os_type] != NULL &&
1109219089Spjd	    DMU_USERUSED_DNODE(os) != NULL);
1110209962Smm}
1111209962Smm
1112219089Spjdstatic void
1113219089Spjddo_userquota_update(objset_t *os, uint64_t used, uint64_t flags,
1114219089Spjd    uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx)
1115219089Spjd{
1116219089Spjd	if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) {
1117219089Spjd		int64_t delta = DNODE_SIZE + used;
1118219089Spjd		if (subtract)
1119219089Spjd			delta = -delta;
1120219089Spjd		VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT,
1121219089Spjd		    user, delta, tx));
1122219089Spjd		VERIFY3U(0, ==, zap_increment_int(os, DMU_GROUPUSED_OBJECT,
1123219089Spjd		    group, delta, tx));
1124219089Spjd	}
1125219089Spjd}
1126219089Spjd
1127209962Smmvoid
1128219089Spjddmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
1129209962Smm{
1130209962Smm	dnode_t *dn;
1131209962Smm	list_t *list = &os->os_synced_dnodes;
1132209962Smm
1133209962Smm	ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));
1134209962Smm
1135209962Smm	while (dn = list_head(list)) {
1136219089Spjd		int flags;
1137209962Smm		ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
1138209962Smm		ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
1139209962Smm		    dn->dn_phys->dn_flags &
1140209962Smm		    DNODE_FLAG_USERUSED_ACCOUNTED);
1141209962Smm
1142209962Smm		/* Allocate the user/groupused objects if necessary. */
1143219089Spjd		if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
1144219089Spjd			VERIFY(0 == zap_create_claim(os,
1145209962Smm			    DMU_USERUSED_OBJECT,
1146209962Smm			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
1147219089Spjd			VERIFY(0 == zap_create_claim(os,
1148209962Smm			    DMU_GROUPUSED_OBJECT,
1149209962Smm			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
1150209962Smm		}
1151209962Smm
1152209962Smm		/*
1153219089Spjd		 * We intentionally modify the zap object even if the
1154219089Spjd		 * net delta is zero.  Otherwise
1155219089Spjd		 * the block of the zap obj could be shared between
1156219089Spjd		 * datasets but need to be different between them after
1157219089Spjd		 * a bprewrite.
1158209962Smm		 */
1159219089Spjd
1160219089Spjd		flags = dn->dn_id_flags;
1161219089Spjd		ASSERT(flags);
1162219089Spjd		if (flags & DN_ID_OLD_EXIST)  {
1163219089Spjd			do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags,
1164219089Spjd			    dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx);
1165209962Smm		}
1166219089Spjd		if (flags & DN_ID_NEW_EXIST) {
1167219089Spjd			do_userquota_update(os, DN_USED_BYTES(dn->dn_phys),
1168219089Spjd			    dn->dn_phys->dn_flags,  dn->dn_newuid,
1169219089Spjd			    dn->dn_newgid, B_FALSE, tx);
1170219089Spjd		}
1171209962Smm
1172209962Smm		mutex_enter(&dn->dn_mtx);
1173219089Spjd		dn->dn_oldused = 0;
1174219089Spjd		dn->dn_oldflags = 0;
1175219089Spjd		if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
1176219089Spjd			dn->dn_olduid = dn->dn_newuid;
1177219089Spjd			dn->dn_oldgid = dn->dn_newgid;
1178219089Spjd			dn->dn_id_flags |= DN_ID_OLD_EXIST;
1179219089Spjd			if (dn->dn_bonuslen == 0)
1180219089Spjd				dn->dn_id_flags |= DN_ID_CHKED_SPILL;
1181219089Spjd			else
1182219089Spjd				dn->dn_id_flags |= DN_ID_CHKED_BONUS;
1183219089Spjd		}
1184219089Spjd		dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
1185209962Smm		mutex_exit(&dn->dn_mtx);
1186209962Smm
1187209962Smm		list_remove(list, dn);
1188209962Smm		dnode_rele(dn, list);
1189209962Smm	}
1190209962Smm}
1191209962Smm
1192219089Spjd/*
1193219089Spjd * Returns a pointer to data to find uid/gid from
1194219089Spjd *
1195219089Spjd * If a dirty record for transaction group that is syncing can't
1196219089Spjd * be found then NULL is returned.  In the NULL case it is assumed
1197219089Spjd * the uid/gid aren't changing.
1198219089Spjd */
1199219089Spjdstatic void *
1200219089Spjddmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx)
1201219089Spjd{
1202219089Spjd	dbuf_dirty_record_t *dr, **drp;
1203219089Spjd	void *data;
1204219089Spjd
1205219089Spjd	if (db->db_dirtycnt == 0)
1206219089Spjd		return (db->db.db_data);  /* Nothing is changing */
1207219089Spjd
1208219089Spjd	for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
1209219089Spjd		if (dr->dr_txg == tx->tx_txg)
1210219089Spjd			break;
1211219089Spjd
1212219089Spjd	if (dr == NULL) {
1213219089Spjd		data = NULL;
1214219089Spjd	} else {
1215219089Spjd		dnode_t *dn;
1216219089Spjd
1217219089Spjd		DB_DNODE_ENTER(dr->dr_dbuf);
1218219089Spjd		dn = DB_DNODE(dr->dr_dbuf);
1219219089Spjd
1220219089Spjd		if (dn->dn_bonuslen == 0 &&
1221219089Spjd		    dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID)
1222219089Spjd			data = dr->dt.dl.dr_data->b_data;
1223219089Spjd		else
1224219089Spjd			data = dr->dt.dl.dr_data;
1225219089Spjd
1226219089Spjd		DB_DNODE_EXIT(dr->dr_dbuf);
1227219089Spjd	}
1228219089Spjd
1229219089Spjd	return (data);
1230219089Spjd}
1231219089Spjd
/*
 * Capture the user and group ids that a dnode's space is charged to.
 *
 * With 'before' set, the ids are stored in dn_olduid/dn_oldgid and
 * DN_ID_OLD_EXIST is set on success; otherwise they are stored in
 * dn_newuid/dn_newgid and DN_ID_NEW_EXIST is set on success.  The ids
 * are extracted by the callback registered for the objset's type, which
 * is handed the dnode's bonus type and a pointer to the bonus or spill
 * data chosen below.
 *
 * Does nothing when user accounting is not enabled for the objset.
 */
1232219089Spjdvoid
1233219089Spjddmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
1234219089Spjd{
1235219089Spjd	objset_t *os = dn->dn_objset;
1236219089Spjd	void *data = NULL;
1237219089Spjd	dmu_buf_impl_t *db = NULL;
1238248369Smm	uint64_t *user = NULL;
1239248369Smm	uint64_t *group = NULL;
1240219089Spjd	int flags = dn->dn_id_flags;
1241219089Spjd	int error;
1242219089Spjd	boolean_t have_spill = B_FALSE;
1243219089Spjd
1244219089Spjd	if (!dmu_objset_userused_enabled(dn->dn_objset))
1245219089Spjd		return;
1246219089Spjd
	/* The "before" ids only need to be captured once per dnode. */
1247219089Spjd	if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
1248219089Spjd	    DN_ID_CHKED_SPILL)))
1249219089Spjd		return;
1250219089Spjd
	/*
	 * Choose where to read the ids from: the on-disk bonus area, the
	 * in-core bonus dbuf, or (for SA objects with no bonus) the spill
	 * block.  When a dbuf is used, its db_mtx is held from here until
	 * after the callback has run.
	 */
1251219089Spjd	if (before && dn->dn_bonuslen != 0)
1252219089Spjd		data = DN_BONUS(dn->dn_phys);
1253219089Spjd	else if (!before && dn->dn_bonuslen != 0) {
1254219089Spjd		if (dn->dn_bonus) {
1255219089Spjd			db = dn->dn_bonus;
1256219089Spjd			mutex_enter(&db->db_mtx);
1257219089Spjd			data = dmu_objset_userquota_find_data(db, tx);
1258219089Spjd		} else {
1259219089Spjd			data = DN_BONUS(dn->dn_phys);
1260219089Spjd		}
1261219089Spjd	} else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) {
1262219089Spjd			int rf = 0;
1263219089Spjd
1264219089Spjd			if (RW_WRITE_HELD(&dn->dn_struct_rwlock))
1265219089Spjd				rf |= DB_RF_HAVESTRUCT;
1266219089Spjd			error = dmu_spill_hold_by_dnode(dn,
1267219089Spjd			    rf | DB_RF_MUST_SUCCEED,
1268219089Spjd			    FTAG, (dmu_buf_t **)&db);
1269219089Spjd			ASSERT(error == 0);
1270219089Spjd			mutex_enter(&db->db_mtx);
1271219089Spjd			data = (before) ? db->db.db_data :
1272219089Spjd			    dmu_objset_userquota_find_data(db, tx);
1273219089Spjd			have_spill = B_TRUE;
1274219089Spjd	} else {
		/* No bonus data and not an SA object: nothing to read. */
1275219089Spjd		mutex_enter(&dn->dn_mtx);
1276219089Spjd		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
1277219089Spjd		mutex_exit(&dn->dn_mtx);
1278219089Spjd		return;
1279219089Spjd	}
1280219089Spjd
	/*
	 * Pick the destination for the ids.  In the "after" case with no
	 * data, user and group stay NULL and are passed to the callback
	 * as such.
	 */
1281219089Spjd	if (before) {
1282219089Spjd		ASSERT(data);
1283219089Spjd		user = &dn->dn_olduid;
1284219089Spjd		group = &dn->dn_oldgid;
1285219089Spjd	} else if (data) {
1286219089Spjd		user = &dn->dn_newuid;
1287219089Spjd		group = &dn->dn_newgid;
1288219089Spjd	}
1289219089Spjd
1290219089Spjd	/*
1291219089Spjd	 * Must always call the callback in case the object
1292219089Spjd	 * type has changed and that type isn't an object type to track
1293219089Spjd	 */
1294219089Spjd	error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
1295219089Spjd	    user, group);
1296219089Spjd
1297219089Spjd	/*
1298219089Spjd	 * Preserve existing uid/gid when the callback can't determine
1299219089Spjd	 * what the new uid/gid are and the callback returned EEXIST.
1300219089Spjd	 * The EEXIST error tells us to just use the existing uid/gid.
1301219089Spjd	 * If we don't know what the old values are then just assign
1302219089Spjd	 * them to 0, since that is a new file  being created.
1303219089Spjd	 */
1304219089Spjd	if (!before && data == NULL && error == EEXIST) {
1305219089Spjd		if (flags & DN_ID_OLD_EXIST) {
1306219089Spjd			dn->dn_newuid = dn->dn_olduid;
1307219089Spjd			dn->dn_newgid = dn->dn_oldgid;
1308219089Spjd		} else {
1309219089Spjd			dn->dn_newuid = 0;
1310219089Spjd			dn->dn_newgid = 0;
1311219089Spjd		}
1312219089Spjd		error = 0;
1313219089Spjd	}
1314219089Spjd
1315219089Spjd	if (db)
1316219089Spjd		mutex_exit(&db->db_mtx);
1317219089Spjd
	/* Publish the outcome in dn_id_flags under dn_mtx. */
1318219089Spjd	mutex_enter(&dn->dn_mtx);
1319219089Spjd	if (error == 0 && before)
1320219089Spjd		dn->dn_id_flags |= DN_ID_OLD_EXIST;
1321219089Spjd	if (error == 0 && !before)
1322219089Spjd		dn->dn_id_flags |= DN_ID_NEW_EXIST;
1323219089Spjd
1324219089Spjd	if (have_spill) {
1325219089Spjd		dn->dn_id_flags |= DN_ID_CHKED_SPILL;
1326219089Spjd	} else {
1327219089Spjd		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
1328219089Spjd	}
1329219089Spjd	mutex_exit(&dn->dn_mtx);
	/* Only the spill path took a hold (FTAG) that must be dropped. */
1330219089Spjd	if (have_spill)
1331219089Spjd		dmu_buf_rele((dmu_buf_t *)db, FTAG);
1332219089Spjd}
1333219089Spjd
1334209962Smmboolean_t
1335209962Smmdmu_objset_userspace_present(objset_t *os)
1336209962Smm{
1337219089Spjd	return (os->os_phys->os_flags &
1338209962Smm	    OBJSET_FLAG_USERACCOUNTING_COMPLETE);
1339209962Smm}
1340209962Smm
1341209962Smmint
1342209962Smmdmu_objset_userspace_upgrade(objset_t *os)
1343209962Smm{
1344209962Smm	uint64_t obj;
1345209962Smm	int err = 0;
1346209962Smm
1347209962Smm	if (dmu_objset_userspace_present(os))
1348209962Smm		return (0);
1349219089Spjd	if (!dmu_objset_userused_enabled(os))
1350249643Smm		return (SET_ERROR(ENOTSUP));
1351209962Smm	if (dmu_objset_is_snapshot(os))
1352249643Smm		return (SET_ERROR(EINVAL));
1353209962Smm
1354209962Smm	/*
1355209962Smm	 * We simply need to mark every object dirty, so that it will be
1356209962Smm	 * synced out and now accounted.  If this is called
1357209962Smm	 * concurrently, or if we already did some work before crashing,
1358209962Smm	 * that's fine, since we track each object's accounted state
1359209962Smm	 * independently.
1360209962Smm	 */
1361209962Smm
1362209962Smm	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
1363209962Smm		dmu_tx_t *tx;
1364209962Smm		dmu_buf_t *db;
1365209962Smm		int objerr;
1366209962Smm
1367209962Smm		if (issig(JUSTLOOKING) && issig(FORREAL))
1368249643Smm			return (SET_ERROR(EINTR));
1369209962Smm
1370209962Smm		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
1371249643Smm		if (objerr != 0)
1372209962Smm			continue;
1373209962Smm		tx = dmu_tx_create(os);
1374209962Smm		dmu_tx_hold_bonus(tx, obj);
1375209962Smm		objerr = dmu_tx_assign(tx, TXG_WAIT);
1376249643Smm		if (objerr != 0) {
1377209962Smm			dmu_tx_abort(tx);
1378209962Smm			continue;
1379209962Smm		}
1380209962Smm		dmu_buf_will_dirty(db, tx);
1381209962Smm		dmu_buf_rele(db, FTAG);
1382209962Smm		dmu_tx_commit(tx);
1383209962Smm	}
1384209962Smm
1385219089Spjd	os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
1386209962Smm	txg_wait_synced(dmu_objset_pool(os), 0);
1387209962Smm	return (0);
1388209962Smm}
1389209962Smm
1390209962Smmvoid
1391168404Spjddmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
1392168404Spjd    uint64_t *usedobjsp, uint64_t *availobjsp)
1393168404Spjd{
1394219089Spjd	dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp,
1395168404Spjd	    usedobjsp, availobjsp);
1396168404Spjd}
1397168404Spjd
1398168404Spjduint64_t
1399168404Spjddmu_objset_fsid_guid(objset_t *os)
1400168404Spjd{
1401219089Spjd	return (dsl_dataset_fsid_guid(os->os_dsl_dataset));
1402168404Spjd}
1403168404Spjd
1404168404Spjdvoid
1405168404Spjddmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat)
1406168404Spjd{
1407219089Spjd	stat->dds_type = os->os_phys->os_type;
1408219089Spjd	if (os->os_dsl_dataset)
1409219089Spjd		dsl_dataset_fast_stat(os->os_dsl_dataset, stat);
1410168404Spjd}
1411168404Spjd
1412168404Spjdvoid
1413168404Spjddmu_objset_stats(objset_t *os, nvlist_t *nv)
1414168404Spjd{
1415219089Spjd	ASSERT(os->os_dsl_dataset ||
1416219089Spjd	    os->os_phys->os_type == DMU_OST_META);
1417168404Spjd
1418219089Spjd	if (os->os_dsl_dataset != NULL)
1419219089Spjd		dsl_dataset_stats(os->os_dsl_dataset, nv);
1420168404Spjd
1421168404Spjd	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
1422219089Spjd	    os->os_phys->os_type);
1423209962Smm	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
1424209962Smm	    dmu_objset_userspace_present(os));
1425168404Spjd}
1426168404Spjd
1427168404Spjdint
1428168404Spjddmu_objset_is_snapshot(objset_t *os)
1429168404Spjd{
1430219089Spjd	if (os->os_dsl_dataset != NULL)
1431219089Spjd		return (dsl_dataset_is_snapshot(os->os_dsl_dataset));
1432168404Spjd	else
1433168404Spjd		return (B_FALSE);
1434168404Spjd}
1435168404Spjd
1436168404Spjdint
1437185029Spjddmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
1438185029Spjd    boolean_t *conflict)
1439185029Spjd{
1440219089Spjd	dsl_dataset_t *ds = os->os_dsl_dataset;
1441185029Spjd	uint64_t ignored;
1442185029Spjd
1443185029Spjd	if (ds->ds_phys->ds_snapnames_zapobj == 0)
1444249643Smm		return (SET_ERROR(ENOENT));
1445185029Spjd
1446185029Spjd	return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
1447185029Spjd	    ds->ds_phys->ds_snapnames_zapobj, name, 8, 1, &ignored, MT_FIRST,
1448185029Spjd	    real, maxlen, conflict));
1449185029Spjd}
1450185029Spjd
1451185029Spjdint
1452168404Spjddmu_snapshot_list_next(objset_t *os, int namelen, char *name,
1453185029Spjd    uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
1454168404Spjd{
1455219089Spjd	dsl_dataset_t *ds = os->os_dsl_dataset;
1456168404Spjd	zap_cursor_t cursor;
1457168404Spjd	zap_attribute_t attr;
1458168404Spjd
1459249643Smm	ASSERT(dsl_pool_config_held(dmu_objset_pool(os)));
1460249643Smm
1461168404Spjd	if (ds->ds_phys->ds_snapnames_zapobj == 0)
1462249643Smm		return (SET_ERROR(ENOENT));
1463168404Spjd
1464168404Spjd	zap_cursor_init_serialized(&cursor,
1465168404Spjd	    ds->ds_dir->dd_pool->dp_meta_objset,
1466168404Spjd	    ds->ds_phys->ds_snapnames_zapobj, *offp);
1467168404Spjd
1468168404Spjd	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
1469168404Spjd		zap_cursor_fini(&cursor);
1470249643Smm		return (SET_ERROR(ENOENT));
1471168404Spjd	}
1472168404Spjd
1473168404Spjd	if (strlen(attr.za_name) + 1 > namelen) {
1474168404Spjd		zap_cursor_fini(&cursor);
1475249643Smm		return (SET_ERROR(ENAMETOOLONG));
1476168404Spjd	}
1477168404Spjd
1478168404Spjd	(void) strcpy(name, attr.za_name);
1479168404Spjd	if (idp)
1480168404Spjd		*idp = attr.za_first_integer;
1481185029Spjd	if (case_conflict)
1482185029Spjd		*case_conflict = attr.za_normalization_conflict;
1483168404Spjd	zap_cursor_advance(&cursor);
1484168404Spjd	*offp = zap_cursor_serialize(&cursor);
1485168404Spjd	zap_cursor_fini(&cursor);
1486168404Spjd
1487168404Spjd	return (0);
1488168404Spjd}
1489168404Spjd
1490168404Spjdint
1491168404Spjddmu_dir_list_next(objset_t *os, int namelen, char *name,
1492168404Spjd    uint64_t *idp, uint64_t *offp)
1493168404Spjd{
1494219089Spjd	dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
1495168404Spjd	zap_cursor_t cursor;
1496168404Spjd	zap_attribute_t attr;
1497168404Spjd
1498168404Spjd	/* there is no next dir on a snapshot! */
1499219089Spjd	if (os->os_dsl_dataset->ds_object !=
1500168404Spjd	    dd->dd_phys->dd_head_dataset_obj)
1501249643Smm		return (SET_ERROR(ENOENT));
1502168404Spjd
1503168404Spjd	zap_cursor_init_serialized(&cursor,
1504168404Spjd	    dd->dd_pool->dp_meta_objset,
1505168404Spjd	    dd->dd_phys->dd_child_dir_zapobj, *offp);
1506168404Spjd
1507168404Spjd	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
1508168404Spjd		zap_cursor_fini(&cursor);
1509249643Smm		return (SET_ERROR(ENOENT));
1510168404Spjd	}
1511168404Spjd
1512168404Spjd	if (strlen(attr.za_name) + 1 > namelen) {
1513168404Spjd		zap_cursor_fini(&cursor);
1514249643Smm		return (SET_ERROR(ENAMETOOLONG));
1515168404Spjd	}
1516168404Spjd
1517168404Spjd	(void) strcpy(name, attr.za_name);
1518168404Spjd	if (idp)
1519168404Spjd		*idp = attr.za_first_integer;
1520168404Spjd	zap_cursor_advance(&cursor);
1521168404Spjd	*offp = zap_cursor_serialize(&cursor);
1522168404Spjd	zap_cursor_fini(&cursor);
1523168404Spjd
1524168404Spjd	return (0);
1525168404Spjd}
1526168404Spjd
1527168404Spjd/*
1528249643Smm * Find objsets under and including ddobj, call func(ds) on each.
1529168404Spjd */
1530168404Spjdint
1531249643Smmdmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
1532249643Smm    int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
1533168404Spjd{
1534249643Smm	dsl_dir_t *dd;
1535249643Smm	dsl_dataset_t *ds;
1536249643Smm	zap_cursor_t zc;
1537249643Smm	zap_attribute_t *attr;
1538249643Smm	uint64_t thisobj;
1539249643Smm	int err;
1540249643Smm
1541249643Smm	ASSERT(dsl_pool_config_held(dp));
1542249643Smm
1543249643Smm	err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
1544249643Smm	if (err != 0)
1545249643Smm		return (err);
1546249643Smm
1547249643Smm	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
1548249643Smm	if (dd->dd_myname[0] == '$') {
1549249643Smm		dsl_dir_rele(dd, FTAG);
1550249643Smm		return (0);
1551249643Smm	}
1552249643Smm
1553249643Smm	thisobj = dd->dd_phys->dd_head_dataset_obj;
1554249643Smm	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
1555249643Smm
1556249643Smm	/*
1557249643Smm	 * Iterate over all children.
1558249643Smm	 */
1559249643Smm	if (flags & DS_FIND_CHILDREN) {
1560249643Smm		for (zap_cursor_init(&zc, dp->dp_meta_objset,
1561249643Smm		    dd->dd_phys->dd_child_dir_zapobj);
1562249643Smm		    zap_cursor_retrieve(&zc, attr) == 0;
1563249643Smm		    (void) zap_cursor_advance(&zc)) {
1564249643Smm			ASSERT3U(attr->za_integer_length, ==,
1565249643Smm			    sizeof (uint64_t));
1566249643Smm			ASSERT3U(attr->za_num_integers, ==, 1);
1567249643Smm
1568249643Smm			err = dmu_objset_find_dp(dp, attr->za_first_integer,
1569249643Smm			    func, arg, flags);
1570249643Smm			if (err != 0)
1571249643Smm				break;
1572249643Smm		}
1573249643Smm		zap_cursor_fini(&zc);
1574249643Smm
1575249643Smm		if (err != 0) {
1576249643Smm			dsl_dir_rele(dd, FTAG);
1577249643Smm			kmem_free(attr, sizeof (zap_attribute_t));
1578249643Smm			return (err);
1579249643Smm		}
1580249643Smm	}
1581249643Smm
1582249643Smm	/*
1583249643Smm	 * Iterate over all snapshots.
1584249643Smm	 */
1585249643Smm	if (flags & DS_FIND_SNAPSHOTS) {
1586249643Smm		dsl_dataset_t *ds;
1587249643Smm		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
1588249643Smm
1589249643Smm		if (err == 0) {
1590249643Smm			uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
1591249643Smm			dsl_dataset_rele(ds, FTAG);
1592249643Smm
1593249643Smm			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
1594249643Smm			    zap_cursor_retrieve(&zc, attr) == 0;
1595249643Smm			    (void) zap_cursor_advance(&zc)) {
1596249643Smm				ASSERT3U(attr->za_integer_length, ==,
1597249643Smm				    sizeof (uint64_t));
1598249643Smm				ASSERT3U(attr->za_num_integers, ==, 1);
1599249643Smm
1600249643Smm				err = dsl_dataset_hold_obj(dp,
1601249643Smm				    attr->za_first_integer, FTAG, &ds);
1602249643Smm				if (err != 0)
1603249643Smm					break;
1604249643Smm				err = func(dp, ds, arg);
1605249643Smm				dsl_dataset_rele(ds, FTAG);
1606249643Smm				if (err != 0)
1607249643Smm					break;
1608249643Smm			}
1609249643Smm			zap_cursor_fini(&zc);
1610249643Smm		}
1611249643Smm	}
1612249643Smm
1613249643Smm	dsl_dir_rele(dd, FTAG);
1614249643Smm	kmem_free(attr, sizeof (zap_attribute_t));
1615249643Smm
1616249643Smm	if (err != 0)
1617249643Smm		return (err);
1618249643Smm
1619249643Smm	/*
1620249643Smm	 * Apply to self.
1621249643Smm	 */
1622249643Smm	err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
1623249643Smm	if (err != 0)
1624249643Smm		return (err);
1625249643Smm	err = func(dp, ds, arg);
1626249643Smm	dsl_dataset_rele(ds, FTAG);
1627249643Smm	return (err);
1628185029Spjd}
1629185029Spjd
1630185029Spjd/*
1631249643Smm * Find all objsets under name, and for each, call 'func(child_name, arg)'.
1632249643Smm * The dp_config_rwlock must not be held when this is called, and it
1633249643Smm * will not be held when the callback is called.
1634249643Smm * Therefore this function should only be used when the pool is not changing
1635249643Smm * (e.g. in syncing context), or the callback can deal with the possible races.
1636185029Spjd */
1637249643Smmstatic int
1638249643Smmdmu_objset_find_impl(spa_t *spa, const char *name,
1639249643Smm    int func(const char *, void *), void *arg, int flags)
1640185029Spjd{
1641168404Spjd	dsl_dir_t *dd;
1642249643Smm	dsl_pool_t *dp = spa_get_dsl(spa);
1643185029Spjd	dsl_dataset_t *ds;
1644168404Spjd	zap_cursor_t zc;
1645168498Spjd	zap_attribute_t *attr;
1646168404Spjd	char *child;
1647185029Spjd	uint64_t thisobj;
1648185029Spjd	int err;
1649168404Spjd
1650249643Smm	dsl_pool_config_enter(dp, FTAG);
1651249643Smm
1652249643Smm	err = dsl_dir_hold(dp, name, FTAG, &dd, NULL);
1653249643Smm	if (err != 0) {
1654249643Smm		dsl_pool_config_exit(dp, FTAG);
1655168404Spjd		return (err);
1656249643Smm	}
1657168404Spjd
1658185029Spjd	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
1659185029Spjd	if (dd->dd_myname[0] == '$') {
1660249643Smm		dsl_dir_rele(dd, FTAG);
1661249643Smm		dsl_pool_config_exit(dp, FTAG);
1662185029Spjd		return (0);
1663185029Spjd	}
1664185029Spjd
1665185029Spjd	thisobj = dd->dd_phys->dd_head_dataset_obj;
1666168498Spjd	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
1667168404Spjd
1668168404Spjd	/*
1669168404Spjd	 * Iterate over all children.
1670168404Spjd	 */
1671168404Spjd	if (flags & DS_FIND_CHILDREN) {
1672185029Spjd		for (zap_cursor_init(&zc, dp->dp_meta_objset,
1673168404Spjd		    dd->dd_phys->dd_child_dir_zapobj);
1674168498Spjd		    zap_cursor_retrieve(&zc, attr) == 0;
1675168404Spjd		    (void) zap_cursor_advance(&zc)) {
1676249643Smm			ASSERT3U(attr->za_integer_length, ==,
1677249643Smm			    sizeof (uint64_t));
1678249643Smm			ASSERT3U(attr->za_num_integers, ==, 1);
1679168404Spjd
1680219089Spjd			child = kmem_asprintf("%s/%s", name, attr->za_name);
1681249643Smm			dsl_pool_config_exit(dp, FTAG);
1682249643Smm			err = dmu_objset_find_impl(spa, child,
1683249643Smm			    func, arg, flags);
1684249643Smm			dsl_pool_config_enter(dp, FTAG);
1685219089Spjd			strfree(child);
1686249643Smm			if (err != 0)
1687168404Spjd				break;
1688168404Spjd		}
1689168404Spjd		zap_cursor_fini(&zc);
1690168404Spjd
1691249643Smm		if (err != 0) {
1692249643Smm			dsl_dir_rele(dd, FTAG);
1693249643Smm			dsl_pool_config_exit(dp, FTAG);
1694168498Spjd			kmem_free(attr, sizeof (zap_attribute_t));
1695168404Spjd			return (err);
1696168404Spjd		}
1697168404Spjd	}
1698168404Spjd
1699168404Spjd	/*
1700168404Spjd	 * Iterate over all snapshots.
1701168404Spjd	 */
1702185029Spjd	if (flags & DS_FIND_SNAPSHOTS) {
1703185029Spjd		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
1704168404Spjd
1705185029Spjd		if (err == 0) {
1706185029Spjd			uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
1707185029Spjd			dsl_dataset_rele(ds, FTAG);
1708168404Spjd
1709185029Spjd			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
1710185029Spjd			    zap_cursor_retrieve(&zc, attr) == 0;
1711185029Spjd			    (void) zap_cursor_advance(&zc)) {
1712249643Smm				ASSERT3U(attr->za_integer_length, ==,
1713185029Spjd				    sizeof (uint64_t));
1714249643Smm				ASSERT3U(attr->za_num_integers, ==, 1);
1715168404Spjd
1716219089Spjd				child = kmem_asprintf("%s@%s",
1717219089Spjd				    name, attr->za_name);
1718249643Smm				dsl_pool_config_exit(dp, FTAG);
1719249643Smm				err = func(child, arg);
1720249643Smm				dsl_pool_config_enter(dp, FTAG);
1721219089Spjd				strfree(child);
1722249643Smm				if (err != 0)
1723185029Spjd					break;
1724185029Spjd			}
1725185029Spjd			zap_cursor_fini(&zc);
1726168404Spjd		}
1727168404Spjd	}
1728168404Spjd
1729249643Smm	dsl_dir_rele(dd, FTAG);
1730168498Spjd	kmem_free(attr, sizeof (zap_attribute_t));
1731249643Smm	dsl_pool_config_exit(dp, FTAG);
1732168404Spjd
1733249643Smm	if (err != 0)
1734168404Spjd		return (err);
1735168404Spjd
1736249643Smm	/* Apply to self. */
1737249643Smm	return (func(name, arg));
1738168404Spjd}
1739185029Spjd
1740249643Smm/*
1741249643Smm * See comment above dmu_objset_find_impl().
1742249643Smm */
1743207626Smmint
1744249643Smmdmu_objset_find(char *name, int func(const char *, void *), void *arg,
1745249643Smm    int flags)
1746207626Smm{
1747249643Smm	spa_t *spa;
1748249643Smm	int error;
1749207626Smm
1750249643Smm	error = spa_open(name, &spa, FTAG);
1751249643Smm	if (error != 0)
1752249643Smm		return (error);
1753249643Smm	error = dmu_objset_find_impl(spa, name, func, arg, flags);
1754249643Smm	spa_close(spa, FTAG);
1755249643Smm	return (error);
1756207626Smm}
1757207626Smm
1758185029Spjdvoid
1759185029Spjddmu_objset_set_user(objset_t *os, void *user_ptr)
1760185029Spjd{
1761219089Spjd	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
1762219089Spjd	os->os_user_ptr = user_ptr;
1763185029Spjd}
1764185029Spjd
1765185029Spjdvoid *
1766185029Spjddmu_objset_get_user(objset_t *os)
1767185029Spjd{
1768219089Spjd	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
1769219089Spjd	return (os->os_user_ptr);
1770185029Spjd}
1771249643Smm
1772249643Smm/*
1773249643Smm * Determine name of filesystem, given name of snapshot.
1774249643Smm * buf must be at least MAXNAMELEN bytes
1775249643Smm */
1776249643Smmint
1777249643Smmdmu_fsname(const char *snapname, char *buf)
1778249643Smm{
1779249643Smm	char *atp = strchr(snapname, '@');
1780249643Smm	if (atp == NULL)
1781249643Smm		return (SET_ERROR(EINVAL));
1782249643Smm	if (atp - snapname >= MAXNAMELEN)
1783249643Smm		return (SET_ERROR(ENAMETOOLONG));
1784249643Smm	(void) strlcpy(buf, snapname, atp - snapname + 1);
1785249643Smm	return (0);
1786249643Smm}
1787