1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21168404Spjd/*
22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23266771Sdelphij * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
24251478Sdelphij * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
25264835Sdelphij * Copyright (c) 2013, Joyent, Inc. All rights reserved.
26286575Smav * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
27282126Savg * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
28286686Smav * Copyright (c) 2015, STRATO AG, Inc. All rights reserved.
29296519Smav * Copyright (c) 2014 Integros [integros.com]
30168404Spjd */
31168404Spjd
32219089Spjd/* Portions Copyright 2010 Robert Milkowski */
33219089Spjd
34185029Spjd#include <sys/cred.h>
35168404Spjd#include <sys/zfs_context.h>
36168404Spjd#include <sys/dmu_objset.h>
37168404Spjd#include <sys/dsl_dir.h>
38168404Spjd#include <sys/dsl_dataset.h>
39168404Spjd#include <sys/dsl_prop.h>
40168404Spjd#include <sys/dsl_pool.h>
41168404Spjd#include <sys/dsl_synctask.h>
42185029Spjd#include <sys/dsl_deleg.h>
43168404Spjd#include <sys/dnode.h>
44168404Spjd#include <sys/dbuf.h>
45168404Spjd#include <sys/zvol.h>
46168404Spjd#include <sys/dmu_tx.h>
47168404Spjd#include <sys/zap.h>
48168404Spjd#include <sys/zil.h>
49168404Spjd#include <sys/dmu_impl.h>
50185029Spjd#include <sys/zfs_ioctl.h>
51219089Spjd#include <sys/sa.h>
52219089Spjd#include <sys/zfs_onexit.h>
53248571Smm#include <sys/dsl_destroy.h>
54286686Smav#include <sys/vdev.h>
55168404Spjd
56219089Spjd/*
57219089Spjd * Needed to close a window in dnode_move() that allows the objset to be freed
58219089Spjd * before it can be safely accessed.
59219089Spjd */
60219089Spjdkrwlock_t os_lock;
61219089Spjd
/*
 * Tunable to override the maximum number of threads for the parallelization
 * of dmu_objset_find_dp, needed to speed up the import of pools with many
 * datasets.
 * Default is 4 times the number of leaf vdevs.
 */
68286686Smavint dmu_find_threads = 0;
69286686Smav
70286686Smavstatic void dmu_objset_find_dp_cb(void *arg);
71286686Smav
72219089Spjdvoid
73219089Spjddmu_objset_init(void)
74219089Spjd{
75219089Spjd	rw_init(&os_lock, NULL, RW_DEFAULT, NULL);
76219089Spjd}
77219089Spjd
78219089Spjdvoid
79219089Spjddmu_objset_fini(void)
80219089Spjd{
81219089Spjd	rw_destroy(&os_lock);
82219089Spjd}
83219089Spjd
84168404Spjdspa_t *
85168404Spjddmu_objset_spa(objset_t *os)
86168404Spjd{
87219089Spjd	return (os->os_spa);
88168404Spjd}
89168404Spjd
90168404Spjdzilog_t *
91168404Spjddmu_objset_zil(objset_t *os)
92168404Spjd{
93219089Spjd	return (os->os_zil);
94168404Spjd}
95168404Spjd
96168404Spjddsl_pool_t *
97168404Spjddmu_objset_pool(objset_t *os)
98168404Spjd{
99168404Spjd	dsl_dataset_t *ds;
100168404Spjd
101219089Spjd	if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
102168404Spjd		return (ds->ds_dir->dd_pool);
103168404Spjd	else
104219089Spjd		return (spa_get_dsl(os->os_spa));
105168404Spjd}
106168404Spjd
107168404Spjddsl_dataset_t *
108168404Spjddmu_objset_ds(objset_t *os)
109168404Spjd{
110219089Spjd	return (os->os_dsl_dataset);
111168404Spjd}
112168404Spjd
113168404Spjddmu_objset_type_t
114168404Spjddmu_objset_type(objset_t *os)
115168404Spjd{
116219089Spjd	return (os->os_phys->os_type);
117168404Spjd}
118168404Spjd
119168404Spjdvoid
120168404Spjddmu_objset_name(objset_t *os, char *buf)
121168404Spjd{
122219089Spjd	dsl_dataset_name(os->os_dsl_dataset, buf);
123168404Spjd}
124168404Spjd
125168404Spjduint64_t
126168404Spjddmu_objset_id(objset_t *os)
127168404Spjd{
128219089Spjd	dsl_dataset_t *ds = os->os_dsl_dataset;
129168404Spjd
130168404Spjd	return (ds ? ds->ds_object : 0);
131168404Spjd}
132168404Spjd
133266771Sdelphijzfs_sync_type_t
134219089Spjddmu_objset_syncprop(objset_t *os)
135219089Spjd{
136219089Spjd	return (os->os_sync);
137219089Spjd}
138219089Spjd
139266771Sdelphijzfs_logbias_op_t
140219089Spjddmu_objset_logbias(objset_t *os)
141219089Spjd{
142219089Spjd	return (os->os_logbias);
143219089Spjd}
144219089Spjd
145168404Spjdstatic void
146168404Spjdchecksum_changed_cb(void *arg, uint64_t newval)
147168404Spjd{
148219089Spjd	objset_t *os = arg;
149168404Spjd
150168404Spjd	/*
151168404Spjd	 * Inheritance should have been done by now.
152168404Spjd	 */
153168404Spjd	ASSERT(newval != ZIO_CHECKSUM_INHERIT);
154168404Spjd
155219089Spjd	os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
156168404Spjd}
157168404Spjd
158168404Spjdstatic void
159168404Spjdcompression_changed_cb(void *arg, uint64_t newval)
160168404Spjd{
161219089Spjd	objset_t *os = arg;
162168404Spjd
163168404Spjd	/*
164168404Spjd	 * Inheritance and range checking should have been done by now.
165168404Spjd	 */
166168404Spjd	ASSERT(newval != ZIO_COMPRESS_INHERIT);
167168404Spjd
168286547Smav	os->os_compress = zio_compress_select(os->os_spa, newval,
169286547Smav	    ZIO_COMPRESS_ON);
170168404Spjd}
171168404Spjd
172168404Spjdstatic void
173168404Spjdcopies_changed_cb(void *arg, uint64_t newval)
174168404Spjd{
175219089Spjd	objset_t *os = arg;
176168404Spjd
177168404Spjd	/*
178168404Spjd	 * Inheritance and range checking should have been done by now.
179168404Spjd	 */
180168404Spjd	ASSERT(newval > 0);
181219089Spjd	ASSERT(newval <= spa_max_replication(os->os_spa));
182168404Spjd
183219089Spjd	os->os_copies = newval;
184168404Spjd}
185168404Spjd
186185029Spjdstatic void
187219089Spjddedup_changed_cb(void *arg, uint64_t newval)
188219089Spjd{
189219089Spjd	objset_t *os = arg;
190219089Spjd	spa_t *spa = os->os_spa;
191219089Spjd	enum zio_checksum checksum;
192219089Spjd
193219089Spjd	/*
194219089Spjd	 * Inheritance should have been done by now.
195219089Spjd	 */
196219089Spjd	ASSERT(newval != ZIO_CHECKSUM_INHERIT);
197219089Spjd
198219089Spjd	checksum = zio_checksum_dedup_select(spa, newval, ZIO_CHECKSUM_OFF);
199219089Spjd
200219089Spjd	os->os_dedup_checksum = checksum & ZIO_CHECKSUM_MASK;
201219089Spjd	os->os_dedup_verify = !!(checksum & ZIO_CHECKSUM_VERIFY);
202219089Spjd}
203219089Spjd
204219089Spjdstatic void
205185029Spjdprimary_cache_changed_cb(void *arg, uint64_t newval)
206185029Spjd{
207219089Spjd	objset_t *os = arg;
208185029Spjd
209185029Spjd	/*
210185029Spjd	 * Inheritance and range checking should have been done by now.
211185029Spjd	 */
212185029Spjd	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
213185029Spjd	    newval == ZFS_CACHE_METADATA);
214185029Spjd
215219089Spjd	os->os_primary_cache = newval;
216185029Spjd}
217185029Spjd
218185029Spjdstatic void
219185029Spjdsecondary_cache_changed_cb(void *arg, uint64_t newval)
220185029Spjd{
221219089Spjd	objset_t *os = arg;
222185029Spjd
223185029Spjd	/*
224185029Spjd	 * Inheritance and range checking should have been done by now.
225185029Spjd	 */
226185029Spjd	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
227185029Spjd	    newval == ZFS_CACHE_METADATA);
228185029Spjd
229219089Spjd	os->os_secondary_cache = newval;
230185029Spjd}
231185029Spjd
232219089Spjdstatic void
233219089Spjdsync_changed_cb(void *arg, uint64_t newval)
234219089Spjd{
235219089Spjd	objset_t *os = arg;
236219089Spjd
237219089Spjd	/*
238219089Spjd	 * Inheritance and range checking should have been done by now.
239219089Spjd	 */
240219089Spjd	ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS ||
241219089Spjd	    newval == ZFS_SYNC_DISABLED);
242219089Spjd
243219089Spjd	os->os_sync = newval;
244219089Spjd	if (os->os_zil)
245219089Spjd		zil_set_sync(os->os_zil, newval);
246219089Spjd}
247219089Spjd
248219089Spjdstatic void
249266771Sdelphijredundant_metadata_changed_cb(void *arg, uint64_t newval)
250266771Sdelphij{
251266771Sdelphij	objset_t *os = arg;
252266771Sdelphij
253266771Sdelphij	/*
254266771Sdelphij	 * Inheritance and range checking should have been done by now.
255266771Sdelphij	 */
256266771Sdelphij	ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL ||
257266771Sdelphij	    newval == ZFS_REDUNDANT_METADATA_MOST);
258266771Sdelphij
259266771Sdelphij	os->os_redundant_metadata = newval;
260266771Sdelphij}
261266771Sdelphij
262266771Sdelphijstatic void
263219089Spjdlogbias_changed_cb(void *arg, uint64_t newval)
264219089Spjd{
265219089Spjd	objset_t *os = arg;
266219089Spjd
267219089Spjd	ASSERT(newval == ZFS_LOGBIAS_LATENCY ||
268219089Spjd	    newval == ZFS_LOGBIAS_THROUGHPUT);
269219089Spjd	os->os_logbias = newval;
270219089Spjd	if (os->os_zil)
271219089Spjd		zil_set_logbias(os->os_zil, newval);
272219089Spjd}
273219089Spjd
274274337Sdelphijstatic void
275274337Sdelphijrecordsize_changed_cb(void *arg, uint64_t newval)
276274337Sdelphij{
277274337Sdelphij	objset_t *os = arg;
278274337Sdelphij
279274337Sdelphij	os->os_recordsize = newval;
280274337Sdelphij}
281274337Sdelphij
282168404Spjdvoid
283168404Spjddmu_objset_byteswap(void *buf, size_t size)
284168404Spjd{
285168404Spjd	objset_phys_t *osp = buf;
286168404Spjd
287209962Smm	ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t));
288168404Spjd	dnode_byteswap(&osp->os_meta_dnode);
289168404Spjd	byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
290168404Spjd	osp->os_type = BSWAP_64(osp->os_type);
291209962Smm	osp->os_flags = BSWAP_64(osp->os_flags);
292209962Smm	if (size == sizeof (objset_phys_t)) {
293209962Smm		dnode_byteswap(&osp->os_userused_dnode);
294209962Smm		dnode_byteswap(&osp->os_groupused_dnode);
295209962Smm	}
296168404Spjd}
297168404Spjd
298168404Spjdint
299168404Spjddmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
300219089Spjd    objset_t **osp)
301168404Spjd{
302219089Spjd	objset_t *os;
303185029Spjd	int i, err;
304168404Spjd
305185029Spjd	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));
306185029Spjd
307219089Spjd	os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
308219089Spjd	os->os_dsl_dataset = ds;
309219089Spjd	os->os_spa = spa;
310219089Spjd	os->os_rootbp = bp;
311219089Spjd	if (!BP_IS_HOLE(os->os_rootbp)) {
312275811Sdelphij		arc_flags_t aflags = ARC_FLAG_WAIT;
313268123Sdelphij		zbookmark_phys_t zb;
314219089Spjd		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
315219089Spjd		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
316219089Spjd
317219089Spjd		if (DMU_OS_IS_L2CACHEABLE(os))
318275811Sdelphij			aflags |= ARC_FLAG_L2CACHE;
319251478Sdelphij		if (DMU_OS_IS_L2COMPRESSIBLE(os))
320275811Sdelphij			aflags |= ARC_FLAG_L2COMPRESS;
321168404Spjd
322219089Spjd		dprintf_bp(os->os_rootbp, "reading %s", "");
323246666Smm		err = arc_read(NULL, spa, os->os_rootbp,
324219089Spjd		    arc_getbuf_func, &os->os_phys_buf,
325168404Spjd		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
326248571Smm		if (err != 0) {
327219089Spjd			kmem_free(os, sizeof (objset_t));
328185029Spjd			/* convert checksum errors into IO errors */
329185029Spjd			if (err == ECKSUM)
330249195Smm				err = SET_ERROR(EIO);
331168404Spjd			return (err);
332168404Spjd		}
333209962Smm
334209962Smm		/* Increase the blocksize if we are permitted. */
335209962Smm		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
336219089Spjd		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
337209962Smm			arc_buf_t *buf = arc_buf_alloc(spa,
338219089Spjd			    sizeof (objset_phys_t), &os->os_phys_buf,
339209962Smm			    ARC_BUFC_METADATA);
340209962Smm			bzero(buf->b_data, sizeof (objset_phys_t));
341219089Spjd			bcopy(os->os_phys_buf->b_data, buf->b_data,
342219089Spjd			    arc_buf_size(os->os_phys_buf));
343219089Spjd			(void) arc_buf_remove_ref(os->os_phys_buf,
344219089Spjd			    &os->os_phys_buf);
345219089Spjd			os->os_phys_buf = buf;
346209962Smm		}
347209962Smm
348219089Spjd		os->os_phys = os->os_phys_buf->b_data;
349219089Spjd		os->os_flags = os->os_phys->os_flags;
350168404Spjd	} else {
351209962Smm		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
352209962Smm		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
353219089Spjd		os->os_phys_buf = arc_buf_alloc(spa, size,
354219089Spjd		    &os->os_phys_buf, ARC_BUFC_METADATA);
355219089Spjd		os->os_phys = os->os_phys_buf->b_data;
356219089Spjd		bzero(os->os_phys, size);
357168404Spjd	}
358168404Spjd
359168404Spjd	/*
360168404Spjd	 * Note: the changed_cb will be called once before the register
361168404Spjd	 * func returns, thus changing the checksum/compression from the
362185029Spjd	 * default (fletcher2/off).  Snapshots don't need to know about
363185029Spjd	 * checksum/compression/copies.
364168404Spjd	 */
365268075Sdelphij	if (ds != NULL) {
366289362Smav		boolean_t needlock = B_FALSE;
367289362Smav
368289362Smav		/*
369289362Smav		 * Note: it's valid to open the objset if the dataset is
370289362Smav		 * long-held, in which case the pool_config lock will not
371289362Smav		 * be held.
372289362Smav		 */
373289362Smav		if (!dsl_pool_config_held(dmu_objset_pool(os))) {
374289362Smav			needlock = B_TRUE;
375289362Smav			dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
376289362Smav		}
377248571Smm		err = dsl_prop_register(ds,
378248571Smm		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
379219089Spjd		    primary_cache_changed_cb, os);
380248571Smm		if (err == 0) {
381248571Smm			err = dsl_prop_register(ds,
382248571Smm			    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
383219089Spjd			    secondary_cache_changed_cb, os);
384248571Smm		}
385286575Smav		if (!ds->ds_is_snapshot) {
386248571Smm			if (err == 0) {
387248571Smm				err = dsl_prop_register(ds,
388248571Smm				    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
389219089Spjd				    checksum_changed_cb, os);
390248571Smm			}
391248571Smm			if (err == 0) {
392248571Smm				err = dsl_prop_register(ds,
393248571Smm				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
394219089Spjd				    compression_changed_cb, os);
395248571Smm			}
396248571Smm			if (err == 0) {
397248571Smm				err = dsl_prop_register(ds,
398248571Smm				    zfs_prop_to_name(ZFS_PROP_COPIES),
399219089Spjd				    copies_changed_cb, os);
400248571Smm			}
401248571Smm			if (err == 0) {
402248571Smm				err = dsl_prop_register(ds,
403248571Smm				    zfs_prop_to_name(ZFS_PROP_DEDUP),
404219089Spjd				    dedup_changed_cb, os);
405248571Smm			}
406248571Smm			if (err == 0) {
407248571Smm				err = dsl_prop_register(ds,
408248571Smm				    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
409219089Spjd				    logbias_changed_cb, os);
410248571Smm			}
411248571Smm			if (err == 0) {
412248571Smm				err = dsl_prop_register(ds,
413248571Smm				    zfs_prop_to_name(ZFS_PROP_SYNC),
414219089Spjd				    sync_changed_cb, os);
415248571Smm			}
416266771Sdelphij			if (err == 0) {
417266771Sdelphij				err = dsl_prop_register(ds,
418266771Sdelphij				    zfs_prop_to_name(
419266771Sdelphij				    ZFS_PROP_REDUNDANT_METADATA),
420266771Sdelphij				    redundant_metadata_changed_cb, os);
421266771Sdelphij			}
422274337Sdelphij			if (err == 0) {
423274337Sdelphij				err = dsl_prop_register(ds,
424274337Sdelphij				    zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
425274337Sdelphij				    recordsize_changed_cb, os);
426274337Sdelphij			}
427185029Spjd		}
428289362Smav		if (needlock)
429289362Smav			dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
430248571Smm		if (err != 0) {
431219089Spjd			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
432248571Smm			    &os->os_phys_buf));
433219089Spjd			kmem_free(os, sizeof (objset_t));
434168404Spjd			return (err);
435168404Spjd		}
436268075Sdelphij	} else {
437168404Spjd		/* It's the meta-objset. */
438219089Spjd		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
439286547Smav		os->os_compress = ZIO_COMPRESS_ON;
440219089Spjd		os->os_copies = spa_max_replication(spa);
441219089Spjd		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
442266771Sdelphij		os->os_dedup_verify = B_FALSE;
443266771Sdelphij		os->os_logbias = ZFS_LOGBIAS_LATENCY;
444266771Sdelphij		os->os_sync = ZFS_SYNC_STANDARD;
445219089Spjd		os->os_primary_cache = ZFS_CACHE_ALL;
446219089Spjd		os->os_secondary_cache = ZFS_CACHE_ALL;
447168404Spjd	}
448168404Spjd
449286575Smav	if (ds == NULL || !ds->ds_is_snapshot)
450219089Spjd		os->os_zil_header = os->os_phys->os_zil_header;
451219089Spjd	os->os_zil = zil_alloc(os, &os->os_zil_header);
452168404Spjd
453168404Spjd	for (i = 0; i < TXG_SIZE; i++) {
454219089Spjd		list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
455168404Spjd		    offsetof(dnode_t, dn_dirty_link[i]));
456219089Spjd		list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
457168404Spjd		    offsetof(dnode_t, dn_dirty_link[i]));
458168404Spjd	}
459219089Spjd	list_create(&os->os_dnodes, sizeof (dnode_t),
460168404Spjd	    offsetof(dnode_t, dn_link));
461219089Spjd	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
462168404Spjd	    offsetof(dmu_buf_impl_t, db_link));
463168404Spjd
464219089Spjd	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
465219089Spjd	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
466219089Spjd	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
467168404Spjd
468286575Smav	dnode_special_open(os, &os->os_phys->os_meta_dnode,
469286575Smav	    DMU_META_DNODE_OBJECT, &os->os_meta_dnode);
470219089Spjd	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
471286575Smav		dnode_special_open(os, &os->os_phys->os_userused_dnode,
472286575Smav		    DMU_USERUSED_OBJECT, &os->os_userused_dnode);
473286575Smav		dnode_special_open(os, &os->os_phys->os_groupused_dnode,
474286575Smav		    DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
475209962Smm	}
476168404Spjd
477219089Spjd	*osp = os;
478168404Spjd	return (0);
479168404Spjd}
480168404Spjd
481219089Spjdint
482219089Spjddmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
483168404Spjd{
484219089Spjd	int err = 0;
485168404Spjd
486289362Smav	/*
487289362Smav	 * We shouldn't be doing anything with dsl_dataset_t's unless the
488289362Smav	 * pool_config lock is held, or the dataset is long-held.
489289362Smav	 */
490289362Smav	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool) ||
491289362Smav	    dsl_dataset_long_held(ds));
492289362Smav
493185029Spjd	mutex_enter(&ds->ds_opening_lock);
494268075Sdelphij	if (ds->ds_objset == NULL) {
495268075Sdelphij		objset_t *os;
496168404Spjd		err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
497268075Sdelphij		    ds, dsl_dataset_get_blkptr(ds), &os);
498268075Sdelphij
499268075Sdelphij		if (err == 0) {
500268075Sdelphij			mutex_enter(&ds->ds_lock);
501268075Sdelphij			ASSERT(ds->ds_objset == NULL);
502268075Sdelphij			ds->ds_objset = os;
503268075Sdelphij			mutex_exit(&ds->ds_lock);
504268075Sdelphij		}
505168404Spjd	}
506268075Sdelphij	*osp = ds->ds_objset;
507185029Spjd	mutex_exit(&ds->ds_opening_lock);
508219089Spjd	return (err);
509168404Spjd}
510168404Spjd
511248571Smm/*
512248571Smm * Holds the pool while the objset is held.  Therefore only one objset
513248571Smm * can be held at a time.
514248571Smm */
515185029Spjdint
516219089Spjddmu_objset_hold(const char *name, void *tag, objset_t **osp)
517185029Spjd{
518248571Smm	dsl_pool_t *dp;
519219089Spjd	dsl_dataset_t *ds;
520185029Spjd	int err;
521185029Spjd
522248571Smm	err = dsl_pool_hold(name, tag, &dp);
523248571Smm	if (err != 0)
524219089Spjd		return (err);
525248571Smm	err = dsl_dataset_hold(dp, name, tag, &ds);
526248571Smm	if (err != 0) {
527248571Smm		dsl_pool_rele(dp, tag);
528248571Smm		return (err);
529248571Smm	}
530219089Spjd
531219089Spjd	err = dmu_objset_from_ds(ds, osp);
532248571Smm	if (err != 0) {
533219089Spjd		dsl_dataset_rele(ds, tag);
534248571Smm		dsl_pool_rele(dp, tag);
535248571Smm	}
536219089Spjd
537185029Spjd	return (err);
538185029Spjd}
539185029Spjd
540286686Smavstatic int
541286686Smavdmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
542286686Smav    boolean_t readonly, void *tag, objset_t **osp)
543286686Smav{
544286686Smav	int err;
545286686Smav
546286686Smav	err = dmu_objset_from_ds(ds, osp);
547286686Smav	if (err != 0) {
548286686Smav		dsl_dataset_disown(ds, tag);
549286686Smav	} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
550286686Smav		dsl_dataset_disown(ds, tag);
551286686Smav		return (SET_ERROR(EINVAL));
552286686Smav	} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
553286686Smav		dsl_dataset_disown(ds, tag);
554286686Smav		return (SET_ERROR(EROFS));
555286686Smav	}
556286686Smav	return (err);
557286686Smav}
558286686Smav
559248571Smm/*
560248571Smm * dsl_pool must not be held when this is called.
561248571Smm * Upon successful return, there will be a longhold on the dataset,
562248571Smm * and the dsl_pool will not be held.
563248571Smm */
564185029Spjdint
565219089Spjddmu_objset_own(const char *name, dmu_objset_type_t type,
566219089Spjd    boolean_t readonly, void *tag, objset_t **osp)
567185029Spjd{
568248571Smm	dsl_pool_t *dp;
569185029Spjd	dsl_dataset_t *ds;
570185029Spjd	int err;
571185029Spjd
572248571Smm	err = dsl_pool_hold(name, FTAG, &dp);
573248571Smm	if (err != 0)
574185029Spjd		return (err);
575248571Smm	err = dsl_dataset_own(dp, name, tag, &ds);
576248571Smm	if (err != 0) {
577248571Smm		dsl_pool_rele(dp, FTAG);
578248571Smm		return (err);
579248571Smm	}
580286686Smav	err = dmu_objset_own_impl(ds, type, readonly, tag, osp);
581286686Smav	dsl_pool_rele(dp, FTAG);
582185029Spjd
583185029Spjd	return (err);
584185029Spjd}
585185029Spjd
586286686Smavint
587286686Smavdmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
588286686Smav    boolean_t readonly, void *tag, objset_t **osp)
589286686Smav{
590286686Smav	dsl_dataset_t *ds;
591286686Smav	int err;
592286686Smav
593286686Smav	err = dsl_dataset_own_obj(dp, obj, tag, &ds);
594286686Smav	if (err != 0)
595286686Smav		return (err);
596286686Smav
597286686Smav	return (dmu_objset_own_impl(ds, type, readonly, tag, osp));
598286686Smav}
599286686Smav
600168404Spjdvoid
601219089Spjddmu_objset_rele(objset_t *os, void *tag)
602168404Spjd{
603248571Smm	dsl_pool_t *dp = dmu_objset_pool(os);
604219089Spjd	dsl_dataset_rele(os->os_dsl_dataset, tag);
605248571Smm	dsl_pool_rele(dp, tag);
606219089Spjd}
607185029Spjd
608253816Sdelphij/*
609253816Sdelphij * When we are called, os MUST refer to an objset associated with a dataset
610253816Sdelphij * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
611253816Sdelphij * == tag.  We will then release and reacquire ownership of the dataset while
612253816Sdelphij * holding the pool config_rwlock to avoid intervening namespace or ownership
613253816Sdelphij * changes may occur.
614253816Sdelphij *
615253816Sdelphij * This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to
616253816Sdelphij * release the hold on its dataset and acquire a new one on the dataset of the
617253816Sdelphij * same name so that it can be partially torn down and reconstructed.
618253816Sdelphij */
619219089Spjdvoid
620253816Sdelphijdmu_objset_refresh_ownership(objset_t *os, void *tag)
621253816Sdelphij{
622253816Sdelphij	dsl_pool_t *dp;
623253816Sdelphij	dsl_dataset_t *ds, *newds;
624253816Sdelphij	char name[MAXNAMELEN];
625253816Sdelphij
626253816Sdelphij	ds = os->os_dsl_dataset;
627253816Sdelphij	VERIFY3P(ds, !=, NULL);
628253816Sdelphij	VERIFY3P(ds->ds_owner, ==, tag);
629253816Sdelphij	VERIFY(dsl_dataset_long_held(ds));
630253816Sdelphij
631253816Sdelphij	dsl_dataset_name(ds, name);
632253816Sdelphij	dp = dmu_objset_pool(os);
633253816Sdelphij	dsl_pool_config_enter(dp, FTAG);
634253816Sdelphij	dmu_objset_disown(os, tag);
635253816Sdelphij	VERIFY0(dsl_dataset_own(dp, name, tag, &newds));
636253816Sdelphij	VERIFY3P(newds, ==, os->os_dsl_dataset);
637253816Sdelphij	dsl_pool_config_exit(dp, FTAG);
638253816Sdelphij}
639253816Sdelphij
640253816Sdelphijvoid
641219089Spjddmu_objset_disown(objset_t *os, void *tag)
642219089Spjd{
643219089Spjd	dsl_dataset_disown(os->os_dsl_dataset, tag);
644168404Spjd}
645168404Spjd
646248571Smmvoid
647185029Spjddmu_objset_evict_dbufs(objset_t *os)
648168404Spjd{
649286575Smav	dnode_t dn_marker;
650168404Spjd	dnode_t *dn;
651168404Spjd
652219089Spjd	mutex_enter(&os->os_lock);
653286575Smav	dn = list_head(&os->os_dnodes);
654286575Smav	while (dn != NULL) {
655286575Smav		/*
656286575Smav		 * Skip dnodes without holds.  We have to do this dance
657286575Smav		 * because dnode_add_ref() only works if there is already a
658286575Smav		 * hold.  If the dnode has no holds, then it has no dbufs.
659286575Smav		 */
660286575Smav		if (dnode_add_ref(dn, FTAG)) {
661286575Smav			list_insert_after(&os->os_dnodes, dn, &dn_marker);
662286575Smav			mutex_exit(&os->os_lock);
663168404Spjd
664286575Smav			dnode_evict_dbufs(dn);
665286575Smav			dnode_rele(dn, FTAG);
666168404Spjd
667286575Smav			mutex_enter(&os->os_lock);
668286575Smav			dn = list_next(&os->os_dnodes, &dn_marker);
669286575Smav			list_remove(&os->os_dnodes, &dn_marker);
670286575Smav		} else {
671286575Smav			dn = list_next(&os->os_dnodes, dn);
672286575Smav		}
673286575Smav	}
674286575Smav	mutex_exit(&os->os_lock);
675168404Spjd
676286575Smav	if (DMU_USERUSED_DNODE(os) != NULL) {
677286575Smav		dnode_evict_dbufs(DMU_GROUPUSED_DNODE(os));
678286575Smav		dnode_evict_dbufs(DMU_USERUSED_DNODE(os));
679168404Spjd	}
680286575Smav	dnode_evict_dbufs(DMU_META_DNODE(os));
681168404Spjd}
682168404Spjd
683286575Smav/*
684286575Smav * Objset eviction processing is split into into two pieces.
685286575Smav * The first marks the objset as evicting, evicts any dbufs that
686286575Smav * have a refcount of zero, and then queues up the objset for the
687286575Smav * second phase of eviction.  Once os->os_dnodes has been cleared by
688286575Smav * dnode_buf_pageout()->dnode_destroy(), the second phase is executed.
689286575Smav * The second phase closes the special dnodes, dequeues the objset from
690286575Smav * the list of those undergoing eviction, and finally frees the objset.
691286575Smav *
692286575Smav * NOTE: Due to asynchronous eviction processing (invocation of
693286575Smav *       dnode_buf_pageout()), it is possible for the meta dnode for the
694286575Smav *       objset to have no holds even though os->os_dnodes is not empty.
695286575Smav */
696168404Spjdvoid
697219089Spjddmu_objset_evict(objset_t *os)
698168404Spjd{
699219089Spjd	dsl_dataset_t *ds = os->os_dsl_dataset;
700168404Spjd
701219089Spjd	for (int t = 0; t < TXG_SIZE; t++)
702219089Spjd		ASSERT(!dmu_objset_is_dirty(os, t));
703168404Spjd
704288204Sdelphij	if (ds)
705288204Sdelphij		dsl_prop_unregister_all(ds, os);
706168404Spjd
707219089Spjd	if (os->os_sa)
708219089Spjd		sa_tear_down(os);
709219089Spjd
710248571Smm	dmu_objset_evict_dbufs(os);
711168404Spjd
712286575Smav	mutex_enter(&os->os_lock);
713286575Smav	spa_evicting_os_register(os->os_spa, os);
714286575Smav	if (list_is_empty(&os->os_dnodes)) {
715286575Smav		mutex_exit(&os->os_lock);
716286575Smav		dmu_objset_evict_done(os);
717286575Smav	} else {
718286575Smav		mutex_exit(&os->os_lock);
719286575Smav	}
720286575Smav}
721286575Smav
722286575Smavvoid
723286575Smavdmu_objset_evict_done(objset_t *os)
724286575Smav{
725286575Smav	ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
726286575Smav
727219089Spjd	dnode_special_close(&os->os_meta_dnode);
728219089Spjd	if (DMU_USERUSED_DNODE(os)) {
729219089Spjd		dnode_special_close(&os->os_userused_dnode);
730219089Spjd		dnode_special_close(&os->os_groupused_dnode);
731209962Smm	}
732219089Spjd	zil_free(os->os_zil);
733168404Spjd
734248571Smm	VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));
735219089Spjd
736219089Spjd	/*
737219089Spjd	 * This is a barrier to prevent the objset from going away in
738219089Spjd	 * dnode_move() until we can safely ensure that the objset is still in
739219089Spjd	 * use. We consider the objset valid before the barrier and invalid
740219089Spjd	 * after the barrier.
741219089Spjd	 */
742219089Spjd	rw_enter(&os_lock, RW_READER);
743219089Spjd	rw_exit(&os_lock);
744219089Spjd
745219089Spjd	mutex_destroy(&os->os_lock);
746219089Spjd	mutex_destroy(&os->os_obj_lock);
747219089Spjd	mutex_destroy(&os->os_user_ptr_lock);
748286575Smav	spa_evicting_os_deregister(os->os_spa, os);
749219089Spjd	kmem_free(os, sizeof (objset_t));
750168404Spjd}
751168404Spjd
752219089Spjdtimestruc_t
753219089Spjddmu_objset_snap_cmtime(objset_t *os)
754219089Spjd{
755219089Spjd	return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
756219089Spjd}
757219089Spjd
758168404Spjd/* called from dsl for meta-objset */
759219089Spjdobjset_t *
760168404Spjddmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
761168404Spjd    dmu_objset_type_t type, dmu_tx_t *tx)
762168404Spjd{
763219089Spjd	objset_t *os;
764168404Spjd	dnode_t *mdn;
765168404Spjd
766168404Spjd	ASSERT(dmu_tx_is_syncing(tx));
767248571Smm
768219089Spjd	if (ds != NULL)
769248571Smm		VERIFY0(dmu_objset_from_ds(ds, &os));
770219089Spjd	else
771248571Smm		VERIFY0(dmu_objset_open_impl(spa, NULL, bp, &os));
772168404Spjd
773219089Spjd	mdn = DMU_META_DNODE(os);
774219089Spjd
775168404Spjd	dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
776168404Spjd	    DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);
777168404Spjd
778168404Spjd	/*
779168404Spjd	 * We don't want to have to increase the meta-dnode's nlevels
780168404Spjd	 * later, because then we could do it in quescing context while
781168404Spjd	 * we are also accessing it in open context.
782168404Spjd	 *
783168404Spjd	 * This precaution is not necessary for the MOS (ds == NULL),
784168404Spjd	 * because the MOS is only updated in syncing context.
785168404Spjd	 * This is most fortunate: the MOS is the only objset that
786168404Spjd	 * needs to be synced multiple times as spa_sync() iterates
787168404Spjd	 * to convergence, so minimizing its dn_nlevels matters.
788168404Spjd	 */
789168404Spjd	if (ds != NULL) {
790168404Spjd		int levels = 1;
791168404Spjd
792168404Spjd		/*
793168404Spjd		 * Determine the number of levels necessary for the meta-dnode
794168404Spjd		 * to contain DN_MAX_OBJECT dnodes.
795168404Spjd		 */
796168404Spjd		while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift +
797168404Spjd		    (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
798168404Spjd		    DN_MAX_OBJECT * sizeof (dnode_phys_t))
799168404Spjd			levels++;
800168404Spjd
801168404Spjd		mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
802168404Spjd		    mdn->dn_nlevels = levels;
803168404Spjd	}
804168404Spjd
805168404Spjd	ASSERT(type != DMU_OST_NONE);
806168404Spjd	ASSERT(type != DMU_OST_ANY);
807168404Spjd	ASSERT(type < DMU_OST_NUMTYPES);
808219089Spjd	os->os_phys->os_type = type;
809219089Spjd	if (dmu_objset_userused_enabled(os)) {
810219089Spjd		os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
811219089Spjd		os->os_flags = os->os_phys->os_flags;
812209962Smm	}
813168404Spjd
814168404Spjd	dsl_dataset_dirty(ds, tx);
815168404Spjd
816219089Spjd	return (os);
817168404Spjd}
818168404Spjd
819248571Smmtypedef struct dmu_objset_create_arg {
820248571Smm	const char *doca_name;
821248571Smm	cred_t *doca_cred;
822248571Smm	void (*doca_userfunc)(objset_t *os, void *arg,
823248571Smm	    cred_t *cr, dmu_tx_t *tx);
824248571Smm	void *doca_userarg;
825248571Smm	dmu_objset_type_t doca_type;
826248571Smm	uint64_t doca_flags;
827248571Smm} dmu_objset_create_arg_t;
828168404Spjd
829185029Spjd/*ARGSUSED*/
830168404Spjdstatic int
831248571Smmdmu_objset_create_check(void *arg, dmu_tx_t *tx)
832168404Spjd{
833248571Smm	dmu_objset_create_arg_t *doca = arg;
834248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
835248571Smm	dsl_dir_t *pdd;
836248571Smm	const char *tail;
837248571Smm	int error;
838168404Spjd
839248571Smm	if (strchr(doca->doca_name, '@') != NULL)
840249195Smm		return (SET_ERROR(EINVAL));
841168404Spjd
842248571Smm	error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail);
843248571Smm	if (error != 0)
844248571Smm		return (error);
845248571Smm	if (tail == NULL) {
846248571Smm		dsl_dir_rele(pdd, FTAG);
847249195Smm		return (SET_ERROR(EEXIST));
848168404Spjd	}
849264835Sdelphij	error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
850264835Sdelphij	    doca->doca_cred);
851248571Smm	dsl_dir_rele(pdd, FTAG);
852185029Spjd
853264835Sdelphij	return (error);
854168404Spjd}
855168404Spjd
856168404Spjdstatic void
857248571Smmdmu_objset_create_sync(void *arg, dmu_tx_t *tx)
858168404Spjd{
859248571Smm	dmu_objset_create_arg_t *doca = arg;
860248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
861248571Smm	dsl_dir_t *pdd;
862248571Smm	const char *tail;
863248571Smm	dsl_dataset_t *ds;
864219089Spjd	uint64_t obj;
865248571Smm	blkptr_t *bp;
866248571Smm	objset_t *os;
867168404Spjd
868248571Smm	VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail));
869168404Spjd
870248571Smm	obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags,
871248571Smm	    doca->doca_cred, tx);
872168404Spjd
873248571Smm	VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
874248571Smm	bp = dsl_dataset_get_blkptr(ds);
875248571Smm	os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
876248571Smm	    ds, bp, doca->doca_type, tx);
877168404Spjd
878248571Smm	if (doca->doca_userfunc != NULL) {
879248571Smm		doca->doca_userfunc(os, doca->doca_userarg,
880248571Smm		    doca->doca_cred, tx);
881168404Spjd	}
882185029Spjd
883248571Smm	spa_history_log_internal_ds(ds, "create", tx, "");
884248571Smm	dsl_dataset_rele(ds, FTAG);
885248571Smm	dsl_dir_rele(pdd, FTAG);
886168404Spjd}
887168404Spjd
888168404Spjdint
889219089Spjddmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
890185029Spjd    void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
891168404Spjd{
892248571Smm	dmu_objset_create_arg_t doca;
893168404Spjd
894248571Smm	doca.doca_name = name;
895248571Smm	doca.doca_cred = CRED();
896248571Smm	doca.doca_flags = flags;
897248571Smm	doca.doca_userfunc = func;
898248571Smm	doca.doca_userarg = arg;
899248571Smm	doca.doca_type = type;
900168404Spjd
901248571Smm	return (dsl_sync_task(name,
902268473Sdelphij	    dmu_objset_create_check, dmu_objset_create_sync, &doca,
903268473Sdelphij	    5, ZFS_SPACE_CHECK_NORMAL));
904168404Spjd}
905168404Spjd
906248571Smmtypedef struct dmu_objset_clone_arg {
907248571Smm	const char *doca_clone;
908248571Smm	const char *doca_origin;
909248571Smm	cred_t *doca_cred;
910248571Smm} dmu_objset_clone_arg_t;
911248571Smm
912248571Smm/*ARGSUSED*/
913248571Smmstatic int
914248571Smmdmu_objset_clone_check(void *arg, dmu_tx_t *tx)
915168404Spjd{
916248571Smm	dmu_objset_clone_arg_t *doca = arg;
917219089Spjd	dsl_dir_t *pdd;
918219089Spjd	const char *tail;
919248571Smm	int error;
920248571Smm	dsl_dataset_t *origin;
921248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
922168404Spjd
923248571Smm	if (strchr(doca->doca_clone, '@') != NULL)
924249195Smm		return (SET_ERROR(EINVAL));
925248571Smm
926248571Smm	error = dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail);
927248571Smm	if (error != 0)
928248571Smm		return (error);
929219089Spjd	if (tail == NULL) {
930248571Smm		dsl_dir_rele(pdd, FTAG);
931249195Smm		return (SET_ERROR(EEXIST));
932168404Spjd	}
933282126Savg
934264835Sdelphij	error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
935264835Sdelphij	    doca->doca_cred);
936264835Sdelphij	if (error != 0) {
937264835Sdelphij		dsl_dir_rele(pdd, FTAG);
938264835Sdelphij		return (SET_ERROR(EDQUOT));
939264835Sdelphij	}
940248571Smm	dsl_dir_rele(pdd, FTAG);
941185029Spjd
942248571Smm	error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin);
943248571Smm	if (error != 0)
944219089Spjd		return (error);
945219089Spjd
946248571Smm	/* You can only clone snapshots, not the head datasets. */
947286575Smav	if (!origin->ds_is_snapshot) {
948248571Smm		dsl_dataset_rele(origin, FTAG);
949249195Smm		return (SET_ERROR(EINVAL));
950219089Spjd	}
951248571Smm	dsl_dataset_rele(origin, FTAG);
952248571Smm
953248571Smm	return (0);
954209962Smm}
955209962Smm
956209962Smmstatic void
957248571Smmdmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
958209962Smm{
959248571Smm	dmu_objset_clone_arg_t *doca = arg;
960248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
961248571Smm	dsl_dir_t *pdd;
962248571Smm	const char *tail;
963248571Smm	dsl_dataset_t *origin, *ds;
964248571Smm	uint64_t obj;
965248571Smm	char namebuf[MAXNAMELEN];
966209962Smm
967248571Smm	VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail));
968248571Smm	VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin));
969209962Smm
970248571Smm	obj = dsl_dataset_create_sync(pdd, tail, origin, 0,
971248571Smm	    doca->doca_cred, tx);
972219089Spjd
973248571Smm	VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
974248571Smm	dsl_dataset_name(origin, namebuf);
975248571Smm	spa_history_log_internal_ds(ds, "clone", tx,
976248571Smm	    "origin=%s (%llu)", namebuf, origin->ds_object);
977248571Smm	dsl_dataset_rele(ds, FTAG);
978248571Smm	dsl_dataset_rele(origin, FTAG);
979248571Smm	dsl_dir_rele(pdd, FTAG);
980209962Smm}
981209962Smm
982248571Smmint
983248571Smmdmu_objset_clone(const char *clone, const char *origin)
984168404Spjd{
985248571Smm	dmu_objset_clone_arg_t doca;
986168404Spjd
987248571Smm	doca.doca_clone = clone;
988248571Smm	doca.doca_origin = origin;
989248571Smm	doca.doca_cred = CRED();
990219089Spjd
991248571Smm	return (dsl_sync_task(clone,
992268473Sdelphij	    dmu_objset_clone_check, dmu_objset_clone_sync, &doca,
993268473Sdelphij	    5, ZFS_SPACE_CHECK_NORMAL));
994168404Spjd}
995168404Spjd
996168404Spjdint
997248571Smmdmu_objset_snapshot_one(const char *fsname, const char *snapname)
998168404Spjd{
999168404Spjd	int err;
1000248571Smm	char *longsnap = kmem_asprintf("%s@%s", fsname, snapname);
1001248571Smm	nvlist_t *snaps = fnvlist_alloc();
1002168404Spjd
1003248571Smm	fnvlist_add_boolean(snaps, longsnap);
1004248571Smm	strfree(longsnap);
1005248571Smm	err = dsl_dataset_snapshot(snaps, NULL, NULL);
1006248571Smm	fnvlist_free(snaps);
1007168404Spjd	return (err);
1008168404Spjd}
1009168404Spjd
1010168404Spjdstatic void
1011209962Smmdmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
1012168404Spjd{
1013168404Spjd	dnode_t *dn;
1014168404Spjd
1015168404Spjd	while (dn = list_head(list)) {
1016168404Spjd		ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
1017168404Spjd		ASSERT(dn->dn_dbuf->db_data_pending);
1018168404Spjd		/*
1019209962Smm		 * Initialize dn_zio outside dnode_sync() because the
1020209962Smm		 * meta-dnode needs to set it ouside dnode_sync().
1021168404Spjd		 */
1022168404Spjd		dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
1023168404Spjd		ASSERT(dn->dn_zio);
1024168404Spjd
1025168404Spjd		ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
1026168404Spjd		list_remove(list, dn);
1027209962Smm
1028209962Smm		if (newlist) {
1029209962Smm			(void) dnode_add_ref(dn, newlist);
1030209962Smm			list_insert_tail(newlist, dn);
1031209962Smm		}
1032209962Smm
1033168404Spjd		dnode_sync(dn, tx);
1034168404Spjd	}
1035168404Spjd}
1036168404Spjd
1037168404Spjd/* ARGSUSED */
1038168404Spjdstatic void
1039219089Spjddmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
1040168404Spjd{
1041185029Spjd	blkptr_t *bp = zio->io_bp;
1042219089Spjd	objset_t *os = arg;
1043168404Spjd	dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
1044168404Spjd
1045268075Sdelphij	ASSERT(!BP_IS_EMBEDDED(bp));
1046248571Smm	ASSERT3P(bp, ==, os->os_rootbp);
1047248571Smm	ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
1048248571Smm	ASSERT0(BP_GET_LEVEL(bp));
1049185029Spjd
1050168404Spjd	/*
1051209962Smm	 * Update rootbp fill count: it should be the number of objects
1052209962Smm	 * allocated in the object set (not counting the "special"
1053209962Smm	 * objects that are stored in the objset_phys_t -- the meta
1054209962Smm	 * dnode and user/group accounting objects).
1055168404Spjd	 */
1056209962Smm	bp->blk_fill = 0;
1057185029Spjd	for (int i = 0; i < dnp->dn_nblkptr; i++)
1058268075Sdelphij		bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
1059219089Spjd}
1060168404Spjd
1061219089Spjd/* ARGSUSED */
1062219089Spjdstatic void
1063219089Spjddmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
1064219089Spjd{
1065219089Spjd	blkptr_t *bp = zio->io_bp;
1066219089Spjd	blkptr_t *bp_orig = &zio->io_bp_orig;
1067219089Spjd	objset_t *os = arg;
1068219089Spjd
1069185029Spjd	if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
1070219089Spjd		ASSERT(BP_EQUAL(bp, bp_orig));
1071185029Spjd	} else {
1072219089Spjd		dsl_dataset_t *ds = os->os_dsl_dataset;
1073219089Spjd		dmu_tx_t *tx = os->os_synctx;
1074219089Spjd
1075219089Spjd		(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
1076219089Spjd		dsl_dataset_block_born(ds, bp, tx);
1077168404Spjd	}
1078168404Spjd}
1079168404Spjd
1080168404Spjd/* called from dsl */
1081168404Spjdvoid
1082219089Spjddmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
1083168404Spjd{
1084168404Spjd	int txgoff;
1085268123Sdelphij	zbookmark_phys_t zb;
1086219089Spjd	zio_prop_t zp;
1087168404Spjd	zio_t *zio;
1088168404Spjd	list_t *list;
1089209962Smm	list_t *newlist = NULL;
1090168404Spjd	dbuf_dirty_record_t *dr;
1091168404Spjd
1092168404Spjd	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);
1093168404Spjd
1094168404Spjd	ASSERT(dmu_tx_is_syncing(tx));
1095168404Spjd	/* XXX the write_done callback should really give us the tx... */
1096168404Spjd	os->os_synctx = tx;
1097168404Spjd
1098168404Spjd	if (os->os_dsl_dataset == NULL) {
1099168404Spjd		/*
1100168404Spjd		 * This is the MOS.  If we have upgraded,
1101168404Spjd		 * spa_max_replication() could change, so reset
1102168404Spjd		 * os_copies here.
1103168404Spjd		 */
1104168404Spjd		os->os_copies = spa_max_replication(os->os_spa);
1105168404Spjd	}
1106168404Spjd
1107168404Spjd	/*
1108168404Spjd	 * Create the root block IO
1109168404Spjd	 */
1110219089Spjd	SET_BOOKMARK(&zb, os->os_dsl_dataset ?
1111219089Spjd	    os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
1112219089Spjd	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
1113246666Smm	arc_release(os->os_phys_buf, &os->os_phys_buf);
1114185029Spjd
1115219089Spjd	dmu_write_policy(os, NULL, 0, 0, &zp);
1116185029Spjd
1117219089Spjd	zio = arc_write(pio, os->os_spa, tx->tx_txg,
1118251478Sdelphij	    os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
1119251478Sdelphij	    DMU_OS_IS_L2COMPRESSIBLE(os), &zp, dmu_objset_write_ready,
1120258632Savg	    NULL, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE,
1121251478Sdelphij	    ZIO_FLAG_MUSTSUCCEED, &zb);
1122185029Spjd
1123168404Spjd	/*
1124209962Smm	 * Sync special dnodes - the parent IO for the sync is the root block
1125168404Spjd	 */
1126219089Spjd	DMU_META_DNODE(os)->dn_zio = zio;
1127219089Spjd	dnode_sync(DMU_META_DNODE(os), tx);
1128168404Spjd
1129209962Smm	os->os_phys->os_flags = os->os_flags;
1130209962Smm
1131219089Spjd	if (DMU_USERUSED_DNODE(os) &&
1132219089Spjd	    DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) {
1133219089Spjd		DMU_USERUSED_DNODE(os)->dn_zio = zio;
1134219089Spjd		dnode_sync(DMU_USERUSED_DNODE(os), tx);
1135219089Spjd		DMU_GROUPUSED_DNODE(os)->dn_zio = zio;
1136219089Spjd		dnode_sync(DMU_GROUPUSED_DNODE(os), tx);
1137209962Smm	}
1138209962Smm
1139168404Spjd	txgoff = tx->tx_txg & TXG_MASK;
1140168404Spjd
1141209962Smm	if (dmu_objset_userused_enabled(os)) {
1142209962Smm		newlist = &os->os_synced_dnodes;
1143209962Smm		/*
1144209962Smm		 * We must create the list here because it uses the
1145209962Smm		 * dn_dirty_link[] of this txg.
1146209962Smm		 */
1147209962Smm		list_create(newlist, sizeof (dnode_t),
1148209962Smm		    offsetof(dnode_t, dn_dirty_link[txgoff]));
1149209962Smm	}
1150168404Spjd
1151209962Smm	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
1152209962Smm	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);
1153209962Smm
1154219089Spjd	list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
1155168404Spjd	while (dr = list_head(list)) {
1156248571Smm		ASSERT0(dr->dr_dbuf->db_level);
1157168404Spjd		list_remove(list, dr);
1158168404Spjd		if (dr->dr_zio)
1159168404Spjd			zio_nowait(dr->dr_zio);
1160168404Spjd	}
1161168404Spjd	/*
1162168404Spjd	 * Free intent log blocks up to this tx.
1163168404Spjd	 */
1164168404Spjd	zil_sync(os->os_zil, tx);
1165185029Spjd	os->os_phys->os_zil_header = os->os_zil_header;
1166168404Spjd	zio_nowait(zio);
1167168404Spjd}
1168168404Spjd
1169219089Spjdboolean_t
1170219089Spjddmu_objset_is_dirty(objset_t *os, uint64_t txg)
1171219089Spjd{
1172219089Spjd	return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) ||
1173219089Spjd	    !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK]));
1174219089Spjd}
1175219089Spjd
1176209962Smmstatic objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
1177209962Smm
1178168404Spjdvoid
1179209962Smmdmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
1180209962Smm{
1181209962Smm	used_cbs[ost] = cb;
1182209962Smm}
1183209962Smm
1184209962Smmboolean_t
1185219089Spjddmu_objset_userused_enabled(objset_t *os)
1186209962Smm{
1187209962Smm	return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
1188219089Spjd	    used_cbs[os->os_phys->os_type] != NULL &&
1189219089Spjd	    DMU_USERUSED_DNODE(os) != NULL);
1190209962Smm}
1191209962Smm
1192219089Spjdstatic void
1193219089Spjddo_userquota_update(objset_t *os, uint64_t used, uint64_t flags,
1194219089Spjd    uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx)
1195219089Spjd{
1196219089Spjd	if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) {
1197219089Spjd		int64_t delta = DNODE_SIZE + used;
1198219089Spjd		if (subtract)
1199219089Spjd			delta = -delta;
1200219089Spjd		VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT,
1201219089Spjd		    user, delta, tx));
1202219089Spjd		VERIFY3U(0, ==, zap_increment_int(os, DMU_GROUPUSED_OBJECT,
1203219089Spjd		    group, delta, tx));
1204219089Spjd	}
1205219089Spjd}
1206219089Spjd
1207209962Smmvoid
1208219089Spjddmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
1209209962Smm{
1210209962Smm	dnode_t *dn;
1211209962Smm	list_t *list = &os->os_synced_dnodes;
1212209962Smm
1213209962Smm	ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));
1214209962Smm
1215209962Smm	while (dn = list_head(list)) {
1216219089Spjd		int flags;
1217209962Smm		ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
1218209962Smm		ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
1219209962Smm		    dn->dn_phys->dn_flags &
1220209962Smm		    DNODE_FLAG_USERUSED_ACCOUNTED);
1221209962Smm
1222209962Smm		/* Allocate the user/groupused objects if necessary. */
1223219089Spjd		if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
1224219089Spjd			VERIFY(0 == zap_create_claim(os,
1225209962Smm			    DMU_USERUSED_OBJECT,
1226209962Smm			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
1227219089Spjd			VERIFY(0 == zap_create_claim(os,
1228209962Smm			    DMU_GROUPUSED_OBJECT,
1229209962Smm			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
1230209962Smm		}
1231209962Smm
1232209962Smm		/*
1233219089Spjd		 * We intentionally modify the zap object even if the
1234219089Spjd		 * net delta is zero.  Otherwise
1235219089Spjd		 * the block of the zap obj could be shared between
1236219089Spjd		 * datasets but need to be different between them after
1237219089Spjd		 * a bprewrite.
1238209962Smm		 */
1239219089Spjd
1240219089Spjd		flags = dn->dn_id_flags;
1241219089Spjd		ASSERT(flags);
1242219089Spjd		if (flags & DN_ID_OLD_EXIST)  {
1243219089Spjd			do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags,
1244219089Spjd			    dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx);
1245209962Smm		}
1246219089Spjd		if (flags & DN_ID_NEW_EXIST) {
1247219089Spjd			do_userquota_update(os, DN_USED_BYTES(dn->dn_phys),
1248219089Spjd			    dn->dn_phys->dn_flags,  dn->dn_newuid,
1249219089Spjd			    dn->dn_newgid, B_FALSE, tx);
1250219089Spjd		}
1251209962Smm
1252209962Smm		mutex_enter(&dn->dn_mtx);
1253219089Spjd		dn->dn_oldused = 0;
1254219089Spjd		dn->dn_oldflags = 0;
1255219089Spjd		if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
1256219089Spjd			dn->dn_olduid = dn->dn_newuid;
1257219089Spjd			dn->dn_oldgid = dn->dn_newgid;
1258219089Spjd			dn->dn_id_flags |= DN_ID_OLD_EXIST;
1259219089Spjd			if (dn->dn_bonuslen == 0)
1260219089Spjd				dn->dn_id_flags |= DN_ID_CHKED_SPILL;
1261219089Spjd			else
1262219089Spjd				dn->dn_id_flags |= DN_ID_CHKED_BONUS;
1263219089Spjd		}
1264219089Spjd		dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
1265209962Smm		mutex_exit(&dn->dn_mtx);
1266209962Smm
1267209962Smm		list_remove(list, dn);
1268209962Smm		dnode_rele(dn, list);
1269209962Smm	}
1270209962Smm}
1271209962Smm
1272219089Spjd/*
1273219089Spjd * Returns a pointer to data to find uid/gid from
1274219089Spjd *
1275219089Spjd * If a dirty record for transaction group that is syncing can't
1276219089Spjd * be found then NULL is returned.  In the NULL case it is assumed
1277219089Spjd * the uid/gid aren't changing.
1278219089Spjd */
1279219089Spjdstatic void *
1280219089Spjddmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx)
1281219089Spjd{
1282219089Spjd	dbuf_dirty_record_t *dr, **drp;
1283219089Spjd	void *data;
1284219089Spjd
1285219089Spjd	if (db->db_dirtycnt == 0)
1286219089Spjd		return (db->db.db_data);  /* Nothing is changing */
1287219089Spjd
1288219089Spjd	for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
1289219089Spjd		if (dr->dr_txg == tx->tx_txg)
1290219089Spjd			break;
1291219089Spjd
1292219089Spjd	if (dr == NULL) {
1293219089Spjd		data = NULL;
1294219089Spjd	} else {
1295219089Spjd		dnode_t *dn;
1296219089Spjd
1297219089Spjd		DB_DNODE_ENTER(dr->dr_dbuf);
1298219089Spjd		dn = DB_DNODE(dr->dr_dbuf);
1299219089Spjd
1300219089Spjd		if (dn->dn_bonuslen == 0 &&
1301219089Spjd		    dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID)
1302219089Spjd			data = dr->dt.dl.dr_data->b_data;
1303219089Spjd		else
1304219089Spjd			data = dr->dt.dl.dr_data;
1305219089Spjd
1306219089Spjd		DB_DNODE_EXIT(dr->dr_dbuf);
1307219089Spjd	}
1308219089Spjd
1309219089Spjd	return (data);
1310219089Spjd}
1311219089Spjd
1312219089Spjdvoid
1313219089Spjddmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
1314219089Spjd{
1315219089Spjd	objset_t *os = dn->dn_objset;
1316219089Spjd	void *data = NULL;
1317219089Spjd	dmu_buf_impl_t *db = NULL;
1318247187Smm	uint64_t *user = NULL;
1319247187Smm	uint64_t *group = NULL;
1320219089Spjd	int flags = dn->dn_id_flags;
1321219089Spjd	int error;
1322219089Spjd	boolean_t have_spill = B_FALSE;
1323219089Spjd
1324219089Spjd	if (!dmu_objset_userused_enabled(dn->dn_objset))
1325219089Spjd		return;
1326219089Spjd
1327219089Spjd	if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
1328219089Spjd	    DN_ID_CHKED_SPILL)))
1329219089Spjd		return;
1330219089Spjd
1331219089Spjd	if (before && dn->dn_bonuslen != 0)
1332219089Spjd		data = DN_BONUS(dn->dn_phys);
1333219089Spjd	else if (!before && dn->dn_bonuslen != 0) {
1334219089Spjd		if (dn->dn_bonus) {
1335219089Spjd			db = dn->dn_bonus;
1336219089Spjd			mutex_enter(&db->db_mtx);
1337219089Spjd			data = dmu_objset_userquota_find_data(db, tx);
1338219089Spjd		} else {
1339219089Spjd			data = DN_BONUS(dn->dn_phys);
1340219089Spjd		}
1341219089Spjd	} else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) {
1342219089Spjd			int rf = 0;
1343219089Spjd
1344219089Spjd			if (RW_WRITE_HELD(&dn->dn_struct_rwlock))
1345219089Spjd				rf |= DB_RF_HAVESTRUCT;
1346219089Spjd			error = dmu_spill_hold_by_dnode(dn,
1347219089Spjd			    rf | DB_RF_MUST_SUCCEED,
1348219089Spjd			    FTAG, (dmu_buf_t **)&db);
1349219089Spjd			ASSERT(error == 0);
1350219089Spjd			mutex_enter(&db->db_mtx);
1351219089Spjd			data = (before) ? db->db.db_data :
1352219089Spjd			    dmu_objset_userquota_find_data(db, tx);
1353219089Spjd			have_spill = B_TRUE;
1354219089Spjd	} else {
1355219089Spjd		mutex_enter(&dn->dn_mtx);
1356219089Spjd		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
1357219089Spjd		mutex_exit(&dn->dn_mtx);
1358219089Spjd		return;
1359219089Spjd	}
1360219089Spjd
1361219089Spjd	if (before) {
1362219089Spjd		ASSERT(data);
1363219089Spjd		user = &dn->dn_olduid;
1364219089Spjd		group = &dn->dn_oldgid;
1365219089Spjd	} else if (data) {
1366219089Spjd		user = &dn->dn_newuid;
1367219089Spjd		group = &dn->dn_newgid;
1368219089Spjd	}
1369219089Spjd
1370219089Spjd	/*
1371219089Spjd	 * Must always call the callback in case the object
1372219089Spjd	 * type has changed and that type isn't an object type to track
1373219089Spjd	 */
1374219089Spjd	error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
1375219089Spjd	    user, group);
1376219089Spjd
1377219089Spjd	/*
1378219089Spjd	 * Preserve existing uid/gid when the callback can't determine
1379219089Spjd	 * what the new uid/gid are and the callback returned EEXIST.
1380219089Spjd	 * The EEXIST error tells us to just use the existing uid/gid.
1381219089Spjd	 * If we don't know what the old values are then just assign
1382219089Spjd	 * them to 0, since that is a new file  being created.
1383219089Spjd	 */
1384219089Spjd	if (!before && data == NULL && error == EEXIST) {
1385219089Spjd		if (flags & DN_ID_OLD_EXIST) {
1386219089Spjd			dn->dn_newuid = dn->dn_olduid;
1387219089Spjd			dn->dn_newgid = dn->dn_oldgid;
1388219089Spjd		} else {
1389219089Spjd			dn->dn_newuid = 0;
1390219089Spjd			dn->dn_newgid = 0;
1391219089Spjd		}
1392219089Spjd		error = 0;
1393219089Spjd	}
1394219089Spjd
1395219089Spjd	if (db)
1396219089Spjd		mutex_exit(&db->db_mtx);
1397219089Spjd
1398219089Spjd	mutex_enter(&dn->dn_mtx);
1399219089Spjd	if (error == 0 && before)
1400219089Spjd		dn->dn_id_flags |= DN_ID_OLD_EXIST;
1401219089Spjd	if (error == 0 && !before)
1402219089Spjd		dn->dn_id_flags |= DN_ID_NEW_EXIST;
1403219089Spjd
1404219089Spjd	if (have_spill) {
1405219089Spjd		dn->dn_id_flags |= DN_ID_CHKED_SPILL;
1406219089Spjd	} else {
1407219089Spjd		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
1408219089Spjd	}
1409219089Spjd	mutex_exit(&dn->dn_mtx);
1410219089Spjd	if (have_spill)
1411219089Spjd		dmu_buf_rele((dmu_buf_t *)db, FTAG);
1412219089Spjd}
1413219089Spjd
1414209962Smmboolean_t
1415209962Smmdmu_objset_userspace_present(objset_t *os)
1416209962Smm{
1417219089Spjd	return (os->os_phys->os_flags &
1418209962Smm	    OBJSET_FLAG_USERACCOUNTING_COMPLETE);
1419209962Smm}
1420209962Smm
1421209962Smmint
1422209962Smmdmu_objset_userspace_upgrade(objset_t *os)
1423209962Smm{
1424209962Smm	uint64_t obj;
1425209962Smm	int err = 0;
1426209962Smm
1427209962Smm	if (dmu_objset_userspace_present(os))
1428209962Smm		return (0);
1429219089Spjd	if (!dmu_objset_userused_enabled(os))
1430249195Smm		return (SET_ERROR(ENOTSUP));
1431209962Smm	if (dmu_objset_is_snapshot(os))
1432249195Smm		return (SET_ERROR(EINVAL));
1433209962Smm
1434209962Smm	/*
1435209962Smm	 * We simply need to mark every object dirty, so that it will be
1436209962Smm	 * synced out and now accounted.  If this is called
1437209962Smm	 * concurrently, or if we already did some work before crashing,
1438209962Smm	 * that's fine, since we track each object's accounted state
1439209962Smm	 * independently.
1440209962Smm	 */
1441209962Smm
1442209962Smm	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
1443209962Smm		dmu_tx_t *tx;
1444209962Smm		dmu_buf_t *db;
1445209962Smm		int objerr;
1446209962Smm
1447209962Smm		if (issig(JUSTLOOKING) && issig(FORREAL))
1448249195Smm			return (SET_ERROR(EINTR));
1449209962Smm
1450209962Smm		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
1451248571Smm		if (objerr != 0)
1452209962Smm			continue;
1453209962Smm		tx = dmu_tx_create(os);
1454209962Smm		dmu_tx_hold_bonus(tx, obj);
1455209962Smm		objerr = dmu_tx_assign(tx, TXG_WAIT);
1456248571Smm		if (objerr != 0) {
1457209962Smm			dmu_tx_abort(tx);
1458209962Smm			continue;
1459209962Smm		}
1460209962Smm		dmu_buf_will_dirty(db, tx);
1461209962Smm		dmu_buf_rele(db, FTAG);
1462209962Smm		dmu_tx_commit(tx);
1463209962Smm	}
1464209962Smm
1465219089Spjd	os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
1466209962Smm	txg_wait_synced(dmu_objset_pool(os), 0);
1467209962Smm	return (0);
1468209962Smm}
1469209962Smm
1470209962Smmvoid
1471168404Spjddmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
1472168404Spjd    uint64_t *usedobjsp, uint64_t *availobjsp)
1473168404Spjd{
1474219089Spjd	dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp,
1475168404Spjd	    usedobjsp, availobjsp);
1476168404Spjd}
1477168404Spjd
1478168404Spjduint64_t
1479168404Spjddmu_objset_fsid_guid(objset_t *os)
1480168404Spjd{
1481219089Spjd	return (dsl_dataset_fsid_guid(os->os_dsl_dataset));
1482168404Spjd}
1483168404Spjd
1484168404Spjdvoid
1485168404Spjddmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat)
1486168404Spjd{
1487219089Spjd	stat->dds_type = os->os_phys->os_type;
1488219089Spjd	if (os->os_dsl_dataset)
1489219089Spjd		dsl_dataset_fast_stat(os->os_dsl_dataset, stat);
1490168404Spjd}
1491168404Spjd
1492168404Spjdvoid
1493168404Spjddmu_objset_stats(objset_t *os, nvlist_t *nv)
1494168404Spjd{
1495219089Spjd	ASSERT(os->os_dsl_dataset ||
1496219089Spjd	    os->os_phys->os_type == DMU_OST_META);
1497168404Spjd
1498219089Spjd	if (os->os_dsl_dataset != NULL)
1499219089Spjd		dsl_dataset_stats(os->os_dsl_dataset, nv);
1500168404Spjd
1501168404Spjd	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
1502219089Spjd	    os->os_phys->os_type);
1503209962Smm	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
1504209962Smm	    dmu_objset_userspace_present(os));
1505168404Spjd}
1506168404Spjd
1507168404Spjdint
1508168404Spjddmu_objset_is_snapshot(objset_t *os)
1509168404Spjd{
1510219089Spjd	if (os->os_dsl_dataset != NULL)
1511286575Smav		return (os->os_dsl_dataset->ds_is_snapshot);
1512168404Spjd	else
1513168404Spjd		return (B_FALSE);
1514168404Spjd}
1515168404Spjd
1516168404Spjdint
1517185029Spjddmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
1518185029Spjd    boolean_t *conflict)
1519185029Spjd{
1520219089Spjd	dsl_dataset_t *ds = os->os_dsl_dataset;
1521185029Spjd	uint64_t ignored;
1522185029Spjd
1523275782Sdelphij	if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
1524249195Smm		return (SET_ERROR(ENOENT));
1525185029Spjd
1526185029Spjd	return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
1527275782Sdelphij	    dsl_dataset_phys(ds)->ds_snapnames_zapobj, name, 8, 1, &ignored,
1528275782Sdelphij	    MT_FIRST, real, maxlen, conflict));
1529185029Spjd}
1530185029Spjd
1531185029Spjdint
1532168404Spjddmu_snapshot_list_next(objset_t *os, int namelen, char *name,
1533185029Spjd    uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
1534168404Spjd{
1535219089Spjd	dsl_dataset_t *ds = os->os_dsl_dataset;
1536168404Spjd	zap_cursor_t cursor;
1537168404Spjd	zap_attribute_t attr;
1538168404Spjd
1539248571Smm	ASSERT(dsl_pool_config_held(dmu_objset_pool(os)));
1540248571Smm
1541275782Sdelphij	if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
1542249195Smm		return (SET_ERROR(ENOENT));
1543168404Spjd
1544168404Spjd	zap_cursor_init_serialized(&cursor,
1545168404Spjd	    ds->ds_dir->dd_pool->dp_meta_objset,
1546275782Sdelphij	    dsl_dataset_phys(ds)->ds_snapnames_zapobj, *offp);
1547168404Spjd
1548168404Spjd	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
1549168404Spjd		zap_cursor_fini(&cursor);
1550249195Smm		return (SET_ERROR(ENOENT));
1551168404Spjd	}
1552168404Spjd
1553168404Spjd	if (strlen(attr.za_name) + 1 > namelen) {
1554168404Spjd		zap_cursor_fini(&cursor);
1555249195Smm		return (SET_ERROR(ENAMETOOLONG));
1556168404Spjd	}
1557168404Spjd
1558168404Spjd	(void) strcpy(name, attr.za_name);
1559168404Spjd	if (idp)
1560168404Spjd		*idp = attr.za_first_integer;
1561185029Spjd	if (case_conflict)
1562185029Spjd		*case_conflict = attr.za_normalization_conflict;
1563168404Spjd	zap_cursor_advance(&cursor);
1564168404Spjd	*offp = zap_cursor_serialize(&cursor);
1565168404Spjd	zap_cursor_fini(&cursor);
1566168404Spjd
1567168404Spjd	return (0);
1568168404Spjd}
1569168404Spjd
1570168404Spjdint
1571168404Spjddmu_dir_list_next(objset_t *os, int namelen, char *name,
1572168404Spjd    uint64_t *idp, uint64_t *offp)
1573168404Spjd{
1574219089Spjd	dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
1575168404Spjd	zap_cursor_t cursor;
1576168404Spjd	zap_attribute_t attr;
1577168404Spjd
1578168404Spjd	/* there is no next dir on a snapshot! */
1579219089Spjd	if (os->os_dsl_dataset->ds_object !=
1580275782Sdelphij	    dsl_dir_phys(dd)->dd_head_dataset_obj)
1581249195Smm		return (SET_ERROR(ENOENT));
1582168404Spjd
1583168404Spjd	zap_cursor_init_serialized(&cursor,
1584168404Spjd	    dd->dd_pool->dp_meta_objset,
1585275782Sdelphij	    dsl_dir_phys(dd)->dd_child_dir_zapobj, *offp);
1586168404Spjd
1587168404Spjd	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
1588168404Spjd		zap_cursor_fini(&cursor);
1589249195Smm		return (SET_ERROR(ENOENT));
1590168404Spjd	}
1591168404Spjd
1592168404Spjd	if (strlen(attr.za_name) + 1 > namelen) {
1593168404Spjd		zap_cursor_fini(&cursor);
1594249195Smm		return (SET_ERROR(ENAMETOOLONG));
1595168404Spjd	}
1596168404Spjd
1597168404Spjd	(void) strcpy(name, attr.za_name);
1598168404Spjd	if (idp)
1599168404Spjd		*idp = attr.za_first_integer;
1600168404Spjd	zap_cursor_advance(&cursor);
1601168404Spjd	*offp = zap_cursor_serialize(&cursor);
1602168404Spjd	zap_cursor_fini(&cursor);
1603168404Spjd
1604168404Spjd	return (0);
1605168404Spjd}
1606168404Spjd
1607286686Smavtypedef struct dmu_objset_find_ctx {
1608286686Smav	taskq_t		*dc_tq;
1609286686Smav	dsl_pool_t	*dc_dp;
1610286686Smav	uint64_t	dc_ddobj;
1611286686Smav	int		(*dc_func)(dsl_pool_t *, dsl_dataset_t *, void *);
1612286686Smav	void		*dc_arg;
1613286686Smav	int		dc_flags;
1614286686Smav	kmutex_t	*dc_error_lock;
1615286686Smav	int		*dc_error;
1616286686Smav} dmu_objset_find_ctx_t;
1617286686Smav
1618286686Smavstatic void
1619286686Smavdmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp)
1620168404Spjd{
1621286686Smav	dsl_pool_t *dp = dcp->dc_dp;
1622286686Smav	dmu_objset_find_ctx_t *child_dcp;
1623248571Smm	dsl_dir_t *dd;
1624248571Smm	dsl_dataset_t *ds;
1625248571Smm	zap_cursor_t zc;
1626248571Smm	zap_attribute_t *attr;
1627248571Smm	uint64_t thisobj;
1628286686Smav	int err = 0;
1629248571Smm
1630286686Smav	/* don't process if there already was an error */
1631286686Smav	if (*dcp->dc_error != 0)
1632286686Smav		goto out;
1633248571Smm
1634286686Smav	err = dsl_dir_hold_obj(dp, dcp->dc_ddobj, NULL, FTAG, &dd);
1635248571Smm	if (err != 0)
1636286686Smav		goto out;
1637248571Smm
1638248571Smm	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
1639248571Smm	if (dd->dd_myname[0] == '$') {
1640248571Smm		dsl_dir_rele(dd, FTAG);
1641286686Smav		goto out;
1642248571Smm	}
1643248571Smm
1644275782Sdelphij	thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
1645248571Smm	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
1646248571Smm
1647248571Smm	/*
1648248571Smm	 * Iterate over all children.
1649248571Smm	 */
1650286686Smav	if (dcp->dc_flags & DS_FIND_CHILDREN) {
1651248571Smm		for (zap_cursor_init(&zc, dp->dp_meta_objset,
1652275782Sdelphij		    dsl_dir_phys(dd)->dd_child_dir_zapobj);
1653248571Smm		    zap_cursor_retrieve(&zc, attr) == 0;
1654248571Smm		    (void) zap_cursor_advance(&zc)) {
1655248571Smm			ASSERT3U(attr->za_integer_length, ==,
1656248571Smm			    sizeof (uint64_t));
1657248571Smm			ASSERT3U(attr->za_num_integers, ==, 1);
1658248571Smm
1659286686Smav			child_dcp = kmem_alloc(sizeof (*child_dcp), KM_SLEEP);
1660286686Smav			*child_dcp = *dcp;
1661286686Smav			child_dcp->dc_ddobj = attr->za_first_integer;
1662286686Smav			if (dcp->dc_tq != NULL)
1663286686Smav				(void) taskq_dispatch(dcp->dc_tq,
1664286686Smav				    dmu_objset_find_dp_cb, child_dcp, TQ_SLEEP);
1665286686Smav			else
1666286686Smav				dmu_objset_find_dp_impl(child_dcp);
1667248571Smm		}
1668248571Smm		zap_cursor_fini(&zc);
1669248571Smm	}
1670248571Smm
1671248571Smm	/*
1672248571Smm	 * Iterate over all snapshots.
1673248571Smm	 */
1674286686Smav	if (dcp->dc_flags & DS_FIND_SNAPSHOTS) {
1675248571Smm		dsl_dataset_t *ds;
1676248571Smm		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
1677248571Smm
1678248571Smm		if (err == 0) {
1679275782Sdelphij			uint64_t snapobj;
1680275782Sdelphij
1681275782Sdelphij			snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
1682248571Smm			dsl_dataset_rele(ds, FTAG);
1683248571Smm
1684248571Smm			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
1685248571Smm			    zap_cursor_retrieve(&zc, attr) == 0;
1686248571Smm			    (void) zap_cursor_advance(&zc)) {
1687248571Smm				ASSERT3U(attr->za_integer_length, ==,
1688248571Smm				    sizeof (uint64_t));
1689248571Smm				ASSERT3U(attr->za_num_integers, ==, 1);
1690248571Smm
1691248571Smm				err = dsl_dataset_hold_obj(dp,
1692248571Smm				    attr->za_first_integer, FTAG, &ds);
1693248571Smm				if (err != 0)
1694248571Smm					break;
1695286686Smav				err = dcp->dc_func(dp, ds, dcp->dc_arg);
1696248571Smm				dsl_dataset_rele(ds, FTAG);
1697248571Smm				if (err != 0)
1698248571Smm					break;
1699248571Smm			}
1700248571Smm			zap_cursor_fini(&zc);
1701248571Smm		}
1702248571Smm	}
1703248571Smm
1704248571Smm	dsl_dir_rele(dd, FTAG);
1705248571Smm	kmem_free(attr, sizeof (zap_attribute_t));
1706248571Smm
1707248571Smm	if (err != 0)
1708286686Smav		goto out;
1709248571Smm
1710248571Smm	/*
1711248571Smm	 * Apply to self.
1712248571Smm	 */
1713248571Smm	err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
1714248571Smm	if (err != 0)
1715286686Smav		goto out;
1716286686Smav	err = dcp->dc_func(dp, ds, dcp->dc_arg);
1717248571Smm	dsl_dataset_rele(ds, FTAG);
1718286686Smav
1719286686Smavout:
1720286686Smav	if (err != 0) {
1721286686Smav		mutex_enter(dcp->dc_error_lock);
1722286686Smav		/* only keep first error */
1723286686Smav		if (*dcp->dc_error == 0)
1724286686Smav			*dcp->dc_error = err;
1725286686Smav		mutex_exit(dcp->dc_error_lock);
1726286686Smav	}
1727286686Smav
1728286686Smav	kmem_free(dcp, sizeof (*dcp));
1729185029Spjd}
1730185029Spjd
1731286686Smavstatic void
1732286686Smavdmu_objset_find_dp_cb(void *arg)
1733286686Smav{
1734286686Smav	dmu_objset_find_ctx_t *dcp = arg;
1735286686Smav	dsl_pool_t *dp = dcp->dc_dp;
1736286686Smav
1737286689Smav	/*
1738286689Smav	 * We need to get a pool_config_lock here, as there are several
1739286689Smav	 * asssert(pool_config_held) down the stack. Getting a lock via
1740286689Smav	 * dsl_pool_config_enter is risky, as it might be stalled by a
1741286689Smav	 * pending writer. This would deadlock, as the write lock can
1742286689Smav	 * only be granted when our parent thread gives up the lock.
1743286689Smav	 * The _prio interface gives us priority over a pending writer.
1744286689Smav	 */
1745286689Smav	dsl_pool_config_enter_prio(dp, FTAG);
1746286686Smav
1747286686Smav	dmu_objset_find_dp_impl(dcp);
1748286686Smav
1749286686Smav	dsl_pool_config_exit(dp, FTAG);
1750286686Smav}
1751286686Smav
1752185029Spjd/*
1753286686Smav * Find objsets under and including ddobj, call func(ds) on each.
1754286686Smav * The order for the enumeration is completely undefined.
1755286686Smav * func is called with dsl_pool_config held.
1756286686Smav */
1757286686Smavint
1758286686Smavdmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
1759286686Smav    int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
1760286686Smav{
1761286686Smav	int error = 0;
1762286686Smav	taskq_t *tq = NULL;
1763286686Smav	int ntasks;
1764286686Smav	dmu_objset_find_ctx_t *dcp;
1765286686Smav	kmutex_t err_lock;
1766286686Smav
1767286686Smav	mutex_init(&err_lock, NULL, MUTEX_DEFAULT, NULL);
1768286686Smav	dcp = kmem_alloc(sizeof (*dcp), KM_SLEEP);
1769286686Smav	dcp->dc_tq = NULL;
1770286686Smav	dcp->dc_dp = dp;
1771286686Smav	dcp->dc_ddobj = ddobj;
1772286686Smav	dcp->dc_func = func;
1773286686Smav	dcp->dc_arg = arg;
1774286686Smav	dcp->dc_flags = flags;
1775286686Smav	dcp->dc_error_lock = &err_lock;
1776286686Smav	dcp->dc_error = &error;
1777286686Smav
1778286686Smav	if ((flags & DS_FIND_SERIALIZE) || dsl_pool_config_held_writer(dp)) {
1779286686Smav		/*
1780286686Smav		 * In case a write lock is held we can't make use of
1781286686Smav		 * parallelism, as down the stack of the worker threads
1782286686Smav		 * the lock is asserted via dsl_pool_config_held.
1783286686Smav		 * In case of a read lock this is solved by getting a read
1784286686Smav		 * lock in each worker thread, which isn't possible in case
1785286686Smav		 * of a writer lock. So we fall back to the synchronous path
1786286686Smav		 * here.
1787286686Smav		 * In the future it might be possible to get some magic into
1788286686Smav		 * dsl_pool_config_held in a way that it returns true for
1789286686Smav		 * the worker threads so that a single lock held from this
1790286686Smav		 * thread suffices. For now, stay single threaded.
1791286686Smav		 */
1792286686Smav		dmu_objset_find_dp_impl(dcp);
1793294820Smav		mutex_destroy(&err_lock);
1794286686Smav
1795286686Smav		return (error);
1796286686Smav	}
1797286686Smav
1798286686Smav	ntasks = dmu_find_threads;
1799286686Smav	if (ntasks == 0)
1800286686Smav		ntasks = vdev_count_leaves(dp->dp_spa) * 4;
1801286686Smav	tq = taskq_create("dmu_objset_find", ntasks, minclsyspri, ntasks,
1802286686Smav	    INT_MAX, 0);
1803286686Smav	if (tq == NULL) {
1804286686Smav		kmem_free(dcp, sizeof (*dcp));
1805294820Smav		mutex_destroy(&err_lock);
1806294820Smav
1807286686Smav		return (SET_ERROR(ENOMEM));
1808286686Smav	}
1809286686Smav	dcp->dc_tq = tq;
1810286686Smav
1811286686Smav	/* dcp will be freed by task */
1812286686Smav	(void) taskq_dispatch(tq, dmu_objset_find_dp_cb, dcp, TQ_SLEEP);
1813286686Smav
1814286686Smav	/*
1815286686Smav	 * PORTING: this code relies on the property of taskq_wait to wait
1816286686Smav	 * until no more tasks are queued and no more tasks are active. As
1817286686Smav	 * we always queue new tasks from within other tasks, task_wait
1818286686Smav	 * reliably waits for the full recursion to finish, even though we
1819286686Smav	 * enqueue new tasks after taskq_wait has been called.
1820286686Smav	 * On platforms other than illumos, taskq_wait may not have this
1821286686Smav	 * property.
1822286686Smav	 */
1823286686Smav	taskq_wait(tq);
1824286686Smav	taskq_destroy(tq);
1825286686Smav	mutex_destroy(&err_lock);
1826286686Smav
1827286686Smav	return (error);
1828286686Smav}
1829286686Smav
1830286686Smav/*
1831248571Smm * Find all objsets under name, and for each, call 'func(child_name, arg)'.
1832248571Smm * The dp_config_rwlock must not be held when this is called, and it
1833248571Smm * will not be held when the callback is called.
1834248571Smm * Therefore this function should only be used when the pool is not changing
1835248571Smm * (e.g. in syncing context), or the callback can deal with the possible races.
1836185029Spjd */
1837248571Smmstatic int
1838248571Smmdmu_objset_find_impl(spa_t *spa, const char *name,
1839248571Smm    int func(const char *, void *), void *arg, int flags)
1840185029Spjd{
1841168404Spjd	dsl_dir_t *dd;
1842248571Smm	dsl_pool_t *dp = spa_get_dsl(spa);
1843185029Spjd	dsl_dataset_t *ds;
1844168404Spjd	zap_cursor_t zc;
1845168498Spjd	zap_attribute_t *attr;
1846168404Spjd	char *child;
1847185029Spjd	uint64_t thisobj;
1848185029Spjd	int err;
1849168404Spjd
1850248571Smm	dsl_pool_config_enter(dp, FTAG);
1851248571Smm
1852248571Smm	err = dsl_dir_hold(dp, name, FTAG, &dd, NULL);
1853248571Smm	if (err != 0) {
1854248571Smm		dsl_pool_config_exit(dp, FTAG);
1855168404Spjd		return (err);
1856248571Smm	}
1857168404Spjd
1858185029Spjd	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
1859185029Spjd	if (dd->dd_myname[0] == '$') {
1860248571Smm		dsl_dir_rele(dd, FTAG);
1861248571Smm		dsl_pool_config_exit(dp, FTAG);
1862185029Spjd		return (0);
1863185029Spjd	}
1864185029Spjd
1865275782Sdelphij	thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
1866168498Spjd	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
1867168404Spjd
1868168404Spjd	/*
1869168404Spjd	 * Iterate over all children.
1870168404Spjd	 */
1871168404Spjd	if (flags & DS_FIND_CHILDREN) {
1872185029Spjd		for (zap_cursor_init(&zc, dp->dp_meta_objset,
1873275782Sdelphij		    dsl_dir_phys(dd)->dd_child_dir_zapobj);
1874168498Spjd		    zap_cursor_retrieve(&zc, attr) == 0;
1875168404Spjd		    (void) zap_cursor_advance(&zc)) {
1876248571Smm			ASSERT3U(attr->za_integer_length, ==,
1877248571Smm			    sizeof (uint64_t));
1878248571Smm			ASSERT3U(attr->za_num_integers, ==, 1);
1879168404Spjd
1880219089Spjd			child = kmem_asprintf("%s/%s", name, attr->za_name);
1881248571Smm			dsl_pool_config_exit(dp, FTAG);
1882248571Smm			err = dmu_objset_find_impl(spa, child,
1883248571Smm			    func, arg, flags);
1884248571Smm			dsl_pool_config_enter(dp, FTAG);
1885219089Spjd			strfree(child);
1886248571Smm			if (err != 0)
1887168404Spjd				break;
1888168404Spjd		}
1889168404Spjd		zap_cursor_fini(&zc);
1890168404Spjd
1891248571Smm		if (err != 0) {
1892248571Smm			dsl_dir_rele(dd, FTAG);
1893248571Smm			dsl_pool_config_exit(dp, FTAG);
1894168498Spjd			kmem_free(attr, sizeof (zap_attribute_t));
1895168404Spjd			return (err);
1896168404Spjd		}
1897168404Spjd	}
1898168404Spjd
1899168404Spjd	/*
1900168404Spjd	 * Iterate over all snapshots.
1901168404Spjd	 */
1902185029Spjd	if (flags & DS_FIND_SNAPSHOTS) {
1903185029Spjd		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
1904168404Spjd
1905185029Spjd		if (err == 0) {
1906275782Sdelphij			uint64_t snapobj;
1907275782Sdelphij
1908275782Sdelphij			snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
1909185029Spjd			dsl_dataset_rele(ds, FTAG);
1910168404Spjd
1911185029Spjd			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
1912185029Spjd			    zap_cursor_retrieve(&zc, attr) == 0;
1913185029Spjd			    (void) zap_cursor_advance(&zc)) {
1914248571Smm				ASSERT3U(attr->za_integer_length, ==,
1915185029Spjd				    sizeof (uint64_t));
1916248571Smm				ASSERT3U(attr->za_num_integers, ==, 1);
1917168404Spjd
1918219089Spjd				child = kmem_asprintf("%s@%s",
1919219089Spjd				    name, attr->za_name);
1920248571Smm				dsl_pool_config_exit(dp, FTAG);
1921248571Smm				err = func(child, arg);
1922248571Smm				dsl_pool_config_enter(dp, FTAG);
1923219089Spjd				strfree(child);
1924248571Smm				if (err != 0)
1925185029Spjd					break;
1926185029Spjd			}
1927185029Spjd			zap_cursor_fini(&zc);
1928168404Spjd		}
1929168404Spjd	}
1930168404Spjd
1931248571Smm	dsl_dir_rele(dd, FTAG);
1932168498Spjd	kmem_free(attr, sizeof (zap_attribute_t));
1933248571Smm	dsl_pool_config_exit(dp, FTAG);
1934168404Spjd
1935248571Smm	if (err != 0)
1936168404Spjd		return (err);
1937168404Spjd
1938248571Smm	/* Apply to self. */
1939248571Smm	return (func(name, arg));
1940168404Spjd}
1941185029Spjd
1942248571Smm/*
1943248571Smm * See comment above dmu_objset_find_impl().
1944248571Smm */
1945207626Smmint
1946248571Smmdmu_objset_find(char *name, int func(const char *, void *), void *arg,
1947248571Smm    int flags)
1948207626Smm{
1949248571Smm	spa_t *spa;
1950248571Smm	int error;
1951207626Smm
1952248571Smm	error = spa_open(name, &spa, FTAG);
1953248571Smm	if (error != 0)
1954248571Smm		return (error);
1955248571Smm	error = dmu_objset_find_impl(spa, name, func, arg, flags);
1956248571Smm	spa_close(spa, FTAG);
1957248571Smm	return (error);
1958207626Smm}
1959207626Smm
1960185029Spjdvoid
1961185029Spjddmu_objset_set_user(objset_t *os, void *user_ptr)
1962185029Spjd{
1963219089Spjd	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
1964219089Spjd	os->os_user_ptr = user_ptr;
1965185029Spjd}
1966185029Spjd
1967185029Spjdvoid *
1968185029Spjddmu_objset_get_user(objset_t *os)
1969185029Spjd{
1970219089Spjd	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
1971219089Spjd	return (os->os_user_ptr);
1972185029Spjd}
1973248571Smm
1974248571Smm/*
1975248571Smm * Determine name of filesystem, given name of snapshot.
1976248571Smm * buf must be at least MAXNAMELEN bytes
1977248571Smm */
1978248571Smmint
1979248571Smmdmu_fsname(const char *snapname, char *buf)
1980248571Smm{
1981248571Smm	char *atp = strchr(snapname, '@');
1982248571Smm	if (atp == NULL)
1983249195Smm		return (SET_ERROR(EINVAL));
1984248571Smm	if (atp - snapname >= MAXNAMELEN)
1985249195Smm		return (SET_ERROR(ENAMETOOLONG));
1986248571Smm	(void) strlcpy(buf, snapname, atp - snapname + 1);
1987248571Smm	return (0);
1988248571Smm}
1989