1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23304139Savg * Copyright (c) 2012, 2016 by Delphix. All rights reserved. 24251478Sdelphij * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. 25265744Sdelphij * Copyright (c) 2013, Joyent, Inc. All rights reserved. 26288549Smav * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. 27282756Savg * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 28288569Smav * Copyright (c) 2015, STRATO AG, Inc. All rights reserved. 29297112Smav * Copyright (c) 2014 Integros [integros.com] 30168404Spjd */ 31168404Spjd 32219089Spjd/* Portions Copyright 2010 Robert Milkowski */ 33219089Spjd 34185029Spjd#include <sys/cred.h> 35168404Spjd#include <sys/zfs_context.h> 36168404Spjd#include <sys/dmu_objset.h> 37168404Spjd#include <sys/dsl_dir.h> 38168404Spjd#include <sys/dsl_dataset.h> 39168404Spjd#include <sys/dsl_prop.h> 40168404Spjd#include <sys/dsl_pool.h> 41168404Spjd#include <sys/dsl_synctask.h> 42185029Spjd#include <sys/dsl_deleg.h> 43168404Spjd#include <sys/dnode.h> 44168404Spjd#include <sys/dbuf.h> 45168404Spjd#include <sys/zvol.h> 46168404Spjd#include <sys/dmu_tx.h> 47168404Spjd#include <sys/zap.h> 48168404Spjd#include <sys/zil.h> 49168404Spjd#include <sys/dmu_impl.h> 50185029Spjd#include <sys/zfs_ioctl.h> 51219089Spjd#include <sys/sa.h> 52219089Spjd#include <sys/zfs_onexit.h> 53248571Smm#include <sys/dsl_destroy.h> 54288569Smav#include <sys/vdev.h> 55168404Spjd 56219089Spjd/* 57219089Spjd * Needed to close a window in dnode_move() that allows the objset to be freed 58219089Spjd * before it can be safely accessed. 59219089Spjd */ 60219089Spjdkrwlock_t os_lock; 61219089Spjd 62288569Smav/* 63288569Smav * Tunable to overwrite the maximum number of threads for the parallization 64288569Smav * of dmu_objset_find_dp, needed to speed up the import of pools with many 65288569Smav * datasets. 66288569Smav * Default is 4 times the number of leaf vdevs. 67288569Smav */ 68288569Smavint dmu_find_threads = 0; 69288569Smav 70288569Smavstatic void dmu_objset_find_dp_cb(void *arg); 71288569Smav 72219089Spjdvoid 73219089Spjddmu_objset_init(void) 74219089Spjd{ 75219089Spjd rw_init(&os_lock, NULL, RW_DEFAULT, NULL); 76219089Spjd} 77219089Spjd 78219089Spjdvoid 79219089Spjddmu_objset_fini(void) 80219089Spjd{ 81219089Spjd rw_destroy(&os_lock); 82219089Spjd} 83219089Spjd 84168404Spjdspa_t * 85168404Spjddmu_objset_spa(objset_t *os) 86168404Spjd{ 87219089Spjd return (os->os_spa); 88168404Spjd} 89168404Spjd 90168404Spjdzilog_t * 91168404Spjddmu_objset_zil(objset_t *os) 92168404Spjd{ 93219089Spjd return (os->os_zil); 94168404Spjd} 95168404Spjd 96168404Spjddsl_pool_t * 97168404Spjddmu_objset_pool(objset_t *os) 98168404Spjd{ 99168404Spjd dsl_dataset_t *ds; 100168404Spjd 101219089Spjd if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir) 102168404Spjd return (ds->ds_dir->dd_pool); 103168404Spjd else 104219089Spjd return (spa_get_dsl(os->os_spa)); 105168404Spjd} 106168404Spjd 107168404Spjddsl_dataset_t * 108168404Spjddmu_objset_ds(objset_t *os) 109168404Spjd{ 110219089Spjd return (os->os_dsl_dataset); 111168404Spjd} 112168404Spjd 113168404Spjddmu_objset_type_t 114168404Spjddmu_objset_type(objset_t *os) 115168404Spjd{ 116219089Spjd return (os->os_phys->os_type); 117168404Spjd} 118168404Spjd 119168404Spjdvoid 120168404Spjddmu_objset_name(objset_t *os, char *buf) 121168404Spjd{ 122219089Spjd dsl_dataset_name(os->os_dsl_dataset, buf); 123168404Spjd} 124168404Spjd 125168404Spjduint64_t 126168404Spjddmu_objset_id(objset_t *os) 127168404Spjd{ 128219089Spjd dsl_dataset_t *ds = os->os_dsl_dataset; 129168404Spjd 130168404Spjd return (ds ? ds->ds_object : 0); 131168404Spjd} 132168404Spjd 133268647Sdelphijzfs_sync_type_t 134219089Spjddmu_objset_syncprop(objset_t *os) 135219089Spjd{ 136219089Spjd return (os->os_sync); 137219089Spjd} 138219089Spjd 139268647Sdelphijzfs_logbias_op_t 140219089Spjddmu_objset_logbias(objset_t *os) 141219089Spjd{ 142219089Spjd return (os->os_logbias); 143219089Spjd} 144219089Spjd 145168404Spjdstatic void 146168404Spjdchecksum_changed_cb(void *arg, uint64_t newval) 147168404Spjd{ 148219089Spjd objset_t *os = arg; 149168404Spjd 150168404Spjd /* 151168404Spjd * Inheritance should have been done by now. 152168404Spjd */ 153168404Spjd ASSERT(newval != ZIO_CHECKSUM_INHERIT); 154168404Spjd 155219089Spjd os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE); 156168404Spjd} 157168404Spjd 158168404Spjdstatic void 159168404Spjdcompression_changed_cb(void *arg, uint64_t newval) 160168404Spjd{ 161219089Spjd objset_t *os = arg; 162168404Spjd 163168404Spjd /* 164168404Spjd * Inheritance and range checking should have been done by now. 165168404Spjd */ 166168404Spjd ASSERT(newval != ZIO_COMPRESS_INHERIT); 167168404Spjd 168288542Smav os->os_compress = zio_compress_select(os->os_spa, newval, 169288542Smav ZIO_COMPRESS_ON); 170168404Spjd} 171168404Spjd 172168404Spjdstatic void 173168404Spjdcopies_changed_cb(void *arg, uint64_t newval) 174168404Spjd{ 175219089Spjd objset_t *os = arg; 176168404Spjd 177168404Spjd /* 178168404Spjd * Inheritance and range checking should have been done by now. 179168404Spjd */ 180168404Spjd ASSERT(newval > 0); 181219089Spjd ASSERT(newval <= spa_max_replication(os->os_spa)); 182168404Spjd 183219089Spjd os->os_copies = newval; 184168404Spjd} 185168404Spjd 186185029Spjdstatic void 187219089Spjddedup_changed_cb(void *arg, uint64_t newval) 188219089Spjd{ 189219089Spjd objset_t *os = arg; 190219089Spjd spa_t *spa = os->os_spa; 191219089Spjd enum zio_checksum checksum; 192219089Spjd 193219089Spjd /* 194219089Spjd * Inheritance should have been done by now. 195219089Spjd */ 196219089Spjd ASSERT(newval != ZIO_CHECKSUM_INHERIT); 197219089Spjd 198219089Spjd checksum = zio_checksum_dedup_select(spa, newval, ZIO_CHECKSUM_OFF); 199219089Spjd 200219089Spjd os->os_dedup_checksum = checksum & ZIO_CHECKSUM_MASK; 201219089Spjd os->os_dedup_verify = !!(checksum & ZIO_CHECKSUM_VERIFY); 202219089Spjd} 203219089Spjd 204219089Spjdstatic void 205185029Spjdprimary_cache_changed_cb(void *arg, uint64_t newval) 206185029Spjd{ 207219089Spjd objset_t *os = arg; 208185029Spjd 209185029Spjd /* 210185029Spjd * Inheritance and range checking should have been done by now. 211185029Spjd */ 212185029Spjd ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE || 213185029Spjd newval == ZFS_CACHE_METADATA); 214185029Spjd 215219089Spjd os->os_primary_cache = newval; 216185029Spjd} 217185029Spjd 218185029Spjdstatic void 219185029Spjdsecondary_cache_changed_cb(void *arg, uint64_t newval) 220185029Spjd{ 221219089Spjd objset_t *os = arg; 222185029Spjd 223185029Spjd /* 224185029Spjd * Inheritance and range checking should have been done by now. 225185029Spjd */ 226185029Spjd ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE || 227185029Spjd newval == ZFS_CACHE_METADATA); 228185029Spjd 229219089Spjd os->os_secondary_cache = newval; 230185029Spjd} 231185029Spjd 232219089Spjdstatic void 233219089Spjdsync_changed_cb(void *arg, uint64_t newval) 234219089Spjd{ 235219089Spjd objset_t *os = arg; 236219089Spjd 237219089Spjd /* 238219089Spjd * Inheritance and range checking should have been done by now. 239219089Spjd */ 240219089Spjd ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS || 241219089Spjd newval == ZFS_SYNC_DISABLED); 242219089Spjd 243219089Spjd os->os_sync = newval; 244219089Spjd if (os->os_zil) 245219089Spjd zil_set_sync(os->os_zil, newval); 246219089Spjd} 247219089Spjd 248219089Spjdstatic void 249268647Sdelphijredundant_metadata_changed_cb(void *arg, uint64_t newval) 250268647Sdelphij{ 251268647Sdelphij objset_t *os = arg; 252268647Sdelphij 253268647Sdelphij /* 254268647Sdelphij * Inheritance and range checking should have been done by now. 255268647Sdelphij */ 256268647Sdelphij ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL || 257268647Sdelphij newval == ZFS_REDUNDANT_METADATA_MOST); 258268647Sdelphij 259268647Sdelphij os->os_redundant_metadata = newval; 260268647Sdelphij} 261268647Sdelphij 262268647Sdelphijstatic void 263219089Spjdlogbias_changed_cb(void *arg, uint64_t newval) 264219089Spjd{ 265219089Spjd objset_t *os = arg; 266219089Spjd 267219089Spjd ASSERT(newval == ZFS_LOGBIAS_LATENCY || 268219089Spjd newval == ZFS_LOGBIAS_THROUGHPUT); 269219089Spjd os->os_logbias = newval; 270219089Spjd if (os->os_zil) 271219089Spjd zil_set_logbias(os->os_zil, newval); 272219089Spjd} 273219089Spjd 274276081Sdelphijstatic void 275276081Sdelphijrecordsize_changed_cb(void *arg, uint64_t newval) 276276081Sdelphij{ 277276081Sdelphij objset_t *os = arg; 278276081Sdelphij 279276081Sdelphij os->os_recordsize = newval; 280276081Sdelphij} 281276081Sdelphij 282168404Spjdvoid 283168404Spjddmu_objset_byteswap(void *buf, size_t size) 284168404Spjd{ 285168404Spjd objset_phys_t *osp = buf; 286168404Spjd 287209962Smm ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t)); 288168404Spjd dnode_byteswap(&osp->os_meta_dnode); 289168404Spjd byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t)); 290168404Spjd osp->os_type = BSWAP_64(osp->os_type); 291209962Smm osp->os_flags = BSWAP_64(osp->os_flags); 292209962Smm if (size == sizeof (objset_phys_t)) { 293209962Smm dnode_byteswap(&osp->os_userused_dnode); 294209962Smm dnode_byteswap(&osp->os_groupused_dnode); 295209962Smm } 296168404Spjd} 297168404Spjd 298168404Spjdint 299168404Spjddmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, 300219089Spjd objset_t **osp) 301168404Spjd{ 302219089Spjd objset_t *os; 303185029Spjd int i, err; 304168404Spjd 305185029Spjd ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); 306185029Spjd 307219089Spjd os = kmem_zalloc(sizeof (objset_t), KM_SLEEP); 308219089Spjd os->os_dsl_dataset = ds; 309219089Spjd os->os_spa = spa; 310219089Spjd os->os_rootbp = bp; 311219089Spjd if (!BP_IS_HOLE(os->os_rootbp)) { 312277586Sdelphij arc_flags_t aflags = ARC_FLAG_WAIT; 313268657Sdelphij zbookmark_phys_t zb; 314219089Spjd SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, 315219089Spjd ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); 316219089Spjd 317219089Spjd if (DMU_OS_IS_L2CACHEABLE(os)) 318277586Sdelphij aflags |= ARC_FLAG_L2CACHE; 319168404Spjd 320219089Spjd dprintf_bp(os->os_rootbp, "reading %s", ""); 321246666Smm err = arc_read(NULL, spa, os->os_rootbp, 322219089Spjd arc_getbuf_func, &os->os_phys_buf, 323168404Spjd ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); 324248571Smm if (err != 0) { 325219089Spjd kmem_free(os, sizeof (objset_t)); 326185029Spjd /* convert checksum errors into IO errors */ 327185029Spjd if (err == ECKSUM) 328249195Smm err = SET_ERROR(EIO); 329168404Spjd return (err); 330168404Spjd } 331209962Smm 332209962Smm /* Increase the blocksize if we are permitted. */ 333209962Smm if (spa_version(spa) >= SPA_VERSION_USERSPACE && 334219089Spjd arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) { 335307266Smav arc_buf_t *buf = arc_alloc_buf(spa, 336219089Spjd sizeof (objset_phys_t), &os->os_phys_buf, 337209962Smm ARC_BUFC_METADATA); 338209962Smm bzero(buf->b_data, sizeof (objset_phys_t)); 339219089Spjd bcopy(os->os_phys_buf->b_data, buf->b_data, 340219089Spjd arc_buf_size(os->os_phys_buf)); 341307266Smav arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf); 342219089Spjd os->os_phys_buf = buf; 343209962Smm } 344209962Smm 345219089Spjd os->os_phys = os->os_phys_buf->b_data; 346219089Spjd os->os_flags = os->os_phys->os_flags; 347168404Spjd } else { 348209962Smm int size = spa_version(spa) >= SPA_VERSION_USERSPACE ? 349209962Smm sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE; 350307266Smav os->os_phys_buf = arc_alloc_buf(spa, size, 351219089Spjd &os->os_phys_buf, ARC_BUFC_METADATA); 352219089Spjd os->os_phys = os->os_phys_buf->b_data; 353219089Spjd bzero(os->os_phys, size); 354168404Spjd } 355168404Spjd 356168404Spjd /* 357168404Spjd * Note: the changed_cb will be called once before the register 358168404Spjd * func returns, thus changing the checksum/compression from the 359185029Spjd * default (fletcher2/off). Snapshots don't need to know about 360185029Spjd * checksum/compression/copies. 361168404Spjd */ 362268649Sdelphij if (ds != NULL) { 363290756Smav boolean_t needlock = B_FALSE; 364290756Smav 365290756Smav /* 366290756Smav * Note: it's valid to open the objset if the dataset is 367290756Smav * long-held, in which case the pool_config lock will not 368290756Smav * be held. 369290756Smav */ 370290756Smav if (!dsl_pool_config_held(dmu_objset_pool(os))) { 371290756Smav needlock = B_TRUE; 372290756Smav dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 373290756Smav } 374248571Smm err = dsl_prop_register(ds, 375248571Smm zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE), 376219089Spjd primary_cache_changed_cb, os); 377248571Smm if (err == 0) { 378248571Smm err = dsl_prop_register(ds, 379248571Smm zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE), 380219089Spjd secondary_cache_changed_cb, os); 381248571Smm } 382288549Smav if (!ds->ds_is_snapshot) { 383248571Smm if (err == 0) { 384248571Smm err = dsl_prop_register(ds, 385248571Smm zfs_prop_to_name(ZFS_PROP_CHECKSUM), 386219089Spjd checksum_changed_cb, os); 387248571Smm } 388248571Smm if (err == 0) { 389248571Smm err = dsl_prop_register(ds, 390248571Smm zfs_prop_to_name(ZFS_PROP_COMPRESSION), 391219089Spjd compression_changed_cb, os); 392248571Smm } 393248571Smm if (err == 0) { 394248571Smm err = dsl_prop_register(ds, 395248571Smm zfs_prop_to_name(ZFS_PROP_COPIES), 396219089Spjd copies_changed_cb, os); 397248571Smm } 398248571Smm if (err == 0) { 399248571Smm err = dsl_prop_register(ds, 400248571Smm zfs_prop_to_name(ZFS_PROP_DEDUP), 401219089Spjd dedup_changed_cb, os); 402248571Smm } 403248571Smm if (err == 0) { 404248571Smm err = dsl_prop_register(ds, 405248571Smm zfs_prop_to_name(ZFS_PROP_LOGBIAS), 406219089Spjd logbias_changed_cb, os); 407248571Smm } 408248571Smm if (err == 0) { 409248571Smm err = dsl_prop_register(ds, 410248571Smm zfs_prop_to_name(ZFS_PROP_SYNC), 411219089Spjd sync_changed_cb, os); 412248571Smm } 413268647Sdelphij if (err == 0) { 414268647Sdelphij err = dsl_prop_register(ds, 415268647Sdelphij zfs_prop_to_name( 416268647Sdelphij ZFS_PROP_REDUNDANT_METADATA), 417268647Sdelphij redundant_metadata_changed_cb, os); 418268647Sdelphij } 419276081Sdelphij if (err == 0) { 420276081Sdelphij err = dsl_prop_register(ds, 421276081Sdelphij zfs_prop_to_name(ZFS_PROP_RECORDSIZE), 422276081Sdelphij recordsize_changed_cb, os); 423276081Sdelphij } 424185029Spjd } 425290756Smav if (needlock) 426290756Smav dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 427248571Smm if (err != 0) { 428307266Smav arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf); 429219089Spjd kmem_free(os, sizeof (objset_t)); 430168404Spjd return (err); 431168404Spjd } 432268649Sdelphij } else { 433168404Spjd /* It's the meta-objset. */ 434219089Spjd os->os_checksum = ZIO_CHECKSUM_FLETCHER_4; 435288542Smav os->os_compress = ZIO_COMPRESS_ON; 436219089Spjd os->os_copies = spa_max_replication(spa); 437219089Spjd os->os_dedup_checksum = ZIO_CHECKSUM_OFF; 438268647Sdelphij os->os_dedup_verify = B_FALSE; 439268647Sdelphij os->os_logbias = ZFS_LOGBIAS_LATENCY; 440268647Sdelphij os->os_sync = ZFS_SYNC_STANDARD; 441219089Spjd os->os_primary_cache = ZFS_CACHE_ALL; 442219089Spjd os->os_secondary_cache = ZFS_CACHE_ALL; 443168404Spjd } 444168404Spjd 445288549Smav if (ds == NULL || !ds->ds_is_snapshot) 446219089Spjd os->os_zil_header = os->os_phys->os_zil_header; 447219089Spjd os->os_zil = zil_alloc(os, &os->os_zil_header); 448168404Spjd 449168404Spjd for (i = 0; i < TXG_SIZE; i++) { 450219089Spjd list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t), 451168404Spjd offsetof(dnode_t, dn_dirty_link[i])); 452219089Spjd list_create(&os->os_free_dnodes[i], sizeof (dnode_t), 453168404Spjd offsetof(dnode_t, dn_dirty_link[i])); 454168404Spjd } 455219089Spjd list_create(&os->os_dnodes, sizeof (dnode_t), 456168404Spjd offsetof(dnode_t, dn_link)); 457219089Spjd list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), 458168404Spjd offsetof(dmu_buf_impl_t, db_link)); 459168404Spjd 460219089Spjd mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL); 461219089Spjd mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); 462219089Spjd mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); 463168404Spjd 464288549Smav dnode_special_open(os, &os->os_phys->os_meta_dnode, 465288549Smav DMU_META_DNODE_OBJECT, &os->os_meta_dnode); 466219089Spjd if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) { 467288549Smav dnode_special_open(os, &os->os_phys->os_userused_dnode, 468288549Smav DMU_USERUSED_OBJECT, &os->os_userused_dnode); 469288549Smav dnode_special_open(os, &os->os_phys->os_groupused_dnode, 470288549Smav DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode); 471209962Smm } 472168404Spjd 473219089Spjd *osp = os; 474168404Spjd return (0); 475168404Spjd} 476168404Spjd 477219089Spjdint 478219089Spjddmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp) 479168404Spjd{ 480219089Spjd int err = 0; 481168404Spjd 482290756Smav /* 483290756Smav * We shouldn't be doing anything with dsl_dataset_t's unless the 484290756Smav * pool_config lock is held, or the dataset is long-held. 485290756Smav */ 486290756Smav ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool) || 487290756Smav dsl_dataset_long_held(ds)); 488290756Smav 489185029Spjd mutex_enter(&ds->ds_opening_lock); 490268649Sdelphij if (ds->ds_objset == NULL) { 491268649Sdelphij objset_t *os; 492308083Smav rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 493168404Spjd err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), 494268649Sdelphij ds, dsl_dataset_get_blkptr(ds), &os); 495308083Smav rrw_exit(&ds->ds_bp_rwlock, FTAG); 496268649Sdelphij 497268649Sdelphij if (err == 0) { 498268649Sdelphij mutex_enter(&ds->ds_lock); 499268649Sdelphij ASSERT(ds->ds_objset == NULL); 500268649Sdelphij ds->ds_objset = os; 501268649Sdelphij mutex_exit(&ds->ds_lock); 502268649Sdelphij } 503168404Spjd } 504268649Sdelphij *osp = ds->ds_objset; 505185029Spjd mutex_exit(&ds->ds_opening_lock); 506219089Spjd return (err); 507168404Spjd} 508168404Spjd 509248571Smm/* 510248571Smm * Holds the pool while the objset is held. Therefore only one objset 511248571Smm * can be held at a time. 512248571Smm */ 513185029Spjdint 514219089Spjddmu_objset_hold(const char *name, void *tag, objset_t **osp) 515185029Spjd{ 516248571Smm dsl_pool_t *dp; 517219089Spjd dsl_dataset_t *ds; 518185029Spjd int err; 519185029Spjd 520248571Smm err = dsl_pool_hold(name, tag, &dp); 521248571Smm if (err != 0) 522219089Spjd return (err); 523248571Smm err = dsl_dataset_hold(dp, name, tag, &ds); 524248571Smm if (err != 0) { 525248571Smm dsl_pool_rele(dp, tag); 526248571Smm return (err); 527248571Smm } 528219089Spjd 529219089Spjd err = dmu_objset_from_ds(ds, osp); 530248571Smm if (err != 0) { 531219089Spjd dsl_dataset_rele(ds, tag); 532248571Smm dsl_pool_rele(dp, tag); 533248571Smm } 534219089Spjd 535185029Spjd return (err); 536185029Spjd} 537185029Spjd 538288569Smavstatic int 539288569Smavdmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type, 540288569Smav boolean_t readonly, void *tag, objset_t **osp) 541288569Smav{ 542288569Smav int err; 543288569Smav 544288569Smav err = dmu_objset_from_ds(ds, osp); 545288569Smav if (err != 0) { 546288569Smav dsl_dataset_disown(ds, tag); 547288569Smav } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) { 548288569Smav dsl_dataset_disown(ds, tag); 549288569Smav return (SET_ERROR(EINVAL)); 550288569Smav } else if (!readonly && dsl_dataset_is_snapshot(ds)) { 551288569Smav dsl_dataset_disown(ds, tag); 552288569Smav return (SET_ERROR(EROFS)); 553288569Smav } 554288569Smav return (err); 555288569Smav} 556288569Smav 557248571Smm/* 558248571Smm * dsl_pool must not be held when this is called. 559248571Smm * Upon successful return, there will be a longhold on the dataset, 560248571Smm * and the dsl_pool will not be held. 561248571Smm */ 562185029Spjdint 563219089Spjddmu_objset_own(const char *name, dmu_objset_type_t type, 564219089Spjd boolean_t readonly, void *tag, objset_t **osp) 565185029Spjd{ 566248571Smm dsl_pool_t *dp; 567185029Spjd dsl_dataset_t *ds; 568185029Spjd int err; 569185029Spjd 570248571Smm err = dsl_pool_hold(name, FTAG, &dp); 571248571Smm if (err != 0) 572185029Spjd return (err); 573248571Smm err = dsl_dataset_own(dp, name, tag, &ds); 574248571Smm if (err != 0) { 575248571Smm dsl_pool_rele(dp, FTAG); 576248571Smm return (err); 577248571Smm } 578288569Smav err = dmu_objset_own_impl(ds, type, readonly, tag, osp); 579288569Smav dsl_pool_rele(dp, FTAG); 580185029Spjd 581185029Spjd return (err); 582185029Spjd} 583185029Spjd 584288569Smavint 585288569Smavdmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type, 586288569Smav boolean_t readonly, void *tag, objset_t **osp) 587288569Smav{ 588288569Smav dsl_dataset_t *ds; 589288569Smav int err; 590288569Smav 591288569Smav err = dsl_dataset_own_obj(dp, obj, tag, &ds); 592288569Smav if (err != 0) 593288569Smav return (err); 594288569Smav 595288569Smav return (dmu_objset_own_impl(ds, type, readonly, tag, osp)); 596288569Smav} 597288569Smav 598168404Spjdvoid 599219089Spjddmu_objset_rele(objset_t *os, void *tag) 600168404Spjd{ 601248571Smm dsl_pool_t *dp = dmu_objset_pool(os); 602219089Spjd dsl_dataset_rele(os->os_dsl_dataset, tag); 603248571Smm dsl_pool_rele(dp, tag); 604219089Spjd} 605185029Spjd 606253816Sdelphij/* 607253816Sdelphij * When we are called, os MUST refer to an objset associated with a dataset 608253816Sdelphij * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner 609253816Sdelphij * == tag. We will then release and reacquire ownership of the dataset while 610253816Sdelphij * holding the pool config_rwlock to avoid intervening namespace or ownership 611253816Sdelphij * changes may occur. 612253816Sdelphij * 613253816Sdelphij * This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to 614253816Sdelphij * release the hold on its dataset and acquire a new one on the dataset of the 615253816Sdelphij * same name so that it can be partially torn down and reconstructed. 616253816Sdelphij */ 617219089Spjdvoid 618331612Savgdmu_objset_refresh_ownership(dsl_dataset_t *ds, dsl_dataset_t **newds, 619331612Savg void *tag) 620253816Sdelphij{ 621253816Sdelphij dsl_pool_t *dp; 622307122Smav char name[ZFS_MAX_DATASET_NAME_LEN]; 623253816Sdelphij 624253816Sdelphij VERIFY3P(ds, !=, NULL); 625253816Sdelphij VERIFY3P(ds->ds_owner, ==, tag); 626253816Sdelphij VERIFY(dsl_dataset_long_held(ds)); 627253816Sdelphij 628253816Sdelphij dsl_dataset_name(ds, name); 629331612Savg dp = ds->ds_dir->dd_pool; 630253816Sdelphij dsl_pool_config_enter(dp, FTAG); 631331612Savg dsl_dataset_disown(ds, tag); 632331612Savg VERIFY0(dsl_dataset_own(dp, name, tag, newds)); 633253816Sdelphij dsl_pool_config_exit(dp, FTAG); 634253816Sdelphij} 635253816Sdelphij 636253816Sdelphijvoid 637219089Spjddmu_objset_disown(objset_t *os, void *tag) 638219089Spjd{ 639219089Spjd dsl_dataset_disown(os->os_dsl_dataset, tag); 640168404Spjd} 641168404Spjd 642248571Smmvoid 643185029Spjddmu_objset_evict_dbufs(objset_t *os) 644168404Spjd{ 645288549Smav dnode_t dn_marker; 646168404Spjd dnode_t *dn; 647168404Spjd 648219089Spjd mutex_enter(&os->os_lock); 649288549Smav dn = list_head(&os->os_dnodes); 650288549Smav while (dn != NULL) { 651288549Smav /* 652288549Smav * Skip dnodes without holds. We have to do this dance 653288549Smav * because dnode_add_ref() only works if there is already a 654288549Smav * hold. If the dnode has no holds, then it has no dbufs. 655288549Smav */ 656288549Smav if (dnode_add_ref(dn, FTAG)) { 657288549Smav list_insert_after(&os->os_dnodes, dn, &dn_marker); 658288549Smav mutex_exit(&os->os_lock); 659168404Spjd 660288549Smav dnode_evict_dbufs(dn); 661288549Smav dnode_rele(dn, FTAG); 662168404Spjd 663288549Smav mutex_enter(&os->os_lock); 664288549Smav dn = list_next(&os->os_dnodes, &dn_marker); 665288549Smav list_remove(&os->os_dnodes, &dn_marker); 666288549Smav } else { 667288549Smav dn = list_next(&os->os_dnodes, dn); 668288549Smav } 669288549Smav } 670288549Smav mutex_exit(&os->os_lock); 671168404Spjd 672288549Smav if (DMU_USERUSED_DNODE(os) != NULL) { 673288549Smav dnode_evict_dbufs(DMU_GROUPUSED_DNODE(os)); 674288549Smav dnode_evict_dbufs(DMU_USERUSED_DNODE(os)); 675168404Spjd } 676288549Smav dnode_evict_dbufs(DMU_META_DNODE(os)); 677168404Spjd} 678168404Spjd 679288549Smav/* 680288549Smav * Objset eviction processing is split into into two pieces. 681288549Smav * The first marks the objset as evicting, evicts any dbufs that 682288549Smav * have a refcount of zero, and then queues up the objset for the 683288549Smav * second phase of eviction. Once os->os_dnodes has been cleared by 684288549Smav * dnode_buf_pageout()->dnode_destroy(), the second phase is executed. 685288549Smav * The second phase closes the special dnodes, dequeues the objset from 686288549Smav * the list of those undergoing eviction, and finally frees the objset. 687288549Smav * 688288549Smav * NOTE: Due to asynchronous eviction processing (invocation of 689288549Smav * dnode_buf_pageout()), it is possible for the meta dnode for the 690288549Smav * objset to have no holds even though os->os_dnodes is not empty. 691288549Smav */ 692168404Spjdvoid 693219089Spjddmu_objset_evict(objset_t *os) 694168404Spjd{ 695219089Spjd dsl_dataset_t *ds = os->os_dsl_dataset; 696168404Spjd 697219089Spjd for (int t = 0; t < TXG_SIZE; t++) 698219089Spjd ASSERT(!dmu_objset_is_dirty(os, t)); 699168404Spjd 700289100Sdelphij if (ds) 701289100Sdelphij dsl_prop_unregister_all(ds, os); 702168404Spjd 703219089Spjd if (os->os_sa) 704219089Spjd sa_tear_down(os); 705219089Spjd 706248571Smm dmu_objset_evict_dbufs(os); 707168404Spjd 708288549Smav mutex_enter(&os->os_lock); 709288549Smav spa_evicting_os_register(os->os_spa, os); 710288549Smav if (list_is_empty(&os->os_dnodes)) { 711288549Smav mutex_exit(&os->os_lock); 712288549Smav dmu_objset_evict_done(os); 713288549Smav } else { 714288549Smav mutex_exit(&os->os_lock); 715288549Smav } 716288549Smav} 717288549Smav 718288549Smavvoid 719288549Smavdmu_objset_evict_done(objset_t *os) 720288549Smav{ 721288549Smav ASSERT3P(list_head(&os->os_dnodes), ==, NULL); 722288549Smav 723219089Spjd dnode_special_close(&os->os_meta_dnode); 724219089Spjd if (DMU_USERUSED_DNODE(os)) { 725219089Spjd dnode_special_close(&os->os_userused_dnode); 726219089Spjd dnode_special_close(&os->os_groupused_dnode); 727209962Smm } 728219089Spjd zil_free(os->os_zil); 729168404Spjd 730307266Smav arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf); 731219089Spjd 732219089Spjd /* 733219089Spjd * This is a barrier to prevent the objset from going away in 734219089Spjd * dnode_move() until we can safely ensure that the objset is still in 735219089Spjd * use. We consider the objset valid before the barrier and invalid 736219089Spjd * after the barrier. 737219089Spjd */ 738219089Spjd rw_enter(&os_lock, RW_READER); 739219089Spjd rw_exit(&os_lock); 740219089Spjd 741219089Spjd mutex_destroy(&os->os_lock); 742219089Spjd mutex_destroy(&os->os_obj_lock); 743219089Spjd mutex_destroy(&os->os_user_ptr_lock); 744288549Smav spa_evicting_os_deregister(os->os_spa, os); 745219089Spjd kmem_free(os, sizeof (objset_t)); 746168404Spjd} 747168404Spjd 748219089Spjdtimestruc_t 749219089Spjddmu_objset_snap_cmtime(objset_t *os) 750219089Spjd{ 751219089Spjd return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir)); 752219089Spjd} 753219089Spjd 754168404Spjd/* called from dsl for meta-objset */ 755219089Spjdobjset_t * 756168404Spjddmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, 757168404Spjd dmu_objset_type_t type, dmu_tx_t *tx) 758168404Spjd{ 759219089Spjd objset_t *os; 760168404Spjd dnode_t *mdn; 761168404Spjd 762168404Spjd ASSERT(dmu_tx_is_syncing(tx)); 763248571Smm 764219089Spjd if (ds != NULL) 765248571Smm VERIFY0(dmu_objset_from_ds(ds, &os)); 766219089Spjd else 767248571Smm VERIFY0(dmu_objset_open_impl(spa, NULL, bp, &os)); 768168404Spjd 769219089Spjd mdn = DMU_META_DNODE(os); 770219089Spjd 771168404Spjd dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, 772168404Spjd DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); 773168404Spjd 774168404Spjd /* 775168404Spjd * We don't want to have to increase the meta-dnode's nlevels 776168404Spjd * later, because then we could do it in quescing context while 777168404Spjd * we are also accessing it in open context. 778168404Spjd * 779168404Spjd * This precaution is not necessary for the MOS (ds == NULL), 780168404Spjd * because the MOS is only updated in syncing context. 781168404Spjd * This is most fortunate: the MOS is the only objset that 782168404Spjd * needs to be synced multiple times as spa_sync() iterates 783168404Spjd * to convergence, so minimizing its dn_nlevels matters. 784168404Spjd */ 785168404Spjd if (ds != NULL) { 786168404Spjd int levels = 1; 787168404Spjd 788168404Spjd /* 789168404Spjd * Determine the number of levels necessary for the meta-dnode 790307126Smav * to contain DN_MAX_OBJECT dnodes. Note that in order to 791307126Smav * ensure that we do not overflow 64 bits, there has to be 792307126Smav * a nlevels that gives us a number of blocks > DN_MAX_OBJECT 793307126Smav * but < 2^64. Therefore, 794307126Smav * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT) (10) must be 795307126Smav * less than (64 - log2(DN_MAX_OBJECT)) (16). 796168404Spjd */ 797307126Smav while ((uint64_t)mdn->dn_nblkptr << 798307126Smav (mdn->dn_datablkshift - DNODE_SHIFT + 799168404Spjd (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < 800307126Smav DN_MAX_OBJECT) 801168404Spjd levels++; 802168404Spjd 803168404Spjd mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = 804168404Spjd mdn->dn_nlevels = levels; 805168404Spjd } 806168404Spjd 807168404Spjd ASSERT(type != DMU_OST_NONE); 808168404Spjd ASSERT(type != DMU_OST_ANY); 809168404Spjd ASSERT(type < DMU_OST_NUMTYPES); 810219089Spjd os->os_phys->os_type = type; 811219089Spjd if (dmu_objset_userused_enabled(os)) { 812219089Spjd os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; 813219089Spjd os->os_flags = os->os_phys->os_flags; 814209962Smm } 815168404Spjd 816168404Spjd dsl_dataset_dirty(ds, tx); 817168404Spjd 818219089Spjd return (os); 819168404Spjd} 820168404Spjd 821248571Smmtypedef struct dmu_objset_create_arg { 822248571Smm const char *doca_name; 823248571Smm cred_t *doca_cred; 824248571Smm void (*doca_userfunc)(objset_t *os, void *arg, 825248571Smm cred_t *cr, dmu_tx_t *tx); 826248571Smm void *doca_userarg; 827248571Smm dmu_objset_type_t doca_type; 828248571Smm uint64_t doca_flags; 829248571Smm} dmu_objset_create_arg_t; 830168404Spjd 831185029Spjd/*ARGSUSED*/ 832168404Spjdstatic int 833248571Smmdmu_objset_create_check(void *arg, dmu_tx_t *tx) 834168404Spjd{ 835248571Smm dmu_objset_create_arg_t *doca = arg; 836248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 837248571Smm dsl_dir_t *pdd; 838248571Smm const char *tail; 839248571Smm int error; 840168404Spjd 841248571Smm if (strchr(doca->doca_name, '@') != NULL) 842249195Smm return (SET_ERROR(EINVAL)); 843168404Spjd 844307122Smav if (strlen(doca->doca_name) >= ZFS_MAX_DATASET_NAME_LEN) 845307122Smav return (SET_ERROR(ENAMETOOLONG)); 846307122Smav 847248571Smm error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail); 848248571Smm if (error != 0) 849248571Smm return (error); 850248571Smm if (tail == NULL) { 851248571Smm dsl_dir_rele(pdd, FTAG); 852249195Smm return (SET_ERROR(EEXIST)); 853168404Spjd } 854265744Sdelphij error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL, 855265744Sdelphij doca->doca_cred); 856248571Smm dsl_dir_rele(pdd, FTAG); 857185029Spjd 858265744Sdelphij return (error); 859168404Spjd} 860168404Spjd 861168404Spjdstatic void 862248571Smmdmu_objset_create_sync(void *arg, dmu_tx_t *tx) 863168404Spjd{ 864248571Smm dmu_objset_create_arg_t *doca = arg; 865248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 866248571Smm dsl_dir_t *pdd; 867248571Smm const char *tail; 868248571Smm dsl_dataset_t *ds; 869219089Spjd uint64_t obj; 870248571Smm blkptr_t *bp; 871248571Smm objset_t *os; 872168404Spjd 873248571Smm VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail)); 874168404Spjd 875248571Smm obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags, 876248571Smm doca->doca_cred, tx); 877168404Spjd 878248571Smm VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); 879308083Smav rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 880248571Smm bp = dsl_dataset_get_blkptr(ds); 881248571Smm os = dmu_objset_create_impl(pdd->dd_pool->dp_spa, 882248571Smm ds, bp, doca->doca_type, tx); 883308083Smav rrw_exit(&ds->ds_bp_rwlock, FTAG); 884168404Spjd 885248571Smm if (doca->doca_userfunc != NULL) { 886248571Smm doca->doca_userfunc(os, doca->doca_userarg, 887248571Smm doca->doca_cred, tx); 888168404Spjd } 889185029Spjd 890248571Smm spa_history_log_internal_ds(ds, "create", tx, ""); 891248571Smm dsl_dataset_rele(ds, FTAG); 892248571Smm dsl_dir_rele(pdd, FTAG); 893168404Spjd} 894168404Spjd 895168404Spjdint 896219089Spjddmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, 897185029Spjd void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg) 898168404Spjd{ 899248571Smm dmu_objset_create_arg_t doca; 900168404Spjd 901248571Smm doca.doca_name = name; 902248571Smm doca.doca_cred = CRED(); 903248571Smm doca.doca_flags = flags; 904248571Smm doca.doca_userfunc = func; 905248571Smm doca.doca_userarg = arg; 906248571Smm doca.doca_type = type; 907168404Spjd 908248571Smm return (dsl_sync_task(name, 909269006Sdelphij dmu_objset_create_check, dmu_objset_create_sync, &doca, 910269006Sdelphij 5, ZFS_SPACE_CHECK_NORMAL)); 911168404Spjd} 912168404Spjd 913248571Smmtypedef struct dmu_objset_clone_arg { 914248571Smm const char *doca_clone; 915248571Smm const char *doca_origin; 916248571Smm cred_t *doca_cred; 917248571Smm} dmu_objset_clone_arg_t; 918248571Smm 919248571Smm/*ARGSUSED*/ 920248571Smmstatic int 921248571Smmdmu_objset_clone_check(void *arg, dmu_tx_t *tx) 922168404Spjd{ 923248571Smm dmu_objset_clone_arg_t *doca = arg; 924219089Spjd dsl_dir_t *pdd; 925219089Spjd const char *tail; 926248571Smm int error; 927248571Smm dsl_dataset_t *origin; 928248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 929168404Spjd 930248571Smm if (strchr(doca->doca_clone, '@') != NULL) 931249195Smm return (SET_ERROR(EINVAL)); 932248571Smm 933307122Smav if (strlen(doca->doca_clone) >= ZFS_MAX_DATASET_NAME_LEN) 934307122Smav return (SET_ERROR(ENAMETOOLONG)); 935307122Smav 936248571Smm error = dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail); 937248571Smm if (error != 0) 938248571Smm return (error); 939219089Spjd if (tail == NULL) { 940248571Smm dsl_dir_rele(pdd, FTAG); 941249195Smm return (SET_ERROR(EEXIST)); 942168404Spjd } 943282756Savg 944265744Sdelphij error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL, 945265744Sdelphij doca->doca_cred); 946265744Sdelphij if (error != 0) { 947265744Sdelphij dsl_dir_rele(pdd, FTAG); 948265744Sdelphij return (SET_ERROR(EDQUOT)); 949265744Sdelphij } 950248571Smm dsl_dir_rele(pdd, FTAG); 951185029Spjd 952248571Smm error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin); 953248571Smm if (error != 0) 954219089Spjd return (error); 955219089Spjd 956248571Smm /* You can only clone snapshots, not the head datasets. */ 957288549Smav if (!origin->ds_is_snapshot) { 958248571Smm dsl_dataset_rele(origin, FTAG); 959249195Smm return (SET_ERROR(EINVAL)); 960219089Spjd } 961248571Smm dsl_dataset_rele(origin, FTAG); 962248571Smm 963248571Smm return (0); 964209962Smm} 965209962Smm 966209962Smmstatic void 967248571Smmdmu_objset_clone_sync(void *arg, dmu_tx_t *tx) 968209962Smm{ 969248571Smm dmu_objset_clone_arg_t *doca = arg; 970248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 971248571Smm dsl_dir_t *pdd; 972248571Smm const char *tail; 973248571Smm dsl_dataset_t *origin, *ds; 974248571Smm uint64_t obj; 975307122Smav char namebuf[ZFS_MAX_DATASET_NAME_LEN]; 976209962Smm 977248571Smm VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail)); 978248571Smm VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin)); 979209962Smm 980248571Smm obj = dsl_dataset_create_sync(pdd, tail, origin, 0, 981248571Smm doca->doca_cred, tx); 982219089Spjd 983248571Smm VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); 984248571Smm dsl_dataset_name(origin, namebuf); 985248571Smm spa_history_log_internal_ds(ds, "clone", tx, 986248571Smm "origin=%s (%llu)", namebuf, origin->ds_object); 987248571Smm dsl_dataset_rele(ds, FTAG); 988248571Smm dsl_dataset_rele(origin, FTAG); 989248571Smm dsl_dir_rele(pdd, FTAG); 990209962Smm} 991209962Smm 992248571Smmint 993248571Smmdmu_objset_clone(const char *clone, const char *origin) 994168404Spjd{ 995248571Smm dmu_objset_clone_arg_t doca; 996168404Spjd 997248571Smm doca.doca_clone = clone; 998248571Smm doca.doca_origin = origin; 999248571Smm doca.doca_cred = CRED(); 1000219089Spjd 1001248571Smm return (dsl_sync_task(clone, 1002269006Sdelphij dmu_objset_clone_check, dmu_objset_clone_sync, &doca, 1003269006Sdelphij 5, ZFS_SPACE_CHECK_NORMAL)); 1004168404Spjd} 1005168404Spjd 1006168404Spjdint 1007248571Smmdmu_objset_snapshot_one(const char *fsname, const char *snapname) 1008168404Spjd{ 1009168404Spjd int err; 1010248571Smm char *longsnap = kmem_asprintf("%s@%s", fsname, snapname); 1011248571Smm nvlist_t *snaps = fnvlist_alloc(); 1012168404Spjd 1013248571Smm fnvlist_add_boolean(snaps, longsnap); 1014248571Smm strfree(longsnap); 1015248571Smm err = dsl_dataset_snapshot(snaps, NULL, NULL); 1016248571Smm fnvlist_free(snaps); 1017168404Spjd return (err); 1018168404Spjd} 1019168404Spjd 1020168404Spjdstatic void 1021209962Smmdmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx) 1022168404Spjd{ 1023168404Spjd dnode_t *dn; 1024168404Spjd 1025168404Spjd while (dn = list_head(list)) { 1026168404Spjd ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); 1027168404Spjd ASSERT(dn->dn_dbuf->db_data_pending); 1028168404Spjd /* 1029209962Smm * Initialize dn_zio outside dnode_sync() because the 1030209962Smm * meta-dnode needs to set it ouside dnode_sync(). 1031168404Spjd */ 1032168404Spjd dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio; 1033168404Spjd ASSERT(dn->dn_zio); 1034168404Spjd 1035168404Spjd ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS); 1036168404Spjd list_remove(list, dn); 1037209962Smm 1038209962Smm if (newlist) { 1039209962Smm (void) dnode_add_ref(dn, newlist); 1040209962Smm list_insert_tail(newlist, dn); 1041209962Smm } 1042209962Smm 1043168404Spjd dnode_sync(dn, tx); 1044168404Spjd } 1045168404Spjd} 1046168404Spjd 1047168404Spjd/* ARGSUSED */ 1048168404Spjdstatic void 1049219089Spjddmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) 1050168404Spjd{ 1051185029Spjd blkptr_t *bp = zio->io_bp; 1052219089Spjd objset_t *os = arg; 1053168404Spjd dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; 1054168404Spjd 1055268649Sdelphij ASSERT(!BP_IS_EMBEDDED(bp)); 1056248571Smm ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); 1057248571Smm ASSERT0(BP_GET_LEVEL(bp)); 1058185029Spjd 1059168404Spjd /* 1060209962Smm * Update rootbp fill count: it should be the number of objects 1061209962Smm * allocated in the object set (not counting the "special" 1062209962Smm * objects that are stored in the objset_phys_t -- the meta 1063209962Smm * dnode and user/group accounting objects). 1064168404Spjd */ 1065209962Smm bp->blk_fill = 0; 1066185029Spjd for (int i = 0; i < dnp->dn_nblkptr; i++) 1067268649Sdelphij bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]); 1068308083Smav if (os->os_dsl_dataset != NULL) 1069308083Smav rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_WRITER, FTAG); 1070308083Smav *os->os_rootbp = *bp; 1071308083Smav if (os->os_dsl_dataset != NULL) 1072308083Smav rrw_exit(&os->os_dsl_dataset->ds_bp_rwlock, FTAG); 1073219089Spjd} 1074168404Spjd 1075219089Spjd/* ARGSUSED */ 1076219089Spjdstatic void 1077219089Spjddmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg) 1078219089Spjd{ 1079219089Spjd blkptr_t *bp = zio->io_bp; 1080219089Spjd blkptr_t *bp_orig = &zio->io_bp_orig; 1081219089Spjd objset_t *os = arg; 1082219089Spjd 1083185029Spjd if (zio->io_flags & ZIO_FLAG_IO_REWRITE) { 1084219089Spjd ASSERT(BP_EQUAL(bp, bp_orig)); 1085185029Spjd } else { 1086219089Spjd dsl_dataset_t *ds = os->os_dsl_dataset; 1087219089Spjd dmu_tx_t *tx = os->os_synctx; 1088219089Spjd 1089219089Spjd (void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE); 1090219089Spjd dsl_dataset_block_born(ds, bp, tx); 1091168404Spjd } 1092308083Smav kmem_free(bp, sizeof (*bp)); 1093168404Spjd} 1094168404Spjd 1095168404Spjd/* called from dsl */ 1096168404Spjdvoid 1097219089Spjddmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) 1098168404Spjd{ 1099168404Spjd int txgoff; 1100268657Sdelphij zbookmark_phys_t zb; 1101219089Spjd zio_prop_t zp; 1102168404Spjd zio_t *zio; 1103168404Spjd list_t *list; 1104209962Smm list_t *newlist = NULL; 1105168404Spjd dbuf_dirty_record_t *dr; 1106308083Smav blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP); 1107308083Smav *blkptr_copy = *os->os_rootbp; 1108168404Spjd 1109168404Spjd dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); 1110168404Spjd 1111168404Spjd ASSERT(dmu_tx_is_syncing(tx)); 1112168404Spjd /* XXX the write_done callback should really give us the tx... */ 1113168404Spjd os->os_synctx = tx; 1114168404Spjd 1115168404Spjd if (os->os_dsl_dataset == NULL) { 1116168404Spjd /* 1117168404Spjd * This is the MOS. If we have upgraded, 1118168404Spjd * spa_max_replication() could change, so reset 1119168404Spjd * os_copies here. 1120168404Spjd */ 1121168404Spjd os->os_copies = spa_max_replication(os->os_spa); 1122168404Spjd } 1123168404Spjd 1124168404Spjd /* 1125168404Spjd * Create the root block IO 1126168404Spjd */ 1127219089Spjd SET_BOOKMARK(&zb, os->os_dsl_dataset ? 1128219089Spjd os->os_dsl_dataset->ds_object : DMU_META_OBJSET, 1129219089Spjd ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); 1130246666Smm arc_release(os->os_phys_buf, &os->os_phys_buf); 1131185029Spjd 1132219089Spjd dmu_write_policy(os, NULL, 0, 0, &zp); 1133185029Spjd 1134219089Spjd zio = arc_write(pio, os->os_spa, tx->tx_txg, 1135308083Smav blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), 1136304139Savg &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done, 1137304139Savg os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); 1138185029Spjd 1139168404Spjd /* 1140209962Smm * Sync special dnodes - the parent IO for the sync is the root block 1141168404Spjd */ 1142219089Spjd DMU_META_DNODE(os)->dn_zio = zio; 1143219089Spjd dnode_sync(DMU_META_DNODE(os), tx); 1144168404Spjd 1145209962Smm os->os_phys->os_flags = os->os_flags; 1146209962Smm 1147219089Spjd if (DMU_USERUSED_DNODE(os) && 1148219089Spjd DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) { 1149219089Spjd DMU_USERUSED_DNODE(os)->dn_zio = zio; 1150219089Spjd dnode_sync(DMU_USERUSED_DNODE(os), tx); 1151219089Spjd DMU_GROUPUSED_DNODE(os)->dn_zio = zio; 1152219089Spjd dnode_sync(DMU_GROUPUSED_DNODE(os), tx); 1153209962Smm } 1154209962Smm 1155168404Spjd txgoff = tx->tx_txg & TXG_MASK; 1156168404Spjd 1157209962Smm if (dmu_objset_userused_enabled(os)) { 1158209962Smm newlist = &os->os_synced_dnodes; 1159209962Smm /* 1160209962Smm * We must create the list here because it uses the 1161209962Smm * dn_dirty_link[] of this txg. 1162209962Smm */ 1163209962Smm list_create(newlist, sizeof (dnode_t), 1164209962Smm offsetof(dnode_t, dn_dirty_link[txgoff])); 1165209962Smm } 1166168404Spjd 1167209962Smm dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx); 1168209962Smm dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx); 1169209962Smm 1170219089Spjd list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff]; 1171168404Spjd while (dr = list_head(list)) { 1172248571Smm ASSERT0(dr->dr_dbuf->db_level); 1173168404Spjd list_remove(list, dr); 1174168404Spjd if (dr->dr_zio) 1175168404Spjd zio_nowait(dr->dr_zio); 1176168404Spjd } 1177168404Spjd /* 1178168404Spjd * Free intent log blocks up to this tx. 1179168404Spjd */ 1180168404Spjd zil_sync(os->os_zil, tx); 1181185029Spjd os->os_phys->os_zil_header = os->os_zil_header; 1182168404Spjd zio_nowait(zio); 1183168404Spjd} 1184168404Spjd 1185219089Spjdboolean_t 1186219089Spjddmu_objset_is_dirty(objset_t *os, uint64_t txg) 1187219089Spjd{ 1188219089Spjd return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) || 1189219089Spjd !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK])); 1190219089Spjd} 1191219089Spjd 1192209962Smmstatic objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES]; 1193209962Smm 1194168404Spjdvoid 1195209962Smmdmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb) 1196209962Smm{ 1197209962Smm used_cbs[ost] = cb; 1198209962Smm} 1199209962Smm 1200209962Smmboolean_t 1201219089Spjddmu_objset_userused_enabled(objset_t *os) 1202209962Smm{ 1203209962Smm return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE && 1204219089Spjd used_cbs[os->os_phys->os_type] != NULL && 1205219089Spjd DMU_USERUSED_DNODE(os) != NULL); 1206209962Smm} 1207209962Smm 1208308586Smavtypedef struct userquota_node { 1209308586Smav uint64_t uqn_id; 1210308586Smav int64_t uqn_delta; 1211308586Smav avl_node_t uqn_node; 1212308586Smav} userquota_node_t; 1213308586Smav 1214308586Smavtypedef struct userquota_cache { 1215308586Smav avl_tree_t uqc_user_deltas; 1216308586Smav avl_tree_t uqc_group_deltas; 1217308586Smav} userquota_cache_t; 1218308586Smav 1219308586Smavstatic int 1220308586Smavuserquota_compare(const void *l, const void *r) 1221308586Smav{ 1222308586Smav const userquota_node_t *luqn = l; 1223308586Smav const userquota_node_t *ruqn = r; 1224308586Smav 1225308586Smav if (luqn->uqn_id < ruqn->uqn_id) 1226308586Smav return (-1); 1227308586Smav if (luqn->uqn_id > ruqn->uqn_id) 1228308586Smav return (1); 1229308586Smav return (0); 1230308586Smav} 1231308586Smav 1232219089Spjdstatic void 1233308586Smavdo_userquota_cacheflush(objset_t *os, userquota_cache_t *cache, dmu_tx_t *tx) 1234219089Spjd{ 1235308586Smav void *cookie; 1236308586Smav userquota_node_t *uqn; 1237308586Smav 1238308586Smav ASSERT(dmu_tx_is_syncing(tx)); 1239308586Smav 1240308586Smav cookie = NULL; 1241308586Smav while ((uqn = avl_destroy_nodes(&cache->uqc_user_deltas, 1242308586Smav &cookie)) != NULL) { 1243308586Smav VERIFY0(zap_increment_int(os, DMU_USERUSED_OBJECT, 1244308586Smav uqn->uqn_id, uqn->uqn_delta, tx)); 1245308586Smav kmem_free(uqn, sizeof (*uqn)); 1246308586Smav } 1247308586Smav avl_destroy(&cache->uqc_user_deltas); 1248308586Smav 1249308586Smav cookie = NULL; 1250308586Smav while ((uqn = avl_destroy_nodes(&cache->uqc_group_deltas, 1251308586Smav &cookie)) != NULL) { 1252308586Smav VERIFY0(zap_increment_int(os, DMU_GROUPUSED_OBJECT, 1253308586Smav uqn->uqn_id, uqn->uqn_delta, tx)); 1254308586Smav kmem_free(uqn, sizeof (*uqn)); 1255308586Smav } 1256308586Smav avl_destroy(&cache->uqc_group_deltas); 1257308586Smav} 1258308586Smav 1259308586Smavstatic void 1260308586Smavuserquota_update_cache(avl_tree_t *avl, uint64_t id, int64_t delta) 1261308586Smav{ 1262308586Smav userquota_node_t search = { .uqn_id = id }; 1263308586Smav avl_index_t idx; 1264308586Smav 1265308586Smav userquota_node_t *uqn = avl_find(avl, &search, &idx); 1266308586Smav if (uqn == NULL) { 1267308586Smav uqn = kmem_zalloc(sizeof (*uqn), KM_SLEEP); 1268308586Smav uqn->uqn_id = id; 1269308586Smav avl_insert(avl, uqn, idx); 1270308586Smav } 1271308586Smav uqn->uqn_delta += delta; 1272308586Smav} 1273308586Smav 1274308586Smavstatic void 1275308586Smavdo_userquota_update(userquota_cache_t *cache, uint64_t used, uint64_t flags, 1276308586Smav uint64_t user, uint64_t group, boolean_t subtract) 1277308586Smav{ 1278219089Spjd if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) { 1279219089Spjd int64_t delta = DNODE_SIZE + used; 1280219089Spjd if (subtract) 1281219089Spjd delta = -delta; 1282308586Smav 1283308586Smav userquota_update_cache(&cache->uqc_user_deltas, user, delta); 1284308586Smav userquota_update_cache(&cache->uqc_group_deltas, group, delta); 1285219089Spjd } 1286219089Spjd} 1287219089Spjd 1288209962Smmvoid 1289219089Spjddmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) 1290209962Smm{ 1291209962Smm dnode_t *dn; 1292209962Smm list_t *list = &os->os_synced_dnodes; 1293308586Smav userquota_cache_t cache = { 0 }; 1294209962Smm 1295209962Smm ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os)); 1296209962Smm 1297308586Smav avl_create(&cache.uqc_user_deltas, userquota_compare, 1298308586Smav sizeof (userquota_node_t), offsetof(userquota_node_t, uqn_node)); 1299308586Smav avl_create(&cache.uqc_group_deltas, userquota_compare, 1300308586Smav sizeof (userquota_node_t), offsetof(userquota_node_t, uqn_node)); 1301308586Smav 1302209962Smm while (dn = list_head(list)) { 1303219089Spjd int flags; 1304209962Smm ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object)); 1305209962Smm ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE || 1306209962Smm dn->dn_phys->dn_flags & 1307209962Smm DNODE_FLAG_USERUSED_ACCOUNTED); 1308209962Smm 1309209962Smm /* Allocate the user/groupused objects if necessary. */ 1310219089Spjd if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) { 1311308586Smav VERIFY0(zap_create_claim(os, 1312209962Smm DMU_USERUSED_OBJECT, 1313209962Smm DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); 1314308586Smav VERIFY0(zap_create_claim(os, 1315209962Smm DMU_GROUPUSED_OBJECT, 1316209962Smm DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); 1317209962Smm } 1318209962Smm 1319219089Spjd flags = dn->dn_id_flags; 1320219089Spjd ASSERT(flags); 1321219089Spjd if (flags & DN_ID_OLD_EXIST) { 1322308586Smav do_userquota_update(&cache, 1323308586Smav dn->dn_oldused, dn->dn_oldflags, 1324308586Smav dn->dn_olduid, dn->dn_oldgid, B_TRUE); 1325209962Smm } 1326219089Spjd if (flags & DN_ID_NEW_EXIST) { 1327308586Smav do_userquota_update(&cache, 1328308586Smav DN_USED_BYTES(dn->dn_phys), 1329219089Spjd dn->dn_phys->dn_flags, dn->dn_newuid, 1330308586Smav dn->dn_newgid, B_FALSE); 1331219089Spjd } 1332209962Smm 1333209962Smm mutex_enter(&dn->dn_mtx); 1334219089Spjd dn->dn_oldused = 0; 1335219089Spjd dn->dn_oldflags = 0; 1336219089Spjd if (dn->dn_id_flags & DN_ID_NEW_EXIST) { 1337219089Spjd dn->dn_olduid = dn->dn_newuid; 1338219089Spjd dn->dn_oldgid = dn->dn_newgid; 1339219089Spjd dn->dn_id_flags |= DN_ID_OLD_EXIST; 1340219089Spjd if (dn->dn_bonuslen == 0) 1341219089Spjd dn->dn_id_flags |= DN_ID_CHKED_SPILL; 1342219089Spjd else 1343219089Spjd dn->dn_id_flags |= DN_ID_CHKED_BONUS; 1344219089Spjd } 1345219089Spjd dn->dn_id_flags &= ~(DN_ID_NEW_EXIST); 1346209962Smm mutex_exit(&dn->dn_mtx); 1347209962Smm 1348209962Smm list_remove(list, dn); 1349209962Smm dnode_rele(dn, list); 1350209962Smm } 1351308586Smav do_userquota_cacheflush(os, &cache, tx); 1352209962Smm} 1353209962Smm 1354219089Spjd/* 1355219089Spjd * Returns a pointer to data to find uid/gid from 1356219089Spjd * 1357219089Spjd * If a dirty record for transaction group that is syncing can't 1358219089Spjd * be found then NULL is returned. In the NULL case it is assumed 1359219089Spjd * the uid/gid aren't changing. 1360219089Spjd */ 1361219089Spjdstatic void * 1362219089Spjddmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx) 1363219089Spjd{ 1364219089Spjd dbuf_dirty_record_t *dr, **drp; 1365219089Spjd void *data; 1366219089Spjd 1367219089Spjd if (db->db_dirtycnt == 0) 1368219089Spjd return (db->db.db_data); /* Nothing is changing */ 1369219089Spjd 1370219089Spjd for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next) 1371219089Spjd if (dr->dr_txg == tx->tx_txg) 1372219089Spjd break; 1373219089Spjd 1374219089Spjd if (dr == NULL) { 1375219089Spjd data = NULL; 1376219089Spjd } else { 1377219089Spjd dnode_t *dn; 1378219089Spjd 1379219089Spjd DB_DNODE_ENTER(dr->dr_dbuf); 1380219089Spjd dn = DB_DNODE(dr->dr_dbuf); 1381219089Spjd 1382219089Spjd if (dn->dn_bonuslen == 0 && 1383219089Spjd dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID) 1384219089Spjd data = dr->dt.dl.dr_data->b_data; 1385219089Spjd else 1386219089Spjd data = dr->dt.dl.dr_data; 1387219089Spjd 1388219089Spjd DB_DNODE_EXIT(dr->dr_dbuf); 1389219089Spjd } 1390219089Spjd 1391219089Spjd return (data); 1392219089Spjd} 1393219089Spjd 1394219089Spjdvoid 1395219089Spjddmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx) 1396219089Spjd{ 1397219089Spjd objset_t *os = dn->dn_objset; 1398219089Spjd void *data = NULL; 1399219089Spjd dmu_buf_impl_t *db = NULL; 1400247187Smm uint64_t *user = NULL; 1401247187Smm uint64_t *group = NULL; 1402219089Spjd int flags = dn->dn_id_flags; 1403219089Spjd int error; 1404219089Spjd boolean_t have_spill = B_FALSE; 1405219089Spjd 1406219089Spjd if (!dmu_objset_userused_enabled(dn->dn_objset)) 1407219089Spjd return; 1408219089Spjd 1409219089Spjd if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST| 1410219089Spjd DN_ID_CHKED_SPILL))) 1411219089Spjd return; 1412219089Spjd 1413219089Spjd if (before && dn->dn_bonuslen != 0) 1414219089Spjd data = DN_BONUS(dn->dn_phys); 1415219089Spjd else if (!before && dn->dn_bonuslen != 0) { 1416219089Spjd if (dn->dn_bonus) { 1417219089Spjd db = dn->dn_bonus; 1418219089Spjd mutex_enter(&db->db_mtx); 1419219089Spjd data = dmu_objset_userquota_find_data(db, tx); 1420219089Spjd } else { 1421219089Spjd data = DN_BONUS(dn->dn_phys); 1422219089Spjd } 1423219089Spjd } else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) { 1424219089Spjd int rf = 0; 1425219089Spjd 1426219089Spjd if (RW_WRITE_HELD(&dn->dn_struct_rwlock)) 1427219089Spjd rf |= DB_RF_HAVESTRUCT; 1428219089Spjd error = dmu_spill_hold_by_dnode(dn, 1429219089Spjd rf | DB_RF_MUST_SUCCEED, 1430219089Spjd FTAG, (dmu_buf_t **)&db); 1431219089Spjd ASSERT(error == 0); 1432219089Spjd mutex_enter(&db->db_mtx); 1433219089Spjd data = (before) ? db->db.db_data : 1434219089Spjd dmu_objset_userquota_find_data(db, tx); 1435219089Spjd have_spill = B_TRUE; 1436219089Spjd } else { 1437219089Spjd mutex_enter(&dn->dn_mtx); 1438219089Spjd dn->dn_id_flags |= DN_ID_CHKED_BONUS; 1439219089Spjd mutex_exit(&dn->dn_mtx); 1440219089Spjd return; 1441219089Spjd } 1442219089Spjd 1443219089Spjd if (before) { 1444219089Spjd ASSERT(data); 1445219089Spjd user = &dn->dn_olduid; 1446219089Spjd group = &dn->dn_oldgid; 1447219089Spjd } else if (data) { 1448219089Spjd user = &dn->dn_newuid; 1449219089Spjd group = &dn->dn_newgid; 1450219089Spjd } 1451219089Spjd 1452219089Spjd /* 1453219089Spjd * Must always call the callback in case the object 1454219089Spjd * type has changed and that type isn't an object type to track 1455219089Spjd */ 1456219089Spjd error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data, 1457219089Spjd user, group); 1458219089Spjd 1459219089Spjd /* 1460219089Spjd * Preserve existing uid/gid when the callback can't determine 1461219089Spjd * what the new uid/gid are and the callback returned EEXIST. 1462219089Spjd * The EEXIST error tells us to just use the existing uid/gid. 1463219089Spjd * If we don't know what the old values are then just assign 1464219089Spjd * them to 0, since that is a new file being created. 1465219089Spjd */ 1466219089Spjd if (!before && data == NULL && error == EEXIST) { 1467219089Spjd if (flags & DN_ID_OLD_EXIST) { 1468219089Spjd dn->dn_newuid = dn->dn_olduid; 1469219089Spjd dn->dn_newgid = dn->dn_oldgid; 1470219089Spjd } else { 1471219089Spjd dn->dn_newuid = 0; 1472219089Spjd dn->dn_newgid = 0; 1473219089Spjd } 1474219089Spjd error = 0; 1475219089Spjd } 1476219089Spjd 1477219089Spjd if (db) 1478219089Spjd mutex_exit(&db->db_mtx); 1479219089Spjd 1480219089Spjd mutex_enter(&dn->dn_mtx); 1481219089Spjd if (error == 0 && before) 1482219089Spjd dn->dn_id_flags |= DN_ID_OLD_EXIST; 1483219089Spjd if (error == 0 && !before) 1484219089Spjd dn->dn_id_flags |= DN_ID_NEW_EXIST; 1485219089Spjd 1486219089Spjd if (have_spill) { 1487219089Spjd dn->dn_id_flags |= DN_ID_CHKED_SPILL; 1488219089Spjd } else { 1489219089Spjd dn->dn_id_flags |= DN_ID_CHKED_BONUS; 1490219089Spjd } 1491219089Spjd mutex_exit(&dn->dn_mtx); 1492219089Spjd if (have_spill) 1493219089Spjd dmu_buf_rele((dmu_buf_t *)db, FTAG); 1494219089Spjd} 1495219089Spjd 1496209962Smmboolean_t 1497209962Smmdmu_objset_userspace_present(objset_t *os) 1498209962Smm{ 1499219089Spjd return (os->os_phys->os_flags & 1500209962Smm OBJSET_FLAG_USERACCOUNTING_COMPLETE); 1501209962Smm} 1502209962Smm 1503209962Smmint 1504209962Smmdmu_objset_userspace_upgrade(objset_t *os) 1505209962Smm{ 1506209962Smm uint64_t obj; 1507209962Smm int err = 0; 1508209962Smm 1509209962Smm if (dmu_objset_userspace_present(os)) 1510209962Smm return (0); 1511219089Spjd if (!dmu_objset_userused_enabled(os)) 1512249195Smm return (SET_ERROR(ENOTSUP)); 1513209962Smm if (dmu_objset_is_snapshot(os)) 1514249195Smm return (SET_ERROR(EINVAL)); 1515209962Smm 1516209962Smm /* 1517209962Smm * We simply need to mark every object dirty, so that it will be 1518209962Smm * synced out and now accounted. If this is called 1519209962Smm * concurrently, or if we already did some work before crashing, 1520209962Smm * that's fine, since we track each object's accounted state 1521209962Smm * independently. 1522209962Smm */ 1523209962Smm 1524209962Smm for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) { 1525209962Smm dmu_tx_t *tx; 1526209962Smm dmu_buf_t *db; 1527209962Smm int objerr; 1528209962Smm 1529209962Smm if (issig(JUSTLOOKING) && issig(FORREAL)) 1530249195Smm return (SET_ERROR(EINTR)); 1531209962Smm 1532209962Smm objerr = dmu_bonus_hold(os, obj, FTAG, &db); 1533248571Smm if (objerr != 0) 1534209962Smm continue; 1535209962Smm tx = dmu_tx_create(os); 1536209962Smm dmu_tx_hold_bonus(tx, obj); 1537209962Smm objerr = dmu_tx_assign(tx, TXG_WAIT); 1538248571Smm if (objerr != 0) { 1539209962Smm dmu_tx_abort(tx); 1540209962Smm continue; 1541209962Smm } 1542209962Smm dmu_buf_will_dirty(db, tx); 1543209962Smm dmu_buf_rele(db, FTAG); 1544209962Smm dmu_tx_commit(tx); 1545209962Smm } 1546209962Smm 1547219089Spjd os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; 1548209962Smm txg_wait_synced(dmu_objset_pool(os), 0); 1549209962Smm return (0); 1550209962Smm} 1551209962Smm 1552209962Smmvoid 1553168404Spjddmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, 1554168404Spjd uint64_t *usedobjsp, uint64_t *availobjsp) 1555168404Spjd{ 1556219089Spjd dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp, 1557168404Spjd usedobjsp, availobjsp); 1558168404Spjd} 1559168404Spjd 1560168404Spjduint64_t 1561168404Spjddmu_objset_fsid_guid(objset_t *os) 1562168404Spjd{ 1563219089Spjd return (dsl_dataset_fsid_guid(os->os_dsl_dataset)); 1564168404Spjd} 1565168404Spjd 1566168404Spjdvoid 1567168404Spjddmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat) 1568168404Spjd{ 1569219089Spjd stat->dds_type = os->os_phys->os_type; 1570219089Spjd if (os->os_dsl_dataset) 1571219089Spjd dsl_dataset_fast_stat(os->os_dsl_dataset, stat); 1572168404Spjd} 1573168404Spjd 1574168404Spjdvoid 1575168404Spjddmu_objset_stats(objset_t *os, nvlist_t *nv) 1576168404Spjd{ 1577219089Spjd ASSERT(os->os_dsl_dataset || 1578219089Spjd os->os_phys->os_type == DMU_OST_META); 1579168404Spjd 1580219089Spjd if (os->os_dsl_dataset != NULL) 1581219089Spjd dsl_dataset_stats(os->os_dsl_dataset, nv); 1582168404Spjd 1583168404Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE, 1584219089Spjd os->os_phys->os_type); 1585209962Smm dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING, 1586209962Smm dmu_objset_userspace_present(os)); 1587168404Spjd} 1588168404Spjd 1589168404Spjdint 1590168404Spjddmu_objset_is_snapshot(objset_t *os) 1591168404Spjd{ 1592219089Spjd if (os->os_dsl_dataset != NULL) 1593288549Smav return (os->os_dsl_dataset->ds_is_snapshot); 1594168404Spjd else 1595168404Spjd return (B_FALSE); 1596168404Spjd} 1597168404Spjd 1598168404Spjdint 1599185029Spjddmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen, 1600185029Spjd boolean_t *conflict) 1601185029Spjd{ 1602219089Spjd dsl_dataset_t *ds = os->os_dsl_dataset; 1603185029Spjd uint64_t ignored; 1604185029Spjd 1605277585Sdelphij if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0) 1606249195Smm return (SET_ERROR(ENOENT)); 1607185029Spjd 1608185029Spjd return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset, 1609277585Sdelphij dsl_dataset_phys(ds)->ds_snapnames_zapobj, name, 8, 1, &ignored, 1610277585Sdelphij MT_FIRST, real, maxlen, conflict)); 1611185029Spjd} 1612185029Spjd 1613185029Spjdint 1614168404Spjddmu_snapshot_list_next(objset_t *os, int namelen, char *name, 1615185029Spjd uint64_t *idp, uint64_t *offp, boolean_t *case_conflict) 1616168404Spjd{ 1617219089Spjd dsl_dataset_t *ds = os->os_dsl_dataset; 1618168404Spjd zap_cursor_t cursor; 1619168404Spjd zap_attribute_t attr; 1620168404Spjd 1621248571Smm ASSERT(dsl_pool_config_held(dmu_objset_pool(os))); 1622248571Smm 1623277585Sdelphij if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0) 1624249195Smm return (SET_ERROR(ENOENT)); 1625168404Spjd 1626168404Spjd zap_cursor_init_serialized(&cursor, 1627168404Spjd ds->ds_dir->dd_pool->dp_meta_objset, 1628277585Sdelphij dsl_dataset_phys(ds)->ds_snapnames_zapobj, *offp); 1629168404Spjd 1630168404Spjd if (zap_cursor_retrieve(&cursor, &attr) != 0) { 1631168404Spjd zap_cursor_fini(&cursor); 1632249195Smm return (SET_ERROR(ENOENT)); 1633168404Spjd } 1634168404Spjd 1635168404Spjd if (strlen(attr.za_name) + 1 > namelen) { 1636168404Spjd zap_cursor_fini(&cursor); 1637249195Smm return (SET_ERROR(ENAMETOOLONG)); 1638168404Spjd } 1639168404Spjd 1640168404Spjd (void) strcpy(name, attr.za_name); 1641168404Spjd if (idp) 1642168404Spjd *idp = attr.za_first_integer; 1643185029Spjd if (case_conflict) 1644185029Spjd *case_conflict = attr.za_normalization_conflict; 1645168404Spjd zap_cursor_advance(&cursor); 1646168404Spjd *offp = zap_cursor_serialize(&cursor); 1647168404Spjd zap_cursor_fini(&cursor); 1648168404Spjd 1649168404Spjd return (0); 1650168404Spjd} 1651168404Spjd 1652168404Spjdint 1653168404Spjddmu_dir_list_next(objset_t *os, int namelen, char *name, 1654168404Spjd uint64_t *idp, uint64_t *offp) 1655168404Spjd{ 1656219089Spjd dsl_dir_t *dd = os->os_dsl_dataset->ds_dir; 1657168404Spjd zap_cursor_t cursor; 1658168404Spjd zap_attribute_t attr; 1659168404Spjd 1660168404Spjd /* there is no next dir on a snapshot! */ 1661219089Spjd if (os->os_dsl_dataset->ds_object != 1662277585Sdelphij dsl_dir_phys(dd)->dd_head_dataset_obj) 1663249195Smm return (SET_ERROR(ENOENT)); 1664168404Spjd 1665168404Spjd zap_cursor_init_serialized(&cursor, 1666168404Spjd dd->dd_pool->dp_meta_objset, 1667277585Sdelphij dsl_dir_phys(dd)->dd_child_dir_zapobj, *offp); 1668168404Spjd 1669168404Spjd if (zap_cursor_retrieve(&cursor, &attr) != 0) { 1670168404Spjd zap_cursor_fini(&cursor); 1671249195Smm return (SET_ERROR(ENOENT)); 1672168404Spjd } 1673168404Spjd 1674168404Spjd if (strlen(attr.za_name) + 1 > namelen) { 1675168404Spjd zap_cursor_fini(&cursor); 1676249195Smm return (SET_ERROR(ENAMETOOLONG)); 1677168404Spjd } 1678168404Spjd 1679168404Spjd (void) strcpy(name, attr.za_name); 1680168404Spjd if (idp) 1681168404Spjd *idp = attr.za_first_integer; 1682168404Spjd zap_cursor_advance(&cursor); 1683168404Spjd *offp = zap_cursor_serialize(&cursor); 1684168404Spjd zap_cursor_fini(&cursor); 1685168404Spjd 1686168404Spjd return (0); 1687168404Spjd} 1688168404Spjd 1689288569Smavtypedef struct dmu_objset_find_ctx { 1690288569Smav taskq_t *dc_tq; 1691288569Smav dsl_pool_t *dc_dp; 1692288569Smav uint64_t dc_ddobj; 1693288569Smav int (*dc_func)(dsl_pool_t *, dsl_dataset_t *, void *); 1694288569Smav void *dc_arg; 1695288569Smav int dc_flags; 1696288569Smav kmutex_t *dc_error_lock; 1697288569Smav int *dc_error; 1698288569Smav} dmu_objset_find_ctx_t; 1699288569Smav 1700288569Smavstatic void 1701288569Smavdmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp) 1702168404Spjd{ 1703288569Smav dsl_pool_t *dp = dcp->dc_dp; 1704288569Smav dmu_objset_find_ctx_t *child_dcp; 1705248571Smm dsl_dir_t *dd; 1706248571Smm dsl_dataset_t *ds; 1707248571Smm zap_cursor_t zc; 1708248571Smm zap_attribute_t *attr; 1709248571Smm uint64_t thisobj; 1710288569Smav int err = 0; 1711248571Smm 1712288569Smav /* don't process if there already was an error */ 1713288569Smav if (*dcp->dc_error != 0) 1714288569Smav goto out; 1715248571Smm 1716288569Smav err = dsl_dir_hold_obj(dp, dcp->dc_ddobj, NULL, FTAG, &dd); 1717248571Smm if (err != 0) 1718288569Smav goto out; 1719248571Smm 1720248571Smm /* Don't visit hidden ($MOS & $ORIGIN) objsets. */ 1721248571Smm if (dd->dd_myname[0] == '$') { 1722248571Smm dsl_dir_rele(dd, FTAG); 1723288569Smav goto out; 1724248571Smm } 1725248571Smm 1726277585Sdelphij thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj; 1727248571Smm attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 1728248571Smm 1729248571Smm /* 1730248571Smm * Iterate over all children. 1731248571Smm */ 1732288569Smav if (dcp->dc_flags & DS_FIND_CHILDREN) { 1733248571Smm for (zap_cursor_init(&zc, dp->dp_meta_objset, 1734277585Sdelphij dsl_dir_phys(dd)->dd_child_dir_zapobj); 1735248571Smm zap_cursor_retrieve(&zc, attr) == 0; 1736248571Smm (void) zap_cursor_advance(&zc)) { 1737248571Smm ASSERT3U(attr->za_integer_length, ==, 1738248571Smm sizeof (uint64_t)); 1739248571Smm ASSERT3U(attr->za_num_integers, ==, 1); 1740248571Smm 1741288569Smav child_dcp = kmem_alloc(sizeof (*child_dcp), KM_SLEEP); 1742288569Smav *child_dcp = *dcp; 1743288569Smav child_dcp->dc_ddobj = attr->za_first_integer; 1744288569Smav if (dcp->dc_tq != NULL) 1745288569Smav (void) taskq_dispatch(dcp->dc_tq, 1746288569Smav dmu_objset_find_dp_cb, child_dcp, TQ_SLEEP); 1747288569Smav else 1748288569Smav dmu_objset_find_dp_impl(child_dcp); 1749248571Smm } 1750248571Smm zap_cursor_fini(&zc); 1751248571Smm } 1752248571Smm 1753248571Smm /* 1754248571Smm * Iterate over all snapshots. 1755248571Smm */ 1756288569Smav if (dcp->dc_flags & DS_FIND_SNAPSHOTS) { 1757248571Smm dsl_dataset_t *ds; 1758248571Smm err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); 1759248571Smm 1760248571Smm if (err == 0) { 1761277585Sdelphij uint64_t snapobj; 1762277585Sdelphij 1763277585Sdelphij snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; 1764248571Smm dsl_dataset_rele(ds, FTAG); 1765248571Smm 1766248571Smm for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj); 1767248571Smm zap_cursor_retrieve(&zc, attr) == 0; 1768248571Smm (void) zap_cursor_advance(&zc)) { 1769248571Smm ASSERT3U(attr->za_integer_length, ==, 1770248571Smm sizeof (uint64_t)); 1771248571Smm ASSERT3U(attr->za_num_integers, ==, 1); 1772248571Smm 1773248571Smm err = dsl_dataset_hold_obj(dp, 1774248571Smm attr->za_first_integer, FTAG, &ds); 1775248571Smm if (err != 0) 1776248571Smm break; 1777288569Smav err = dcp->dc_func(dp, ds, dcp->dc_arg); 1778248571Smm dsl_dataset_rele(ds, FTAG); 1779248571Smm if (err != 0) 1780248571Smm break; 1781248571Smm } 1782248571Smm zap_cursor_fini(&zc); 1783248571Smm } 1784248571Smm } 1785248571Smm 1786248571Smm dsl_dir_rele(dd, FTAG); 1787248571Smm kmem_free(attr, sizeof (zap_attribute_t)); 1788248571Smm 1789248571Smm if (err != 0) 1790288569Smav goto out; 1791248571Smm 1792248571Smm /* 1793248571Smm * Apply to self. 1794248571Smm */ 1795248571Smm err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); 1796248571Smm if (err != 0) 1797288569Smav goto out; 1798288569Smav err = dcp->dc_func(dp, ds, dcp->dc_arg); 1799248571Smm dsl_dataset_rele(ds, FTAG); 1800288569Smav 1801288569Smavout: 1802288569Smav if (err != 0) { 1803288569Smav mutex_enter(dcp->dc_error_lock); 1804288569Smav /* only keep first error */ 1805288569Smav if (*dcp->dc_error == 0) 1806288569Smav *dcp->dc_error = err; 1807288569Smav mutex_exit(dcp->dc_error_lock); 1808288569Smav } 1809288569Smav 1810288569Smav kmem_free(dcp, sizeof (*dcp)); 1811185029Spjd} 1812185029Spjd 1813288569Smavstatic void 1814288569Smavdmu_objset_find_dp_cb(void *arg) 1815288569Smav{ 1816288569Smav dmu_objset_find_ctx_t *dcp = arg; 1817288569Smav dsl_pool_t *dp = dcp->dc_dp; 1818288569Smav 1819288570Smav /* 1820288570Smav * We need to get a pool_config_lock here, as there are several 1821288570Smav * asssert(pool_config_held) down the stack. Getting a lock via 1822288570Smav * dsl_pool_config_enter is risky, as it might be stalled by a 1823288570Smav * pending writer. This would deadlock, as the write lock can 1824288570Smav * only be granted when our parent thread gives up the lock. 1825288570Smav * The _prio interface gives us priority over a pending writer. 1826288570Smav */ 1827288570Smav dsl_pool_config_enter_prio(dp, FTAG); 1828288569Smav 1829288569Smav dmu_objset_find_dp_impl(dcp); 1830288569Smav 1831288569Smav dsl_pool_config_exit(dp, FTAG); 1832288569Smav} 1833288569Smav 1834185029Spjd/* 1835288569Smav * Find objsets under and including ddobj, call func(ds) on each. 1836288569Smav * The order for the enumeration is completely undefined. 1837288569Smav * func is called with dsl_pool_config held. 1838288569Smav */ 1839288569Smavint 1840288569Smavdmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj, 1841288569Smav int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags) 1842288569Smav{ 1843288569Smav int error = 0; 1844288569Smav taskq_t *tq = NULL; 1845288569Smav int ntasks; 1846288569Smav dmu_objset_find_ctx_t *dcp; 1847288569Smav kmutex_t err_lock; 1848288569Smav 1849288569Smav mutex_init(&err_lock, NULL, MUTEX_DEFAULT, NULL); 1850288569Smav dcp = kmem_alloc(sizeof (*dcp), KM_SLEEP); 1851288569Smav dcp->dc_tq = NULL; 1852288569Smav dcp->dc_dp = dp; 1853288569Smav dcp->dc_ddobj = ddobj; 1854288569Smav dcp->dc_func = func; 1855288569Smav dcp->dc_arg = arg; 1856288569Smav dcp->dc_flags = flags; 1857288569Smav dcp->dc_error_lock = &err_lock; 1858288569Smav dcp->dc_error = &error; 1859288569Smav 1860288569Smav if ((flags & DS_FIND_SERIALIZE) || dsl_pool_config_held_writer(dp)) { 1861288569Smav /* 1862288569Smav * In case a write lock is held we can't make use of 1863288569Smav * parallelism, as down the stack of the worker threads 1864288569Smav * the lock is asserted via dsl_pool_config_held. 1865288569Smav * In case of a read lock this is solved by getting a read 1866288569Smav * lock in each worker thread, which isn't possible in case 1867288569Smav * of a writer lock. So we fall back to the synchronous path 1868288569Smav * here. 1869288569Smav * In the future it might be possible to get some magic into 1870288569Smav * dsl_pool_config_held in a way that it returns true for 1871288569Smav * the worker threads so that a single lock held from this 1872288569Smav * thread suffices. For now, stay single threaded. 1873288569Smav */ 1874288569Smav dmu_objset_find_dp_impl(dcp); 1875297104Smav mutex_destroy(&err_lock); 1876288569Smav 1877288569Smav return (error); 1878288569Smav } 1879288569Smav 1880288569Smav ntasks = dmu_find_threads; 1881288569Smav if (ntasks == 0) 1882288569Smav ntasks = vdev_count_leaves(dp->dp_spa) * 4; 1883288569Smav tq = taskq_create("dmu_objset_find", ntasks, minclsyspri, ntasks, 1884288569Smav INT_MAX, 0); 1885288569Smav if (tq == NULL) { 1886288569Smav kmem_free(dcp, sizeof (*dcp)); 1887297104Smav mutex_destroy(&err_lock); 1888297104Smav 1889288569Smav return (SET_ERROR(ENOMEM)); 1890288569Smav } 1891288569Smav dcp->dc_tq = tq; 1892288569Smav 1893288569Smav /* dcp will be freed by task */ 1894288569Smav (void) taskq_dispatch(tq, dmu_objset_find_dp_cb, dcp, TQ_SLEEP); 1895288569Smav 1896288569Smav /* 1897288569Smav * PORTING: this code relies on the property of taskq_wait to wait 1898288569Smav * until no more tasks are queued and no more tasks are active. As 1899288569Smav * we always queue new tasks from within other tasks, task_wait 1900288569Smav * reliably waits for the full recursion to finish, even though we 1901288569Smav * enqueue new tasks after taskq_wait has been called. 1902288569Smav * On platforms other than illumos, taskq_wait may not have this 1903288569Smav * property. 1904288569Smav */ 1905288569Smav taskq_wait(tq); 1906288569Smav taskq_destroy(tq); 1907288569Smav mutex_destroy(&err_lock); 1908288569Smav 1909288569Smav return (error); 1910288569Smav} 1911288569Smav 1912288569Smav/* 1913248571Smm * Find all objsets under name, and for each, call 'func(child_name, arg)'. 1914248571Smm * The dp_config_rwlock must not be held when this is called, and it 1915248571Smm * will not be held when the callback is called. 1916248571Smm * Therefore this function should only be used when the pool is not changing 1917248571Smm * (e.g. in syncing context), or the callback can deal with the possible races. 1918185029Spjd */ 1919248571Smmstatic int 1920248571Smmdmu_objset_find_impl(spa_t *spa, const char *name, 1921248571Smm int func(const char *, void *), void *arg, int flags) 1922185029Spjd{ 1923168404Spjd dsl_dir_t *dd; 1924248571Smm dsl_pool_t *dp = spa_get_dsl(spa); 1925185029Spjd dsl_dataset_t *ds; 1926168404Spjd zap_cursor_t zc; 1927168498Spjd zap_attribute_t *attr; 1928168404Spjd char *child; 1929185029Spjd uint64_t thisobj; 1930185029Spjd int err; 1931168404Spjd 1932248571Smm dsl_pool_config_enter(dp, FTAG); 1933248571Smm 1934248571Smm err = dsl_dir_hold(dp, name, FTAG, &dd, NULL); 1935248571Smm if (err != 0) { 1936248571Smm dsl_pool_config_exit(dp, FTAG); 1937168404Spjd return (err); 1938248571Smm } 1939168404Spjd 1940185029Spjd /* Don't visit hidden ($MOS & $ORIGIN) objsets. */ 1941185029Spjd if (dd->dd_myname[0] == '$') { 1942248571Smm dsl_dir_rele(dd, FTAG); 1943248571Smm dsl_pool_config_exit(dp, FTAG); 1944185029Spjd return (0); 1945185029Spjd } 1946185029Spjd 1947277585Sdelphij thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj; 1948168498Spjd attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 1949168404Spjd 1950168404Spjd /* 1951168404Spjd * Iterate over all children. 1952168404Spjd */ 1953168404Spjd if (flags & DS_FIND_CHILDREN) { 1954185029Spjd for (zap_cursor_init(&zc, dp->dp_meta_objset, 1955277585Sdelphij dsl_dir_phys(dd)->dd_child_dir_zapobj); 1956168498Spjd zap_cursor_retrieve(&zc, attr) == 0; 1957168404Spjd (void) zap_cursor_advance(&zc)) { 1958248571Smm ASSERT3U(attr->za_integer_length, ==, 1959248571Smm sizeof (uint64_t)); 1960248571Smm ASSERT3U(attr->za_num_integers, ==, 1); 1961168404Spjd 1962219089Spjd child = kmem_asprintf("%s/%s", name, attr->za_name); 1963248571Smm dsl_pool_config_exit(dp, FTAG); 1964248571Smm err = dmu_objset_find_impl(spa, child, 1965248571Smm func, arg, flags); 1966248571Smm dsl_pool_config_enter(dp, FTAG); 1967219089Spjd strfree(child); 1968248571Smm if (err != 0) 1969168404Spjd break; 1970168404Spjd } 1971168404Spjd zap_cursor_fini(&zc); 1972168404Spjd 1973248571Smm if (err != 0) { 1974248571Smm dsl_dir_rele(dd, FTAG); 1975248571Smm dsl_pool_config_exit(dp, FTAG); 1976168498Spjd kmem_free(attr, sizeof (zap_attribute_t)); 1977168404Spjd return (err); 1978168404Spjd } 1979168404Spjd } 1980168404Spjd 1981168404Spjd /* 1982168404Spjd * Iterate over all snapshots. 1983168404Spjd */ 1984185029Spjd if (flags & DS_FIND_SNAPSHOTS) { 1985185029Spjd err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); 1986168404Spjd 1987185029Spjd if (err == 0) { 1988277585Sdelphij uint64_t snapobj; 1989277585Sdelphij 1990277585Sdelphij snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; 1991185029Spjd dsl_dataset_rele(ds, FTAG); 1992168404Spjd 1993185029Spjd for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj); 1994185029Spjd zap_cursor_retrieve(&zc, attr) == 0; 1995185029Spjd (void) zap_cursor_advance(&zc)) { 1996248571Smm ASSERT3U(attr->za_integer_length, ==, 1997185029Spjd sizeof (uint64_t)); 1998248571Smm ASSERT3U(attr->za_num_integers, ==, 1); 1999168404Spjd 2000219089Spjd child = kmem_asprintf("%s@%s", 2001219089Spjd name, attr->za_name); 2002248571Smm dsl_pool_config_exit(dp, FTAG); 2003248571Smm err = func(child, arg); 2004248571Smm dsl_pool_config_enter(dp, FTAG); 2005219089Spjd strfree(child); 2006248571Smm if (err != 0) 2007185029Spjd break; 2008185029Spjd } 2009185029Spjd zap_cursor_fini(&zc); 2010168404Spjd } 2011168404Spjd } 2012168404Spjd 2013248571Smm dsl_dir_rele(dd, FTAG); 2014168498Spjd kmem_free(attr, sizeof (zap_attribute_t)); 2015248571Smm dsl_pool_config_exit(dp, FTAG); 2016168404Spjd 2017248571Smm if (err != 0) 2018168404Spjd return (err); 2019168404Spjd 2020248571Smm /* Apply to self. */ 2021248571Smm return (func(name, arg)); 2022168404Spjd} 2023185029Spjd 2024248571Smm/* 2025248571Smm * See comment above dmu_objset_find_impl(). 2026248571Smm */ 2027207626Smmint 2028248571Smmdmu_objset_find(char *name, int func(const char *, void *), void *arg, 2029248571Smm int flags) 2030207626Smm{ 2031248571Smm spa_t *spa; 2032248571Smm int error; 2033207626Smm 2034248571Smm error = spa_open(name, &spa, FTAG); 2035248571Smm if (error != 0) 2036248571Smm return (error); 2037248571Smm error = dmu_objset_find_impl(spa, name, func, arg, flags); 2038248571Smm spa_close(spa, FTAG); 2039248571Smm return (error); 2040207626Smm} 2041207626Smm 2042185029Spjdvoid 2043185029Spjddmu_objset_set_user(objset_t *os, void *user_ptr) 2044185029Spjd{ 2045219089Spjd ASSERT(MUTEX_HELD(&os->os_user_ptr_lock)); 2046219089Spjd os->os_user_ptr = user_ptr; 2047185029Spjd} 2048185029Spjd 2049185029Spjdvoid * 2050185029Spjddmu_objset_get_user(objset_t *os) 2051185029Spjd{ 2052219089Spjd ASSERT(MUTEX_HELD(&os->os_user_ptr_lock)); 2053219089Spjd return (os->os_user_ptr); 2054185029Spjd} 2055248571Smm 2056248571Smm/* 2057248571Smm * Determine name of filesystem, given name of snapshot. 2058307122Smav * buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes 2059248571Smm */ 2060248571Smmint 2061248571Smmdmu_fsname(const char *snapname, char *buf) 2062248571Smm{ 2063248571Smm char *atp = strchr(snapname, '@'); 2064248571Smm if (atp == NULL) 2065249195Smm return (SET_ERROR(EINVAL)); 2066307122Smav if (atp - snapname >= ZFS_MAX_DATASET_NAME_LEN) 2067249195Smm return (SET_ERROR(ENAMETOOLONG)); 2068248571Smm (void) strlcpy(buf, snapname, atp - snapname + 1); 2069248571Smm return (0); 2070248571Smm} 2071