1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23321553Smav * Copyright (c) 2011, 2017 by Delphix. All rights reserved. 24251646Sdelphij * Copyright (c) 2013 Steven Hartland. All rights reserved. 25286575Smav * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. 26296519Smav * Copyright (c) 2014 Integros [integros.com] 27321523Smav * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
28168404Spjd */ 29168404Spjd 30168404Spjd#include <sys/dsl_pool.h> 31168404Spjd#include <sys/dsl_dataset.h> 32219089Spjd#include <sys/dsl_prop.h> 33168404Spjd#include <sys/dsl_dir.h> 34168404Spjd#include <sys/dsl_synctask.h> 35219089Spjd#include <sys/dsl_scan.h> 36219089Spjd#include <sys/dnode.h> 37168404Spjd#include <sys/dmu_tx.h> 38168404Spjd#include <sys/dmu_objset.h> 39168404Spjd#include <sys/arc.h> 40168404Spjd#include <sys/zap.h> 41168404Spjd#include <sys/zio.h> 42168404Spjd#include <sys/zfs_context.h> 43168404Spjd#include <sys/fs/zfs.h> 44185029Spjd#include <sys/zfs_znode.h> 45185029Spjd#include <sys/spa_impl.h> 46219089Spjd#include <sys/dsl_deadlist.h> 47332547Smav#include <sys/vdev_impl.h> 48332547Smav#include <sys/metaslab_impl.h> 49236884Smm#include <sys/bptree.h> 50236884Smm#include <sys/zfeature.h> 51239620Smm#include <sys/zil_impl.h> 52248571Smm#include <sys/dsl_userhold.h> 53168404Spjd 54297813Ssmh#if defined(__FreeBSD__) && defined(_KERNEL) 55302265Ssmh#include <sys/types.h> 56266497Ssmh#include <sys/sysctl.h> 57266497Ssmh#endif 58266497Ssmh 59258632Savg/* 60258632Savg * ZFS Write Throttle 61258632Savg * ------------------ 62258632Savg * 63258632Savg * ZFS must limit the rate of incoming writes to the rate at which it is able 64258632Savg * to sync data modifications to the backend storage. Throttling by too much 65258632Savg * creates an artificial limit; throttling by too little can only be sustained 66258632Savg * for short periods and would lead to highly lumpy performance. On a per-pool 67258632Savg * basis, ZFS tracks the amount of modified (dirty) data. As operations change 68258632Savg * data, the amount of dirty data increases; as ZFS syncs out data, the amount 69258632Savg * of dirty data decreases. When the amount of dirty data exceeds a 70258632Savg * predetermined threshold further modifications are blocked until the amount 71258632Savg * of dirty data decreases (as data is synced out). 
72258632Savg * 73258632Savg * The limit on dirty data is tunable, and should be adjusted according to 74258632Savg * both the IO capacity and available memory of the system. The larger the 75258632Savg * window, the more ZFS is able to aggregate and amortize metadata (and data) 76258632Savg * changes. However, memory is a limited resource, and allowing for more dirty 77258632Savg * data comes at the cost of keeping other useful data in memory (for example 78258632Savg * ZFS data cached by the ARC). 79258632Savg * 80258632Savg * Implementation 81258632Savg * 82258632Savg * As buffers are modified dsl_pool_willuse_space() increments both the per- 83258632Savg * txg (dp_dirty_pertxg[]) and poolwide (dp_dirty_total) accounting of 84258632Savg * dirty space used; dsl_pool_dirty_space() decrements those values as data 85258632Savg * is synced out from dsl_pool_sync(). While only the poolwide value is 86258632Savg * relevant, the per-txg value is useful for debugging. The tunable 87258632Savg * zfs_dirty_data_max determines the dirty space limit. Once that value is 88258632Savg * exceeded, new writes are halted until space frees up. 89258632Savg * 90258632Savg * The zfs_dirty_data_sync tunable dictates the threshold at which we 91258632Savg * ensure that there is a txg syncing (see the comment in txg.c for a full 92258632Savg * description of transaction group stages). 93258632Savg * 94258632Savg * The IO scheduler uses both the dirty space limit and current amount of 95258632Savg * dirty data as inputs. Those values affect the number of concurrent IOs ZFS 96258632Savg * issues. See the comment in vdev_queue.c for details of the IO scheduler. 97258632Savg * 98258632Savg * The delay is also calculated based on the amount of dirty data. See the 99258632Savg * comment above dmu_tx_delay() for details. 
 */

/*
 * zfs_dirty_data_max will be set to zfs_dirty_data_max_percent% of all memory,
 * capped at zfs_dirty_data_max_max.  It can also be overridden in /etc/system.
 *
 * zfs_dirty_data_max has no initializer, so it is zero until something
 * assigns it.  On FreeBSD kernels these three variables are additionally
 * exported under the vfs.zfs sysctl node (dirty_data_max, dirty_data_max_max,
 * dirty_data_max_percent); see the SYSCTL declarations further down.
 */
uint64_t zfs_dirty_data_max;
/* Default cap: 4 GiB. */
uint64_t zfs_dirty_data_max_max = 4ULL * 1024 * 1024 * 1024;
/* The FreeBSD sysctl handler only accepts values in the range 0..100. */
int zfs_dirty_data_max_percent = 10;

/*
 * If there is at least this much dirty data, push out a txg.
 * Default: 64 MiB.
 */
uint64_t zfs_dirty_data_sync = 64 * 1024 * 1024;

/*
 * Once there is this amount of dirty data, the dmu_tx_delay() will kick in
 * and delay each transaction.
 * This value should be >= zfs_vdev_async_write_active_max_dirty_percent.
 * (The FreeBSD sysctl handler rejects smaller values with EINVAL.)
 */
int zfs_delay_min_dirty_percent = 60;

/*
 * This controls how quickly the delay approaches infinity.
 * Larger values cause it to delay more for a given amount of dirty data.
 * Therefore larger values will cause there to be less dirty data for a
 * given throughput.
 *
 * For the smoothest delay, this value should be about 1 billion divided
 * by the maximum number of operations per second.  This will smoothly
 * handle between 10x and 1/10th this number.
 *
 * Note: zfs_delay_scale * zfs_dirty_data_max must be < 2^64, due to the
 * multiply in dmu_tx_delay().
 *
 * The default below evaluates to 500000, i.e. 1 billion / 2000 operations
 * per second.
 */
uint64_t zfs_delay_scale = 1000 * 1000 * 1000 / 2000;

/*
 * This determines the number of threads used by the dp_sync_taskq.
 * The value is handed to taskq_create() together with the
 * TASKQ_THREADS_CPU_PCT flag (see dsl_pool_open_impl()), so it is
 * presumably interpreted as a percentage rather than an absolute thread
 * count -- confirm against the taskq implementation.
 */
int zfs_sync_taskq_batch_pct = 75;

/*
 * These tunables determine the behavior of how zil_itxg_clean() is
 * called via zil_clean() in the context of spa_sync().  When an itxg
 * list needs to be cleaned, TQ_NOSLEEP will be used when dispatching.
 * If the dispatch fails, the call to zil_itxg_clean() will occur
 * synchronously in the context of spa_sync(), which can negatively
 * impact the performance of spa_sync() (e.g. in the case of the itxg
 * list having a large number of itxs that need to be cleaned).
 *
 * Thus, these tunables can be used to manipulate the behavior of the
 * taskq used by zil_clean(); they determine the number of taskq entries
 * that are pre-populated when the taskq is first created (via the
 * "zfs_zil_clean_taskq_minalloc" tunable) and the maximum number of
 * taskq entries that are cached after an on-demand allocation (via the
 * "zfs_zil_clean_taskq_maxalloc" tunable).
 *
 * The idea being, we want to try reasonably hard to ensure there will
 * already be a taskq entry pre-allocated by the time that it is needed
 * by zil_clean().  This way, we can avoid the possibility of an
 * on-demand allocation of a new taskq entry failing, which would
 * result in zil_itxg_clean() being called synchronously from zil_clean()
 * (which can adversely affect performance of spa_sync()).
 *
 * Additionally, the number of threads used by the taskq can be
 * configured via the "zfs_zil_clean_taskq_nthr_pct" tunable.
167324205Savg */ 168324205Savgint zfs_zil_clean_taskq_nthr_pct = 100; 169324205Savgint zfs_zil_clean_taskq_minalloc = 1024; 170324205Savgint zfs_zil_clean_taskq_maxalloc = 1024 * 1024; 171324205Savg 172297813Ssmh#if defined(__FreeBSD__) && defined(_KERNEL) 173258632Savg 174266497Ssmhextern int zfs_vdev_async_write_active_max_dirty_percent; 175266497Ssmh 176219089SpjdSYSCTL_DECL(_vfs_zfs); 177219089Spjd 178266497SsmhSYSCTL_UQUAD(_vfs_zfs, OID_AUTO, dirty_data_max, CTLFLAG_RWTUN, 179266497Ssmh &zfs_dirty_data_max, 0, 180266533Sallanjude "The maximum amount of dirty data in bytes after which new writes are " 181266533Sallanjude "halted until space becomes available"); 182266497Ssmh 183266497SsmhSYSCTL_UQUAD(_vfs_zfs, OID_AUTO, dirty_data_max_max, CTLFLAG_RDTUN, 184266497Ssmh &zfs_dirty_data_max_max, 0, 185266533Sallanjude "The absolute cap on dirty_data_max when auto calculating"); 186266497Ssmh 187271589Ssmhstatic int sysctl_zfs_dirty_data_max_percent(SYSCTL_HANDLER_ARGS); 188271589SsmhSYSCTL_PROC(_vfs_zfs, OID_AUTO, dirty_data_max_percent, 189271589Ssmh CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, 0, sizeof(int), 190271589Ssmh sysctl_zfs_dirty_data_max_percent, "I", 191266497Ssmh "The percent of physical memory used to auto calculate dirty_data_max"); 192266497Ssmh 193266497SsmhSYSCTL_UQUAD(_vfs_zfs, OID_AUTO, dirty_data_sync, CTLFLAG_RWTUN, 194266497Ssmh &zfs_dirty_data_sync, 0, 195266533Sallanjude "Force a txg if the number of dirty buffer bytes exceed this value"); 196266497Ssmh 197266497Ssmhstatic int sysctl_zfs_delay_min_dirty_percent(SYSCTL_HANDLER_ARGS); 198266497Ssmh/* No zfs_delay_min_dirty_percent tunable due to limit requirements */ 199266497SsmhSYSCTL_PROC(_vfs_zfs, OID_AUTO, delay_min_dirty_percent, 200266497Ssmh CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(int), 201266497Ssmh sysctl_zfs_delay_min_dirty_percent, "I", 202321591Semaste "The limit of outstanding dirty data before transactions are delayed"); 203266497Ssmh 204266497Ssmhstatic int 
sysctl_zfs_delay_scale(SYSCTL_HANDLER_ARGS); 205266497Ssmh/* No zfs_delay_scale tunable due to limit requirements */ 206266497SsmhSYSCTL_PROC(_vfs_zfs, OID_AUTO, delay_scale, 207266497Ssmh CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(uint64_t), 208266497Ssmh sysctl_zfs_delay_scale, "QU", 209266497Ssmh "Controls how quickly the delay approaches infinity"); 210266497Ssmh 211266497Ssmhstatic int 212271589Ssmhsysctl_zfs_dirty_data_max_percent(SYSCTL_HANDLER_ARGS) 213271589Ssmh{ 214271589Ssmh int val, err; 215271589Ssmh 216271589Ssmh val = zfs_dirty_data_max_percent; 217271589Ssmh err = sysctl_handle_int(oidp, &val, 0, req); 218271589Ssmh if (err != 0 || req->newptr == NULL) 219271589Ssmh return (err); 220271589Ssmh 221271589Ssmh if (val < 0 || val > 100) 222271589Ssmh return (EINVAL); 223271589Ssmh 224271589Ssmh zfs_dirty_data_max_percent = val; 225271589Ssmh 226271589Ssmh return (0); 227271589Ssmh} 228271589Ssmh 229271589Ssmhstatic int 230266497Ssmhsysctl_zfs_delay_min_dirty_percent(SYSCTL_HANDLER_ARGS) 231266497Ssmh{ 232266497Ssmh int val, err; 233266497Ssmh 234266497Ssmh val = zfs_delay_min_dirty_percent; 235266497Ssmh err = sysctl_handle_int(oidp, &val, 0, req); 236266497Ssmh if (err != 0 || req->newptr == NULL) 237266497Ssmh return (err); 238266497Ssmh 239266497Ssmh if (val < zfs_vdev_async_write_active_max_dirty_percent) 240266497Ssmh return (EINVAL); 241266497Ssmh 242266497Ssmh zfs_delay_min_dirty_percent = val; 243266497Ssmh 244266497Ssmh return (0); 245266497Ssmh} 246266497Ssmh 247266497Ssmhstatic int 248266497Ssmhsysctl_zfs_delay_scale(SYSCTL_HANDLER_ARGS) 249266497Ssmh{ 250266497Ssmh uint64_t val; 251266497Ssmh int err; 252266497Ssmh 253266497Ssmh val = zfs_delay_scale; 254266497Ssmh err = sysctl_handle_64(oidp, &val, 0, req); 255266497Ssmh if (err != 0 || req->newptr == NULL) 256266497Ssmh return (err); 257266497Ssmh 258266497Ssmh if (val > UINT64_MAX / zfs_dirty_data_max) 259266497Ssmh return (EINVAL); 260266497Ssmh 261266497Ssmh zfs_delay_scale = 
val; 262266497Ssmh 263266497Ssmh return (0); 264266497Ssmh} 265258632Savg#endif 266219089Spjd 267219089Spjdint 268185029Spjddsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) 269168404Spjd{ 270168404Spjd uint64_t obj; 271168404Spjd int err; 272168404Spjd 273168404Spjd err = zap_lookup(dp->dp_meta_objset, 274275782Sdelphij dsl_dir_phys(dp->dp_root_dir)->dd_child_dir_zapobj, 275185029Spjd name, sizeof (obj), 1, &obj); 276168404Spjd if (err) 277168404Spjd return (err); 278168404Spjd 279248571Smm return (dsl_dir_hold_obj(dp, obj, name, dp, ddp)); 280168404Spjd} 281168404Spjd 282168404Spjdstatic dsl_pool_t * 283168404Spjddsl_pool_open_impl(spa_t *spa, uint64_t txg) 284168404Spjd{ 285168404Spjd dsl_pool_t *dp; 286168404Spjd blkptr_t *bp = spa_get_rootblkptr(spa); 287168404Spjd 288168404Spjd dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP); 289168404Spjd dp->dp_spa = spa; 290168404Spjd dp->dp_meta_rootbp = *bp; 291248571Smm rrw_init(&dp->dp_config_rwlock, B_TRUE); 292168404Spjd txg_init(dp, txg); 293168404Spjd 294321567Smav txg_list_create(&dp->dp_dirty_datasets, spa, 295168404Spjd offsetof(dsl_dataset_t, ds_dirty_link)); 296321567Smav txg_list_create(&dp->dp_dirty_zilogs, spa, 297239620Smm offsetof(zilog_t, zl_dirty_link)); 298321567Smav txg_list_create(&dp->dp_dirty_dirs, spa, 299168404Spjd offsetof(dsl_dir_t, dd_dirty_link)); 300321567Smav txg_list_create(&dp->dp_sync_tasks, spa, 301248571Smm offsetof(dsl_sync_task_t, dst_node)); 302332547Smav txg_list_create(&dp->dp_early_sync_tasks, spa, 303332547Smav offsetof(dsl_sync_task_t, dst_node)); 304168404Spjd 305321553Smav dp->dp_sync_taskq = taskq_create("dp_sync_taskq", 306321553Smav zfs_sync_taskq_batch_pct, minclsyspri, 1, INT_MAX, 307321553Smav TASKQ_THREADS_CPU_PCT); 308321553Smav 309324205Savg dp->dp_zil_clean_taskq = taskq_create("dp_zil_clean_taskq", 310324205Savg zfs_zil_clean_taskq_nthr_pct, minclsyspri, 311324205Savg zfs_zil_clean_taskq_minalloc, 312324205Savg 
zfs_zil_clean_taskq_maxalloc, 313324205Savg TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT); 314324205Savg 315185029Spjd mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL); 316258632Savg cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL); 317185029Spjd 318196307Spjd dp->dp_vnrele_taskq = taskq_create("zfs_vn_rele_taskq", 1, minclsyspri, 319196307Spjd 1, 4, 0); 320196307Spjd 321168404Spjd return (dp); 322168404Spjd} 323168404Spjd 324168404Spjdint 325236884Smmdsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp) 326168404Spjd{ 327168404Spjd int err; 328168404Spjd dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); 329236884Smm 330236884Smm err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, 331236884Smm &dp->dp_meta_objset); 332236884Smm if (err != 0) 333236884Smm dsl_pool_close(dp); 334236884Smm else 335236884Smm *dpp = dp; 336236884Smm 337236884Smm return (err); 338236884Smm} 339236884Smm 340236884Smmint 341236884Smmdsl_pool_open(dsl_pool_t *dp) 342236884Smm{ 343236884Smm int err; 344185029Spjd dsl_dir_t *dd; 345185029Spjd dsl_dataset_t *ds; 346219089Spjd uint64_t obj; 347168404Spjd 348248571Smm rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); 349168404Spjd err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 350168404Spjd DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, 351168404Spjd &dp->dp_root_dir_obj); 352168404Spjd if (err) 353168404Spjd goto out; 354168404Spjd 355248571Smm err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, 356168404Spjd NULL, dp, &dp->dp_root_dir); 357168404Spjd if (err) 358168404Spjd goto out; 359168404Spjd 360185029Spjd err = dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir); 361168404Spjd if (err) 362168404Spjd goto out; 363168404Spjd 364236884Smm if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) { 365185029Spjd err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd); 366185029Spjd if (err) 367185029Spjd goto out; 368275782Sdelphij err = dsl_dataset_hold_obj(dp, 369275782Sdelphij 
dsl_dir_phys(dd)->dd_head_dataset_obj, FTAG, &ds); 370209962Smm if (err == 0) { 371209962Smm err = dsl_dataset_hold_obj(dp, 372275782Sdelphij dsl_dataset_phys(ds)->ds_prev_snap_obj, dp, 373219089Spjd &dp->dp_origin_snap); 374209962Smm dsl_dataset_rele(ds, FTAG); 375209962Smm } 376248571Smm dsl_dir_rele(dd, dp); 377185029Spjd if (err) 378185029Spjd goto out; 379185029Spjd } 380185029Spjd 381236884Smm if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) { 382219089Spjd err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME, 383219089Spjd &dp->dp_free_dir); 384185029Spjd if (err) 385185029Spjd goto out; 386219089Spjd 387185029Spjd err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 388219089Spjd DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj); 389185029Spjd if (err) 390185029Spjd goto out; 391248571Smm VERIFY0(bpobj_open(&dp->dp_free_bpobj, 392219089Spjd dp->dp_meta_objset, obj)); 393185029Spjd } 394185029Spjd 395332525Smav if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_OBSOLETE_COUNTS)) { 396332525Smav err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 397332525Smav DMU_POOL_OBSOLETE_BPOBJ, sizeof (uint64_t), 1, &obj); 398332525Smav if (err == 0) { 399332525Smav VERIFY0(bpobj_open(&dp->dp_obsolete_bpobj, 400332525Smav dp->dp_meta_objset, obj)); 401332525Smav } else if (err == ENOENT) { 402332525Smav /* 403332525Smav * We might not have created the remap bpobj yet. 404332525Smav */ 405332525Smav err = 0; 406332525Smav } else { 407332525Smav goto out; 408332525Smav } 409332525Smav } 410332525Smav 411268079Sdelphij /* 412332525Smav * Note: errors ignored, because the these special dirs, used for 413332525Smav * space accounting, are only created on demand. 
414268079Sdelphij */ 415268079Sdelphij (void) dsl_pool_open_special_dir(dp, LEAK_DIR_NAME, 416268079Sdelphij &dp->dp_leak_dir); 417268079Sdelphij 418259813Sdelphij if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) { 419236884Smm err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 420236884Smm DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, 421236884Smm &dp->dp_bptree_obj); 422236884Smm if (err != 0) 423236884Smm goto out; 424236884Smm } 425236884Smm 426259813Sdelphij if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMPTY_BPOBJ)) { 427239774Smm err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 428239774Smm DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1, 429239774Smm &dp->dp_empty_bpobj); 430239774Smm if (err != 0) 431239774Smm goto out; 432239774Smm } 433239774Smm 434219089Spjd err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 435219089Spjd DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1, 436219089Spjd &dp->dp_tmp_userrefs_obj); 437219089Spjd if (err == ENOENT) 438219089Spjd err = 0; 439219089Spjd if (err) 440219089Spjd goto out; 441219089Spjd 442236884Smm err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg); 443219089Spjd 444168404Spjdout: 445248571Smm rrw_exit(&dp->dp_config_rwlock, FTAG); 446168404Spjd return (err); 447168404Spjd} 448168404Spjd 449168404Spjdvoid 450168404Spjddsl_pool_close(dsl_pool_t *dp) 451168404Spjd{ 452185029Spjd /* 453258632Savg * Drop our references from dsl_pool_open(). 454258632Savg * 455185029Spjd * Since we held the origin_snap from "syncing" context (which 456185029Spjd * includes pool-opening context), it actually only got a "ref" 457185029Spjd * and not a hold, so just drop that here. 
458185029Spjd */ 459332525Smav if (dp->dp_origin_snap != NULL) 460248571Smm dsl_dataset_rele(dp->dp_origin_snap, dp); 461332525Smav if (dp->dp_mos_dir != NULL) 462248571Smm dsl_dir_rele(dp->dp_mos_dir, dp); 463332525Smav if (dp->dp_free_dir != NULL) 464248571Smm dsl_dir_rele(dp->dp_free_dir, dp); 465332525Smav if (dp->dp_leak_dir != NULL) 466268079Sdelphij dsl_dir_rele(dp->dp_leak_dir, dp); 467332525Smav if (dp->dp_root_dir != NULL) 468248571Smm dsl_dir_rele(dp->dp_root_dir, dp); 469168404Spjd 470219089Spjd bpobj_close(&dp->dp_free_bpobj); 471332525Smav bpobj_close(&dp->dp_obsolete_bpobj); 472219089Spjd 473168404Spjd /* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */ 474332525Smav if (dp->dp_meta_objset != NULL) 475219089Spjd dmu_objset_evict(dp->dp_meta_objset); 476168404Spjd 477168404Spjd txg_list_destroy(&dp->dp_dirty_datasets); 478239620Smm txg_list_destroy(&dp->dp_dirty_zilogs); 479219089Spjd txg_list_destroy(&dp->dp_sync_tasks); 480332547Smav txg_list_destroy(&dp->dp_early_sync_tasks); 481168404Spjd txg_list_destroy(&dp->dp_dirty_dirs); 482168404Spjd 483324205Savg taskq_destroy(dp->dp_zil_clean_taskq); 484321553Smav taskq_destroy(dp->dp_sync_taskq); 485321553Smav 486286763Smav /* 487286763Smav * We can't set retry to TRUE since we're explicitly specifying 488286763Smav * a spa to flush. This is good enough; any missed buffers for 489286763Smav * this spa won't cause trouble, and they'll eventually fall 490286763Smav * out of the ARC just like any other unused buffer. 
491286763Smav */ 492286763Smav arc_flush(dp->dp_spa, FALSE); 493286763Smav 494168404Spjd txg_fini(dp); 495219089Spjd dsl_scan_fini(dp); 496286575Smav dmu_buf_user_evict_wait(); 497286575Smav 498248571Smm rrw_destroy(&dp->dp_config_rwlock); 499185029Spjd mutex_destroy(&dp->dp_lock); 500196307Spjd taskq_destroy(dp->dp_vnrele_taskq); 501347049Smav if (dp->dp_blkstats != NULL) { 502347049Smav mutex_destroy(&dp->dp_blkstats->zab_lock); 503208047Smm kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); 504347049Smav } 505168404Spjd kmem_free(dp, sizeof (dsl_pool_t)); 506168404Spjd} 507168404Spjd 508332525Smavvoid 509332525Smavdsl_pool_create_obsolete_bpobj(dsl_pool_t *dp, dmu_tx_t *tx) 510332525Smav{ 511332525Smav uint64_t obj; 512332525Smav /* 513332525Smav * Currently, we only create the obsolete_bpobj where there are 514332525Smav * indirect vdevs with referenced mappings. 515332525Smav */ 516332525Smav ASSERT(spa_feature_is_active(dp->dp_spa, SPA_FEATURE_DEVICE_REMOVAL)); 517332525Smav /* create and open the obsolete_bpobj */ 518332525Smav obj = bpobj_alloc(dp->dp_meta_objset, SPA_OLD_MAXBLOCKSIZE, tx); 519332525Smav VERIFY0(bpobj_open(&dp->dp_obsolete_bpobj, dp->dp_meta_objset, obj)); 520332525Smav VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 521332525Smav DMU_POOL_OBSOLETE_BPOBJ, sizeof (uint64_t), 1, &obj, tx)); 522332525Smav spa_feature_incr(dp->dp_spa, SPA_FEATURE_OBSOLETE_COUNTS, tx); 523332525Smav} 524332525Smav 525332525Smavvoid 526332525Smavdsl_pool_destroy_obsolete_bpobj(dsl_pool_t *dp, dmu_tx_t *tx) 527332525Smav{ 528332525Smav spa_feature_decr(dp->dp_spa, SPA_FEATURE_OBSOLETE_COUNTS, tx); 529332525Smav VERIFY0(zap_remove(dp->dp_meta_objset, 530332525Smav DMU_POOL_DIRECTORY_OBJECT, 531332525Smav DMU_POOL_OBSOLETE_BPOBJ, tx)); 532332525Smav bpobj_free(dp->dp_meta_objset, 533332525Smav dp->dp_obsolete_bpobj.bpo_object, tx); 534332525Smav bpobj_close(&dp->dp_obsolete_bpobj); 535332525Smav} 536332525Smav 537168404Spjddsl_pool_t * 
538185029Spjddsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) 539168404Spjd{ 540168404Spjd int err; 541168404Spjd dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); 542168404Spjd dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); 543185029Spjd dsl_dataset_t *ds; 544219089Spjd uint64_t obj; 545185029Spjd 546248571Smm rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); 547248571Smm 548185029Spjd /* create and open the MOS (meta-objset) */ 549219089Spjd dp->dp_meta_objset = dmu_objset_create_impl(spa, 550219089Spjd NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); 551168404Spjd 552168404Spjd /* create the pool directory */ 553168404Spjd err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 554168404Spjd DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx); 555240415Smm ASSERT0(err); 556168404Spjd 557219089Spjd /* Initialize scan structures */ 558248571Smm VERIFY0(dsl_scan_init(dp, txg)); 559219089Spjd 560168404Spjd /* create and open the root dir */ 561185029Spjd dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); 562248571Smm VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, 563168404Spjd NULL, dp, &dp->dp_root_dir)); 564168404Spjd 565168404Spjd /* create and open the meta-objset dir */ 566185029Spjd (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); 567248571Smm VERIFY0(dsl_pool_open_special_dir(dp, 568185029Spjd MOS_DIR_NAME, &dp->dp_mos_dir)); 569168404Spjd 570219089Spjd if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { 571219089Spjd /* create and open the free dir */ 572219089Spjd (void) dsl_dir_create_sync(dp, dp->dp_root_dir, 573219089Spjd FREE_DIR_NAME, tx); 574248571Smm VERIFY0(dsl_pool_open_special_dir(dp, 575219089Spjd FREE_DIR_NAME, &dp->dp_free_dir)); 576219089Spjd 577219089Spjd /* create and open the free_bplist */ 578274337Sdelphij obj = bpobj_alloc(dp->dp_meta_objset, SPA_OLD_MAXBLOCKSIZE, tx); 579219089Spjd VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 580219089Spjd DMU_POOL_FREE_BPOBJ, sizeof 
(uint64_t), 1, &obj, tx) == 0); 581248571Smm VERIFY0(bpobj_open(&dp->dp_free_bpobj, 582219089Spjd dp->dp_meta_objset, obj)); 583219089Spjd } 584219089Spjd 585185029Spjd if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) 586185029Spjd dsl_pool_create_origin(dp, tx); 587185029Spjd 588185029Spjd /* create the root dataset */ 589219089Spjd obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); 590185029Spjd 591185029Spjd /* create the root objset */ 592248571Smm VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); 593185029Spjd#ifdef _KERNEL 594325911Savg { 595325911Savg objset_t *os; 596325911Savg rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 597325911Savg os = dmu_objset_create_impl(dp->dp_spa, ds, 598325911Savg dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx); 599325911Savg rrw_exit(&ds->ds_bp_rwlock, FTAG); 600325911Savg zfs_create_fs(os, kcred, zplprops, tx); 601325911Savg } 602185029Spjd#endif 603185029Spjd dsl_dataset_rele(ds, FTAG); 604185029Spjd 605168404Spjd dmu_tx_commit(tx); 606168404Spjd 607248571Smm rrw_exit(&dp->dp_config_rwlock, FTAG); 608248571Smm 609168404Spjd return (dp); 610168404Spjd} 611168404Spjd 612239620Smm/* 613239620Smm * Account for the meta-objset space in its placeholder dsl_dir. 
614239620Smm */ 615239620Smmvoid 616239620Smmdsl_pool_mos_diduse_space(dsl_pool_t *dp, 617239620Smm int64_t used, int64_t comp, int64_t uncomp) 618239620Smm{ 619239620Smm ASSERT3U(comp, ==, uncomp); /* it's all metadata */ 620239620Smm mutex_enter(&dp->dp_lock); 621239620Smm dp->dp_mos_used_delta += used; 622239620Smm dp->dp_mos_compressed_delta += comp; 623239620Smm dp->dp_mos_uncompressed_delta += uncomp; 624239620Smm mutex_exit(&dp->dp_lock); 625239620Smm} 626239620Smm 627258632Savgstatic void 628258632Savgdsl_pool_sync_mos(dsl_pool_t *dp, dmu_tx_t *tx) 629258632Savg{ 630258632Savg zio_t *zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 631258632Savg dmu_objset_sync(dp->dp_meta_objset, zio, tx); 632258632Savg VERIFY0(zio_wait(zio)); 633258632Savg dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", ""); 634258632Savg spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); 635258632Savg} 636258632Savg 637258632Savgstatic void 638258632Savgdsl_pool_dirty_delta(dsl_pool_t *dp, int64_t delta) 639258632Savg{ 640258632Savg ASSERT(MUTEX_HELD(&dp->dp_lock)); 641258632Savg 642258632Savg if (delta < 0) 643258632Savg ASSERT3U(-delta, <=, dp->dp_dirty_total); 644258632Savg 645258632Savg dp->dp_dirty_total += delta; 646258632Savg 647258632Savg /* 648258632Savg * Note: we signal even when increasing dp_dirty_total. 649258632Savg * This ensures forward progress -- each thread wakes the next waiter. 
650258632Savg */ 651319420Savg if (dp->dp_dirty_total < zfs_dirty_data_max) 652258632Savg cv_signal(&dp->dp_spaceavail_cv); 653258632Savg} 654258632Savg 655332547Smavstatic boolean_t 656332547Smavdsl_early_sync_task_verify(dsl_pool_t *dp, uint64_t txg) 657332547Smav{ 658332547Smav spa_t *spa = dp->dp_spa; 659332547Smav vdev_t *rvd = spa->spa_root_vdev; 660332547Smav 661332547Smav for (uint64_t c = 0; c < rvd->vdev_children; c++) { 662332547Smav vdev_t *vd = rvd->vdev_child[c]; 663332547Smav txg_list_t *tl = &vd->vdev_ms_list; 664332547Smav metaslab_t *ms; 665332547Smav 666332547Smav for (ms = txg_list_head(tl, TXG_CLEAN(txg)); ms; 667332547Smav ms = txg_list_next(tl, ms, TXG_CLEAN(txg))) { 668332547Smav VERIFY(range_tree_is_empty(ms->ms_freeing)); 669332547Smav VERIFY(range_tree_is_empty(ms->ms_checkpointing)); 670332547Smav } 671332547Smav } 672332547Smav 673332547Smav return (B_TRUE); 674332547Smav} 675332547Smav 676168404Spjdvoid 677168404Spjddsl_pool_sync(dsl_pool_t *dp, uint64_t txg) 678168404Spjd{ 679168404Spjd zio_t *zio; 680168404Spjd dmu_tx_t *tx; 681168404Spjd dsl_dir_t *dd; 682168404Spjd dsl_dataset_t *ds; 683219089Spjd objset_t *mos = dp->dp_meta_objset; 684239620Smm list_t synced_datasets; 685168404Spjd 686239620Smm list_create(&synced_datasets, sizeof (dsl_dataset_t), 687239620Smm offsetof(dsl_dataset_t, ds_synced_link)); 688239620Smm 689258632Savg tx = dmu_tx_create_assigned(dp, txg); 690258632Savg 691219089Spjd /* 692332547Smav * Run all early sync tasks before writing out any dirty blocks. 693332547Smav * For more info on early sync tasks see block comment in 694332547Smav * dsl_early_sync_task(). 
695332547Smav */ 696332547Smav if (!txg_list_empty(&dp->dp_early_sync_tasks, txg)) { 697332547Smav dsl_sync_task_t *dst; 698332547Smav 699332547Smav ASSERT3U(spa_sync_pass(dp->dp_spa), ==, 1); 700332547Smav while ((dst = 701332547Smav txg_list_remove(&dp->dp_early_sync_tasks, txg)) != NULL) { 702332547Smav ASSERT(dsl_early_sync_task_verify(dp, txg)); 703332547Smav dsl_sync_task_sync(dst, tx); 704332547Smav } 705332547Smav ASSERT(dsl_early_sync_task_verify(dp, txg)); 706332547Smav } 707332547Smav 708332547Smav /* 709258632Savg * Write out all dirty blocks of dirty datasets. 710219089Spjd */ 711168404Spjd zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 712258632Savg while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) { 713209962Smm /* 714209962Smm * We must not sync any non-MOS datasets twice, because 715209962Smm * we may have taken a snapshot of them. However, we 716209962Smm * may sync newly-created datasets on pass 2. 717209962Smm */ 718209962Smm ASSERT(!list_link_active(&ds->ds_synced_link)); 719239620Smm list_insert_tail(&synced_datasets, ds); 720168404Spjd dsl_dataset_sync(ds, zio, tx); 721168404Spjd } 722258632Savg VERIFY0(zio_wait(zio)); 723185029Spjd 724258632Savg /* 725258632Savg * We have written all of the accounted dirty data, so our 726258632Savg * dp_space_towrite should now be zero. However, some seldom-used 727258632Savg * code paths do not adhere to this (e.g. dbuf_undirty(), also 728258632Savg * rounding error in dbuf_write_physdone). 729258632Savg * Shore up the accounting of any dirtied space now. 
730258632Savg */ 731258632Savg dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg); 732168404Spjd 733239620Smm /* 734321523Smav * Update the long range free counter after 735321523Smav * we're done syncing user data 736321523Smav */ 737321523Smav mutex_enter(&dp->dp_lock); 738321523Smav ASSERT(spa_sync_pass(dp->dp_spa) == 1 || 739321523Smav dp->dp_long_free_dirty_pertxg[txg & TXG_MASK] == 0); 740321523Smav dp->dp_long_free_dirty_pertxg[txg & TXG_MASK] = 0; 741321523Smav mutex_exit(&dp->dp_lock); 742321523Smav 743321523Smav /* 744239620Smm * After the data blocks have been written (ensured by the zio_wait() 745321553Smav * above), update the user/group space accounting. This happens 746321553Smav * in tasks dispatched to dp_sync_taskq, so wait for them before 747321553Smav * continuing. 748239620Smm */ 749258632Savg for (ds = list_head(&synced_datasets); ds != NULL; 750258632Savg ds = list_next(&synced_datasets, ds)) { 751219089Spjd dmu_objset_do_userquota_updates(ds->ds_objset, tx); 752258632Savg } 753321553Smav taskq_wait(dp->dp_sync_taskq); 754209962Smm 755209962Smm /* 756209962Smm * Sync the datasets again to push out the changes due to 757219089Spjd * userspace updates. This must be done before we process the 758239620Smm * sync tasks, so that any snapshots will have the correct 759239620Smm * user accounting information (and we won't get confused 760239620Smm * about which blocks are part of the snapshot). 
761209962Smm */ 762209962Smm zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 763258632Savg while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) { 764209962Smm ASSERT(list_link_active(&ds->ds_synced_link)); 765209962Smm dmu_buf_rele(ds->ds_dbuf, ds); 766209962Smm dsl_dataset_sync(ds, zio, tx); 767209962Smm } 768258632Savg VERIFY0(zio_wait(zio)); 769209962Smm 770219089Spjd /* 771239620Smm * Now that the datasets have been completely synced, we can 772239620Smm * clean up our in-memory structures accumulated while syncing: 773239620Smm * 774239620Smm * - move dead blocks from the pending deadlist to the on-disk deadlist 775239620Smm * - release hold from dsl_dataset_dirty() 776219089Spjd */ 777258632Savg while ((ds = list_remove_head(&synced_datasets)) != NULL) { 778310511Savg dsl_dataset_sync_done(ds, tx); 779219089Spjd } 780258632Savg while ((dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) != NULL) { 781168404Spjd dsl_dir_sync(dd, tx); 782258632Savg } 783168404Spjd 784239620Smm /* 785239620Smm * The MOS's space is accounted for in the pool/$MOS 786239620Smm * (dp_mos_dir). We can't modify the mos while we're syncing 787239620Smm * it, so we remember the deltas and apply them here. 
788239620Smm */ 789239620Smm if (dp->dp_mos_used_delta != 0 || dp->dp_mos_compressed_delta != 0 || 790239620Smm dp->dp_mos_uncompressed_delta != 0) { 791239620Smm dsl_dir_diduse_space(dp->dp_mos_dir, DD_USED_HEAD, 792239620Smm dp->dp_mos_used_delta, 793239620Smm dp->dp_mos_compressed_delta, 794239620Smm dp->dp_mos_uncompressed_delta, tx); 795239620Smm dp->dp_mos_used_delta = 0; 796239620Smm dp->dp_mos_compressed_delta = 0; 797239620Smm dp->dp_mos_uncompressed_delta = 0; 798239620Smm } 799239620Smm 800321553Smav if (!multilist_is_empty(mos->os_dirty_dnodes[txg & TXG_MASK])) { 801258632Savg dsl_pool_sync_mos(dp, tx); 802168404Spjd } 803168404Spjd 804239620Smm /* 805239620Smm * If we modify a dataset in the same txg that we want to destroy it, 806239620Smm * its dsl_dir's dd_dbuf will be dirty, and thus have a hold on it. 807239620Smm * dsl_dir_destroy_check() will fail if there are unexpected holds. 808239620Smm * Therefore, we want to sync the MOS (thus syncing the dd_dbuf 809239620Smm * and clearing the hold on it) before we process the sync_tasks. 810239620Smm * The MOS data dirtied by the sync_tasks will be synced on the next 811239620Smm * pass. 812239620Smm */ 813239620Smm if (!txg_list_empty(&dp->dp_sync_tasks, txg)) { 814248571Smm dsl_sync_task_t *dst; 815239620Smm /* 816239620Smm * No more sync tasks should have been added while we 817239620Smm * were syncing. 
818239620Smm */ 819258632Savg ASSERT3U(spa_sync_pass(dp->dp_spa), ==, 1); 820258632Savg while ((dst = txg_list_remove(&dp->dp_sync_tasks, txg)) != NULL) 821248571Smm dsl_sync_task_sync(dst, tx); 822239620Smm } 823239620Smm 824168404Spjd dmu_tx_commit(tx); 825185029Spjd 826258632Savg DTRACE_PROBE2(dsl_pool_sync__done, dsl_pool_t *dp, dp, uint64_t, txg); 827168404Spjd} 828168404Spjd 829168404Spjdvoid 830219089Spjddsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg) 831168404Spjd{ 832239620Smm zilog_t *zilog; 833168404Spjd 834310515Savg while (zilog = txg_list_head(&dp->dp_dirty_zilogs, txg)) { 835258632Savg dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); 836310515Savg /* 837310515Savg * We don't remove the zilog from the dp_dirty_zilogs 838310515Savg * list until after we've cleaned it. This ensures that 839310515Savg * callers of zilog_is_dirty() receive an accurate 840310515Savg * answer when they are racing with the spa sync thread. 841310515Savg */ 842239620Smm zil_clean(zilog, txg); 843310515Savg (void) txg_list_remove_this(&dp->dp_dirty_zilogs, zilog, txg); 844239620Smm ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg)); 845239620Smm dmu_buf_rele(ds->ds_dbuf, zilog); 846168404Spjd } 847219089Spjd ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg)); 848168404Spjd} 849168404Spjd 850168404Spjd/* 851168404Spjd * TRUE if the current thread is the tx_sync_thread or if we 852168404Spjd * are being called from SPA context during pool initialization. 853168404Spjd */ 854168404Spjdint 855168404Spjddsl_pool_sync_context(dsl_pool_t *dp) 856168404Spjd{ 857168404Spjd return (curthread == dp->dp_tx.tx_sync_thread || 858321553Smav spa_is_initializing(dp->dp_spa) || 859321553Smav taskq_member(dp->dp_sync_taskq, curthread)); 860168404Spjd} 861168404Spjd 862332547Smav/* 863332547Smav * This function returns the amount of allocatable space in the pool 864332547Smav * minus whatever space is currently reserved by ZFS for specific 865332547Smav * purposes. 
Specifically: 866332547Smav * 867332547Smav * 1] Any reserved SLOP space 868332547Smav * 2] Any space used by the checkpoint 869332547Smav * 3] Any space used for deferred frees 870332547Smav * 871332547Smav * The latter 2 are especially important because they are needed to 872332547Smav * rectify the SPA's and DMU's different understanding of how much space 873332547Smav * is used. Now the DMU is aware of that extra space tracked by the SPA 874332547Smav * without having to maintain a separate special dir (e.g similar to 875332547Smav * $MOS, $FREEING, and $LEAKED). 876332547Smav * 877332547Smav * Note: By deferred frees here, we mean the frees that were deferred 878332547Smav * in spa_sync() after sync pass 1 (spa_deferred_bpobj), and not the 879332547Smav * segments placed in ms_defer trees during metaslab_sync_done(). 880332547Smav */ 881168404Spjduint64_t 882332547Smavdsl_pool_adjustedsize(dsl_pool_t *dp, zfs_space_check_t slop_policy) 883168404Spjd{ 884332547Smav spa_t *spa = dp->dp_spa; 885332547Smav uint64_t space, resv, adjustedsize; 886332547Smav uint64_t spa_deferred_frees = 887332547Smav spa->spa_deferred_bpobj.bpo_phys->bpo_bytes; 888168404Spjd 889332547Smav space = spa_get_dspace(spa) 890332547Smav - spa_get_checkpoint_space(spa) - spa_deferred_frees; 891332547Smav resv = spa_get_slop_space(spa); 892332547Smav 893332547Smav switch (slop_policy) { 894332547Smav case ZFS_SPACE_CHECK_NORMAL: 895332547Smav break; 896332547Smav case ZFS_SPACE_CHECK_RESERVED: 897168404Spjd resv >>= 1; 898332547Smav break; 899332547Smav case ZFS_SPACE_CHECK_EXTRA_RESERVED: 900332547Smav resv >>= 2; 901332547Smav break; 902332547Smav case ZFS_SPACE_CHECK_NONE: 903332547Smav resv = 0; 904332547Smav break; 905332547Smav default: 906332547Smav panic("invalid slop policy value: %d", slop_policy); 907332547Smav break; 908332547Smav } 909332547Smav adjustedsize = (space >= resv) ? 
(space - resv) : 0; 910168404Spjd 911332547Smav return (adjustedsize); 912168404Spjd} 913185029Spjd 914332547Smavuint64_t 915332547Smavdsl_pool_unreserved_space(dsl_pool_t *dp, zfs_space_check_t slop_policy) 916332547Smav{ 917332547Smav uint64_t poolsize = dsl_pool_adjustedsize(dp, slop_policy); 918332547Smav uint64_t deferred = 919332547Smav metaslab_class_get_deferred(spa_normal_class(dp->dp_spa)); 920332547Smav uint64_t quota = (poolsize >= deferred) ? (poolsize - deferred) : 0; 921332547Smav return (quota); 922332547Smav} 923332547Smav 924258632Savgboolean_t 925258632Savgdsl_pool_need_dirty_delay(dsl_pool_t *dp) 926185029Spjd{ 927258632Savg uint64_t delay_min_bytes = 928258632Savg zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100; 929258632Savg boolean_t rv; 930185029Spjd 931258632Savg mutex_enter(&dp->dp_lock); 932258632Savg if (dp->dp_dirty_total > zfs_dirty_data_sync) 933258632Savg txg_kick(dp); 934258632Savg rv = (dp->dp_dirty_total > delay_min_bytes); 935258632Savg mutex_exit(&dp->dp_lock); 936258632Savg return (rv); 937185029Spjd} 938185029Spjd 939185029Spjdvoid 940258632Savgdsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) 941185029Spjd{ 942258632Savg if (space > 0) { 943258632Savg mutex_enter(&dp->dp_lock); 944258632Savg dp->dp_dirty_pertxg[tx->tx_txg & TXG_MASK] += space; 945258632Savg dsl_pool_dirty_delta(dp, space); 946258632Savg mutex_exit(&dp->dp_lock); 947258632Savg } 948185029Spjd} 949185029Spjd 950185029Spjdvoid 951258632Savgdsl_pool_undirty_space(dsl_pool_t *dp, int64_t space, uint64_t txg) 952185029Spjd{ 953258632Savg ASSERT3S(space, >=, 0); 954258632Savg if (space == 0) 955185029Spjd return; 956258632Savg mutex_enter(&dp->dp_lock); 957258632Savg if (dp->dp_dirty_pertxg[txg & TXG_MASK] < space) { 958258632Savg /* XXX writing something we didn't dirty? 
*/ 959258632Savg space = dp->dp_dirty_pertxg[txg & TXG_MASK]; 960185029Spjd } 961258632Savg ASSERT3U(dp->dp_dirty_pertxg[txg & TXG_MASK], >=, space); 962258632Savg dp->dp_dirty_pertxg[txg & TXG_MASK] -= space; 963258632Savg ASSERT3U(dp->dp_dirty_total, >=, space); 964258632Savg dsl_pool_dirty_delta(dp, -space); 965258632Savg mutex_exit(&dp->dp_lock); 966185029Spjd} 967185029Spjd 968185029Spjd/* ARGSUSED */ 969185029Spjdstatic int 970248571Smmupgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) 971185029Spjd{ 972185029Spjd dmu_tx_t *tx = arg; 973185029Spjd dsl_dataset_t *ds, *prev = NULL; 974185029Spjd int err; 975185029Spjd 976248571Smm err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds); 977185029Spjd if (err) 978185029Spjd return (err); 979185029Spjd 980275782Sdelphij while (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { 981275782Sdelphij err = dsl_dataset_hold_obj(dp, 982275782Sdelphij dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); 983185029Spjd if (err) { 984185029Spjd dsl_dataset_rele(ds, FTAG); 985185029Spjd return (err); 986185029Spjd } 987185029Spjd 988275782Sdelphij if (dsl_dataset_phys(prev)->ds_next_snap_obj != ds->ds_object) 989185029Spjd break; 990185029Spjd dsl_dataset_rele(ds, FTAG); 991185029Spjd ds = prev; 992185029Spjd prev = NULL; 993185029Spjd } 994185029Spjd 995185029Spjd if (prev == NULL) { 996185029Spjd prev = dp->dp_origin_snap; 997185029Spjd 998185029Spjd /* 999185029Spjd * The $ORIGIN can't have any data, or the accounting 1000185029Spjd * will be wrong. 
1001185029Spjd */ 1002308082Smav rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 1003275782Sdelphij ASSERT0(dsl_dataset_phys(prev)->ds_bp.blk_birth); 1004308082Smav rrw_exit(&ds->ds_bp_rwlock, FTAG); 1005185029Spjd 1006185029Spjd /* The origin doesn't get attached to itself */ 1007185029Spjd if (ds->ds_object == prev->ds_object) { 1008185029Spjd dsl_dataset_rele(ds, FTAG); 1009185029Spjd return (0); 1010185029Spjd } 1011185029Spjd 1012185029Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 1013275782Sdelphij dsl_dataset_phys(ds)->ds_prev_snap_obj = prev->ds_object; 1014275782Sdelphij dsl_dataset_phys(ds)->ds_prev_snap_txg = 1015275782Sdelphij dsl_dataset_phys(prev)->ds_creation_txg; 1016185029Spjd 1017185029Spjd dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 1018275782Sdelphij dsl_dir_phys(ds->ds_dir)->dd_origin_obj = prev->ds_object; 1019185029Spjd 1020185029Spjd dmu_buf_will_dirty(prev->ds_dbuf, tx); 1021275782Sdelphij dsl_dataset_phys(prev)->ds_num_children++; 1022185029Spjd 1023275782Sdelphij if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0) { 1024185029Spjd ASSERT(ds->ds_prev == NULL); 1025248571Smm VERIFY0(dsl_dataset_hold_obj(dp, 1026275782Sdelphij dsl_dataset_phys(ds)->ds_prev_snap_obj, 1027275782Sdelphij ds, &ds->ds_prev)); 1028185029Spjd } 1029185029Spjd } 1030185029Spjd 1031275782Sdelphij ASSERT3U(dsl_dir_phys(ds->ds_dir)->dd_origin_obj, ==, prev->ds_object); 1032275782Sdelphij ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_obj, ==, prev->ds_object); 1033185029Spjd 1034275782Sdelphij if (dsl_dataset_phys(prev)->ds_next_clones_obj == 0) { 1035209962Smm dmu_buf_will_dirty(prev->ds_dbuf, tx); 1036275782Sdelphij dsl_dataset_phys(prev)->ds_next_clones_obj = 1037185029Spjd zap_create(dp->dp_meta_objset, 1038185029Spjd DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 1039185029Spjd } 1040248571Smm VERIFY0(zap_add_int(dp->dp_meta_objset, 1041275782Sdelphij dsl_dataset_phys(prev)->ds_next_clones_obj, ds->ds_object, tx)); 1042185029Spjd 1043185029Spjd dsl_dataset_rele(ds, FTAG); 
1044185029Spjd if (prev != dp->dp_origin_snap) 1045185029Spjd dsl_dataset_rele(prev, FTAG); 1046185029Spjd return (0); 1047185029Spjd} 1048185029Spjd 1049185029Spjdvoid 1050185029Spjddsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx) 1051185029Spjd{ 1052185029Spjd ASSERT(dmu_tx_is_syncing(tx)); 1053185029Spjd ASSERT(dp->dp_origin_snap != NULL); 1054185029Spjd 1055248571Smm VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_clones_cb, 1056286686Smav tx, DS_FIND_CHILDREN | DS_FIND_SERIALIZE)); 1057185029Spjd} 1058185029Spjd 1059219089Spjd/* ARGSUSED */ 1060219089Spjdstatic int 1061248571Smmupgrade_dir_clones_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) 1062219089Spjd{ 1063219089Spjd dmu_tx_t *tx = arg; 1064219089Spjd objset_t *mos = dp->dp_meta_objset; 1065219089Spjd 1066275782Sdelphij if (dsl_dir_phys(ds->ds_dir)->dd_origin_obj != 0) { 1067219089Spjd dsl_dataset_t *origin; 1068219089Spjd 1069248571Smm VERIFY0(dsl_dataset_hold_obj(dp, 1070275782Sdelphij dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &origin)); 1071219089Spjd 1072275782Sdelphij if (dsl_dir_phys(origin->ds_dir)->dd_clones == 0) { 1073219089Spjd dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); 1074275782Sdelphij dsl_dir_phys(origin->ds_dir)->dd_clones = 1075275782Sdelphij zap_create(mos, DMU_OT_DSL_CLONES, DMU_OT_NONE, 1076275782Sdelphij 0, tx); 1077219089Spjd } 1078219089Spjd 1079248571Smm VERIFY0(zap_add_int(dp->dp_meta_objset, 1080275782Sdelphij dsl_dir_phys(origin->ds_dir)->dd_clones, 1081275782Sdelphij ds->ds_object, tx)); 1082219089Spjd 1083219089Spjd dsl_dataset_rele(origin, FTAG); 1084219089Spjd } 1085219089Spjd return (0); 1086219089Spjd} 1087219089Spjd 1088185029Spjdvoid 1089219089Spjddsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx) 1090219089Spjd{ 1091219089Spjd ASSERT(dmu_tx_is_syncing(tx)); 1092219089Spjd uint64_t obj; 1093219089Spjd 1094219089Spjd (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); 1095248571Smm VERIFY0(dsl_pool_open_special_dir(dp, 
1096219089Spjd FREE_DIR_NAME, &dp->dp_free_dir)); 1097219089Spjd 1098219089Spjd /* 1099219089Spjd * We can't use bpobj_alloc(), because spa_version() still 1100219089Spjd * returns the old version, and we need a new-version bpobj with 1101219089Spjd * subobj support. So call dmu_object_alloc() directly. 1102219089Spjd */ 1103219089Spjd obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ, 1104274337Sdelphij SPA_OLD_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx); 1105248571Smm VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1106219089Spjd DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx)); 1107248571Smm VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); 1108219089Spjd 1109248571Smm VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, 1110286686Smav upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN | DS_FIND_SERIALIZE)); 1111219089Spjd} 1112219089Spjd 1113219089Spjdvoid 1114185029Spjddsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) 1115185029Spjd{ 1116185029Spjd uint64_t dsobj; 1117185029Spjd dsl_dataset_t *ds; 1118185029Spjd 1119185029Spjd ASSERT(dmu_tx_is_syncing(tx)); 1120185029Spjd ASSERT(dp->dp_origin_snap == NULL); 1121248571Smm ASSERT(rrw_held(&dp->dp_config_rwlock, RW_WRITER)); 1122185029Spjd 1123185029Spjd /* create the origin dir, ds, & snap-ds */ 1124185029Spjd dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME, 1125185029Spjd NULL, 0, kcred, tx); 1126248571Smm VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); 1127248571Smm dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx); 1128275782Sdelphij VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, 1129185029Spjd dp, &dp->dp_origin_snap)); 1130185029Spjd dsl_dataset_rele(ds, FTAG); 1131185029Spjd} 1132196307Spjd 1133196307Spjdtaskq_t * 1134196307Spjddsl_pool_vnrele_taskq(dsl_pool_t *dp) 1135196307Spjd{ 1136196307Spjd return (dp->dp_vnrele_taskq); 1137196307Spjd} 1138219089Spjd 1139219089Spjd/* 1140219089Spjd * Walk through 
the pool-wide zap object of temporary snapshot user holds 1141219089Spjd * and release them. 1142219089Spjd */ 1143219089Spjdvoid 1144219089Spjddsl_pool_clean_tmp_userrefs(dsl_pool_t *dp) 1145219089Spjd{ 1146219089Spjd zap_attribute_t za; 1147219089Spjd zap_cursor_t zc; 1148219089Spjd objset_t *mos = dp->dp_meta_objset; 1149219089Spjd uint64_t zapobj = dp->dp_tmp_userrefs_obj; 1150251646Sdelphij nvlist_t *holds; 1151219089Spjd 1152219089Spjd if (zapobj == 0) 1153219089Spjd return; 1154219089Spjd ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); 1155219089Spjd 1156251646Sdelphij holds = fnvlist_alloc(); 1157251646Sdelphij 1158219089Spjd for (zap_cursor_init(&zc, mos, zapobj); 1159219089Spjd zap_cursor_retrieve(&zc, &za) == 0; 1160219089Spjd zap_cursor_advance(&zc)) { 1161219089Spjd char *htag; 1162251646Sdelphij nvlist_t *tags; 1163219089Spjd 1164219089Spjd htag = strchr(za.za_name, '-'); 1165219089Spjd *htag = '\0'; 1166219089Spjd ++htag; 1167251646Sdelphij if (nvlist_lookup_nvlist(holds, za.za_name, &tags) != 0) { 1168251646Sdelphij tags = fnvlist_alloc(); 1169251646Sdelphij fnvlist_add_boolean(tags, htag); 1170251646Sdelphij fnvlist_add_nvlist(holds, za.za_name, tags); 1171251646Sdelphij fnvlist_free(tags); 1172251646Sdelphij } else { 1173251646Sdelphij fnvlist_add_boolean(tags, htag); 1174251646Sdelphij } 1175219089Spjd } 1176251646Sdelphij dsl_dataset_user_release_tmp(dp, holds); 1177251646Sdelphij fnvlist_free(holds); 1178219089Spjd zap_cursor_fini(&zc); 1179219089Spjd} 1180219089Spjd 1181219089Spjd/* 1182219089Spjd * Create the pool-wide zap object for storing temporary snapshot holds. 
1183219089Spjd */ 1184219089Spjdvoid 1185219089Spjddsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx) 1186219089Spjd{ 1187219089Spjd objset_t *mos = dp->dp_meta_objset; 1188219089Spjd 1189219089Spjd ASSERT(dp->dp_tmp_userrefs_obj == 0); 1190219089Spjd ASSERT(dmu_tx_is_syncing(tx)); 1191219089Spjd 1192236884Smm dp->dp_tmp_userrefs_obj = zap_create_link(mos, DMU_OT_USERREFS, 1193236884Smm DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, tx); 1194219089Spjd} 1195219089Spjd 1196219089Spjdstatic int 1197219089Spjddsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj, 1198248571Smm const char *tag, uint64_t now, dmu_tx_t *tx, boolean_t holding) 1199219089Spjd{ 1200219089Spjd objset_t *mos = dp->dp_meta_objset; 1201219089Spjd uint64_t zapobj = dp->dp_tmp_userrefs_obj; 1202219089Spjd char *name; 1203219089Spjd int error; 1204219089Spjd 1205219089Spjd ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); 1206219089Spjd ASSERT(dmu_tx_is_syncing(tx)); 1207219089Spjd 1208219089Spjd /* 1209219089Spjd * If the pool was created prior to SPA_VERSION_USERREFS, the 1210219089Spjd * zap object for temporary holds might not exist yet. 1211219089Spjd */ 1212219089Spjd if (zapobj == 0) { 1213219089Spjd if (holding) { 1214219089Spjd dsl_pool_user_hold_create_obj(dp, tx); 1215219089Spjd zapobj = dp->dp_tmp_userrefs_obj; 1216219089Spjd } else { 1217249195Smm return (SET_ERROR(ENOENT)); 1218219089Spjd } 1219219089Spjd } 1220219089Spjd 1221219089Spjd name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag); 1222219089Spjd if (holding) 1223248571Smm error = zap_add(mos, zapobj, name, 8, 1, &now, tx); 1224219089Spjd else 1225219089Spjd error = zap_remove(mos, zapobj, name, tx); 1226219089Spjd strfree(name); 1227219089Spjd 1228219089Spjd return (error); 1229219089Spjd} 1230219089Spjd 1231219089Spjd/* 1232219089Spjd * Add a temporary hold for the given dataset object and tag. 
1233219089Spjd */ 1234219089Spjdint 1235219089Spjddsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag, 1236248571Smm uint64_t now, dmu_tx_t *tx) 1237219089Spjd{ 1238219089Spjd return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE)); 1239219089Spjd} 1240219089Spjd 1241219089Spjd/* 1242219089Spjd * Release a temporary hold for the given dataset object and tag. 1243219089Spjd */ 1244219089Spjdint 1245219089Spjddsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag, 1246219089Spjd dmu_tx_t *tx) 1247219089Spjd{ 1248354026Savg return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, 0, tx, B_FALSE)); 1249219089Spjd} 1250248571Smm 1251248571Smm/* 1252248571Smm * DSL Pool Configuration Lock 1253248571Smm * 1254248571Smm * The dp_config_rwlock protects against changes to DSL state (e.g. dataset 1255248571Smm * creation / destruction / rename / property setting). It must be held for 1256248571Smm * read to hold a dataset or dsl_dir. I.e. you must call 1257248571Smm * dsl_pool_config_enter() or dsl_pool_hold() before calling 1258248571Smm * dsl_{dataset,dir}_hold{_obj}. In most circumstances, the dp_config_rwlock 1259248571Smm * must be held continuously until all datasets and dsl_dirs are released. 1260248571Smm * 1261248571Smm * The only exception to this rule is that if a "long hold" is placed on 1262248571Smm * a dataset, then the dp_config_rwlock may be dropped while the dataset 1263248571Smm * is still held. The long hold will prevent the dataset from being 1264248571Smm * destroyed -- the destroy will fail with EBUSY. A long hold can be 1265248571Smm * obtained by calling dsl_dataset_long_hold(), or by "owning" a dataset 1266248571Smm * (by calling dsl_{dataset,objset}_{try}own{_obj}). 1267248571Smm * 1268248571Smm * Legitimate long-holders (including owners) should be long-running, cancelable 1269248571Smm * tasks that should cause "zfs destroy" to fail. This includes DMU 1270248571Smm * consumers (i.e. 
 * a ZPL filesystem being mounted or ZVOL being open),
 * "zfs send", and "zfs diff".  There are several other long-holders whose
 * uses are suboptimal (e.g. "zfs promote", and zil_suspend()).
 *
 * The usual formula for long-holding would be:
 *	dsl_pool_hold()
 *	dsl_dataset_hold()
 *	... perform checks ...
 *	dsl_dataset_long_hold()
 *	dsl_pool_rele()
 *	... perform long-running task ...
 *	dsl_dataset_long_rele()
 *	dsl_dataset_rele()
 *
 * Note that when the long hold is released, the dataset is still held but
 * the pool is not held.  The dataset may change arbitrarily during this time
 * (e.g. it could be destroyed).  Therefore you shouldn't do anything to the
 * dataset except release it.
 *
 * User-initiated operations (e.g. ioctls, zfs_ioc_*()) are either read-only
 * or modifying operations.
 *
 * Modifying operations should generally use dsl_sync_task().  The synctask
 * infrastructure enforces proper locking strategy with respect to the
 * dp_config_rwlock.  See the comment above dsl_sync_task() for details.
 *
 * Read-only operations will manually hold the pool, then the dataset, obtain
 * information from the dataset, then release the pool and dataset.
 * dmu_objset_{hold,rele}() are convenience routines that also do the pool
 * hold/rele.
1300248571Smm */ 1301248571Smm 1302248571Smmint 1303248571Smmdsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp) 1304248571Smm{ 1305248571Smm spa_t *spa; 1306248571Smm int error; 1307248571Smm 1308248571Smm error = spa_open(name, &spa, tag); 1309248571Smm if (error == 0) { 1310248571Smm *dp = spa_get_dsl(spa); 1311248571Smm dsl_pool_config_enter(*dp, tag); 1312248571Smm } 1313248571Smm return (error); 1314248571Smm} 1315248571Smm 1316248571Smmvoid 1317248571Smmdsl_pool_rele(dsl_pool_t *dp, void *tag) 1318248571Smm{ 1319248571Smm dsl_pool_config_exit(dp, tag); 1320248571Smm spa_close(dp->dp_spa, tag); 1321248571Smm} 1322248571Smm 1323248571Smmvoid 1324248571Smmdsl_pool_config_enter(dsl_pool_t *dp, void *tag) 1325248571Smm{ 1326248571Smm /* 1327248571Smm * We use a "reentrant" reader-writer lock, but not reentrantly. 1328248571Smm * 1329248571Smm * The rrwlock can (with the track_all flag) track all reading threads, 1330248571Smm * which is very useful for debugging which code path failed to release 1331248571Smm * the lock, and for verifying that the *current* thread does hold 1332248571Smm * the lock. 1333248571Smm * 1334248571Smm * (Unlike a rwlock, which knows that N threads hold it for 1335248571Smm * read, but not *which* threads, so rw_held(RW_READER) returns TRUE 1336248571Smm * if any thread holds it for read, even if this thread doesn't). 
1337248571Smm */ 1338248571Smm ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER)); 1339248571Smm rrw_enter(&dp->dp_config_rwlock, RW_READER, tag); 1340248571Smm} 1341248571Smm 1342248571Smmvoid 1343286689Smavdsl_pool_config_enter_prio(dsl_pool_t *dp, void *tag) 1344286689Smav{ 1345286689Smav ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER)); 1346286689Smav rrw_enter_read_prio(&dp->dp_config_rwlock, tag); 1347286689Smav} 1348286689Smav 1349286689Smavvoid 1350248571Smmdsl_pool_config_exit(dsl_pool_t *dp, void *tag) 1351248571Smm{ 1352248571Smm rrw_exit(&dp->dp_config_rwlock, tag); 1353248571Smm} 1354248571Smm 1355248571Smmboolean_t 1356248571Smmdsl_pool_config_held(dsl_pool_t *dp) 1357248571Smm{ 1358248571Smm return (RRW_LOCK_HELD(&dp->dp_config_rwlock)); 1359248571Smm} 1360286686Smav 1361286686Smavboolean_t 1362286686Smavdsl_pool_config_held_writer(dsl_pool_t *dp) 1363286686Smav{ 1364286686Smav return (RRW_WRITE_HELD(&dp->dp_config_rwlock)); 1365286686Smav} 1366