1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23262320Sdelphij * Portions Copyright (c) 2011 Martin Matuska <mm@FreeBSD.org> 24288572Smav * Copyright (c) 2011, 2015 by Delphix. All rights reserved. 25265744Sdelphij * Copyright (c) 2014, Joyent, Inc. All rights reserved. 26262320Sdelphij * Copyright (c) 2014 RackTop Systems. 27288549Smav * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. 28297112Smav * Copyright (c) 2014 Integros [integros.com] 29297113Smav * Copyright 2016, OmniTI Computer Consulting, Inc. All rights reserved. 30168404Spjd */ 31168404Spjd 32168404Spjd#include <sys/dmu_objset.h> 33168404Spjd#include <sys/dsl_dataset.h> 34168404Spjd#include <sys/dsl_dir.h> 35168404Spjd#include <sys/dsl_prop.h> 36168404Spjd#include <sys/dsl_synctask.h> 37168404Spjd#include <sys/dmu_traverse.h> 38235222Smm#include <sys/dmu_impl.h> 39290756Smav#include <sys/dmu_send.h> 40168404Spjd#include <sys/dmu_tx.h> 41168404Spjd#include <sys/arc.h> 42168404Spjd#include <sys/zio.h> 43168404Spjd#include <sys/zap.h> 44236884Smm#include <sys/zfeature.h> 45168404Spjd#include <sys/unique.h> 46168404Spjd#include <sys/zfs_context.h> 47168676Spjd#include <sys/zfs_ioctl.h> 48185029Spjd#include <sys/spa.h> 49185029Spjd#include <sys/zfs_znode.h> 50219089Spjd#include <sys/zfs_onexit.h> 51219089Spjd#include <sys/zvol.h> 52219089Spjd#include <sys/dsl_scan.h> 53219089Spjd#include <sys/dsl_deadlist.h> 54248571Smm#include <sys/dsl_destroy.h> 55248571Smm#include <sys/dsl_userhold.h> 56263407Sdelphij#include <sys/dsl_bookmark.h> 57290757Smav#include <sys/dmu_send.h> 58290757Smav#include <sys/zio_checksum.h> 59290756Smav#include <sys/zio_compress.h> 60290756Smav#include <zfs_fletcher.h> 61168404Spjd 62276081SdelphijSYSCTL_DECL(_vfs_zfs); 63276081Sdelphij 64276081Sdelphij/* 65276081Sdelphij * The SPA supports block sizes up to 16MB. However, very large blocks 66276081Sdelphij * can have an impact on i/o latency (e.g. tying up a spinning disk for 67276081Sdelphij * ~300ms), and also potentially on the memory allocator. Therefore, 68276081Sdelphij * we do not allow the recordsize to be set larger than zfs_max_recordsize 69276081Sdelphij * (default 1MB). Larger blocks can be created by changing this tunable, 70276081Sdelphij * and pools with larger blocks can always be imported and used, regardless 71276081Sdelphij * of this setting. 72276081Sdelphij */ 73276081Sdelphijint zfs_max_recordsize = 1 * 1024 * 1024; 74276081SdelphijSYSCTL_INT(_vfs_zfs, OID_AUTO, max_recordsize, CTLFLAG_RWTUN, 75276081Sdelphij &zfs_max_recordsize, 0, 76276081Sdelphij "Maximum block size. Expect dragons when tuning this."); 77276081Sdelphij 78219089Spjd#define SWITCH64(x, y) \ 79219089Spjd { \ 80219089Spjd uint64_t __tmp = (x); \ 81219089Spjd (x) = (y); \ 82219089Spjd (y) = __tmp; \ 83219089Spjd } 84219089Spjd 85168404Spjd#define DS_REF_MAX (1ULL << 62) 86168404Spjd 87277585Sdelphijextern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds); 88277585Sdelphij 89297113Smavextern int spa_asize_inflation; 90297113Smav 91310512Savgstatic zil_header_t zero_zil; 92310512Savg 93168404Spjd/* 94185029Spjd * Figure out how much of this delta should be propogated to the dsl_dir 95185029Spjd * layer. If there's a refreservation, that space has already been 96185029Spjd * partially accounted for in our ancestors. 97168404Spjd */ 98185029Spjdstatic int64_t 99185029Spjdparent_delta(dsl_dataset_t *ds, int64_t delta) 100185029Spjd{ 101277585Sdelphij dsl_dataset_phys_t *ds_phys; 102185029Spjd uint64_t old_bytes, new_bytes; 103168404Spjd 104185029Spjd if (ds->ds_reserved == 0) 105185029Spjd return (delta); 106168404Spjd 107277585Sdelphij ds_phys = dsl_dataset_phys(ds); 108277585Sdelphij old_bytes = MAX(ds_phys->ds_unique_bytes, ds->ds_reserved); 109277585Sdelphij new_bytes = MAX(ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 110185029Spjd 111185029Spjd ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 112185029Spjd return (new_bytes - old_bytes); 113185029Spjd} 114185029Spjd 115168404Spjdvoid 116219089Spjddsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) 117168404Spjd{ 118219089Spjd int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 119168404Spjd int compressed = BP_GET_PSIZE(bp); 120168404Spjd int uncompressed = BP_GET_UCSIZE(bp); 121185029Spjd int64_t delta; 122168404Spjd 123219089Spjd dprintf_bp(bp, "ds=%p", ds); 124168404Spjd 125168404Spjd ASSERT(dmu_tx_is_syncing(tx)); 126168404Spjd /* It could have been compressed away to nothing */ 127168404Spjd if (BP_IS_HOLE(bp)) 128168404Spjd return; 129168404Spjd ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 130236884Smm ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp))); 131168404Spjd if (ds == NULL) { 132239620Smm dsl_pool_mos_diduse_space(tx->tx_pool, 133239620Smm used, compressed, uncompressed); 134168404Spjd return; 135168404Spjd } 136254757Sdelphij 137310512Savg ASSERT3U(bp->blk_birth, >, dsl_dataset_phys(ds)->ds_prev_snap_txg); 138168404Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 139168404Spjd mutex_enter(&ds->ds_lock); 140185029Spjd delta = parent_delta(ds, used); 141277585Sdelphij dsl_dataset_phys(ds)->ds_referenced_bytes += used; 142277585Sdelphij dsl_dataset_phys(ds)->ds_compressed_bytes += compressed; 143277585Sdelphij dsl_dataset_phys(ds)->ds_uncompressed_bytes += uncompressed; 144277585Sdelphij dsl_dataset_phys(ds)->ds_unique_bytes += used; 145290757Smav 146288572Smav if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) { 147288572Smav ds->ds_feature_activation_needed[SPA_FEATURE_LARGE_BLOCKS] = 148288572Smav B_TRUE; 149288572Smav } 150290757Smav 151290757Smav spa_feature_t f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp)); 152290757Smav if (f != SPA_FEATURE_NONE) 153290757Smav ds->ds_feature_activation_needed[f] = B_TRUE; 154290757Smav 155168404Spjd mutex_exit(&ds->ds_lock); 156185029Spjd dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, 157185029Spjd compressed, uncompressed, tx); 158185029Spjd dsl_dir_transfer_space(ds->ds_dir, used - delta, 159278142Smav DD_USED_REFRSRV, DD_USED_HEAD, NULL); 160168404Spjd} 161168404Spjd 162185029Spjdint 163219089Spjddsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, 164219089Spjd boolean_t async) 165168404Spjd{ 166263397Sdelphij int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 167263397Sdelphij int compressed = BP_GET_PSIZE(bp); 168263397Sdelphij int uncompressed = BP_GET_UCSIZE(bp); 169263397Sdelphij 170219089Spjd if (BP_IS_HOLE(bp)) 171219089Spjd return (0); 172219089Spjd 173219089Spjd ASSERT(dmu_tx_is_syncing(tx)); 174219089Spjd ASSERT(bp->blk_birth <= tx->tx_txg); 175219089Spjd 176168404Spjd if (ds == NULL) { 177219089Spjd dsl_free(tx->tx_pool, tx->tx_txg, bp); 178239620Smm dsl_pool_mos_diduse_space(tx->tx_pool, 179239620Smm -used, -compressed, -uncompressed); 180185029Spjd return (used); 181168404Spjd } 182168404Spjd ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 183168404Spjd 184288549Smav ASSERT(!ds->ds_is_snapshot); 185168404Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 186168404Spjd 187277585Sdelphij if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) { 188185029Spjd int64_t delta; 189168404Spjd 190219089Spjd dprintf_bp(bp, "freeing ds=%llu", ds->ds_object); 191219089Spjd dsl_free(tx->tx_pool, tx->tx_txg, bp); 192168404Spjd 193168404Spjd mutex_enter(&ds->ds_lock); 194277585Sdelphij ASSERT(dsl_dataset_phys(ds)->ds_unique_bytes >= used || 195185029Spjd !DS_UNIQUE_IS_ACCURATE(ds)); 196185029Spjd delta = parent_delta(ds, -used); 197277585Sdelphij dsl_dataset_phys(ds)->ds_unique_bytes -= used; 198168404Spjd mutex_exit(&ds->ds_lock); 199185029Spjd dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 200185029Spjd delta, -compressed, -uncompressed, tx); 201185029Spjd dsl_dir_transfer_space(ds->ds_dir, -used - delta, 202278142Smav DD_USED_REFRSRV, DD_USED_HEAD, NULL); 203168404Spjd } else { 204168404Spjd dprintf_bp(bp, "putting on dead list: %s", ""); 205219089Spjd if (async) { 206219089Spjd /* 207219089Spjd * We are here as part of zio's write done callback, 208219089Spjd * which means we're a zio interrupt thread. We can't 209219089Spjd * call dsl_deadlist_insert() now because it may block 210219089Spjd * waiting for I/O. Instead, put bp on the deferred 211219089Spjd * queue and let dsl_pool_sync() finish the job. 212219089Spjd */ 213219089Spjd bplist_append(&ds->ds_pending_deadlist, bp); 214219089Spjd } else { 215219089Spjd dsl_deadlist_insert(&ds->ds_deadlist, bp, tx); 216219089Spjd } 217185029Spjd ASSERT3U(ds->ds_prev->ds_object, ==, 218277585Sdelphij dsl_dataset_phys(ds)->ds_prev_snap_obj); 219277585Sdelphij ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_num_children > 0); 220168404Spjd /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 221277585Sdelphij if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == 222185029Spjd ds->ds_object && bp->blk_birth > 223277585Sdelphij dsl_dataset_phys(ds->ds_prev)->ds_prev_snap_txg) { 224185029Spjd dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 225185029Spjd mutex_enter(&ds->ds_prev->ds_lock); 226277585Sdelphij dsl_dataset_phys(ds->ds_prev)->ds_unique_bytes += used; 227185029Spjd mutex_exit(&ds->ds_prev->ds_lock); 228168404Spjd } 229219089Spjd if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { 230185029Spjd dsl_dir_transfer_space(ds->ds_dir, used, 231185029Spjd DD_USED_HEAD, DD_USED_SNAP, tx); 232185029Spjd } 233168404Spjd } 234168404Spjd mutex_enter(&ds->ds_lock); 235277585Sdelphij ASSERT3U(dsl_dataset_phys(ds)->ds_referenced_bytes, >=, used); 236277585Sdelphij dsl_dataset_phys(ds)->ds_referenced_bytes -= used; 237277585Sdelphij ASSERT3U(dsl_dataset_phys(ds)->ds_compressed_bytes, >=, compressed); 238277585Sdelphij dsl_dataset_phys(ds)->ds_compressed_bytes -= compressed; 239277585Sdelphij ASSERT3U(dsl_dataset_phys(ds)->ds_uncompressed_bytes, >=, uncompressed); 240277585Sdelphij dsl_dataset_phys(ds)->ds_uncompressed_bytes -= uncompressed; 241168404Spjd mutex_exit(&ds->ds_lock); 242185029Spjd 243185029Spjd return (used); 244168404Spjd} 245168404Spjd 246168404Spjduint64_t 247168404Spjddsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 248168404Spjd{ 249168404Spjd uint64_t trysnap = 0; 250168404Spjd 251168404Spjd if (ds == NULL) 252168404Spjd return (0); 253168404Spjd /* 254168404Spjd * The snapshot creation could fail, but that would cause an 255168404Spjd * incorrect FALSE return, which would only result in an 256168404Spjd * overestimation of the amount of space that an operation would 257168404Spjd * consume, which is OK. 258168404Spjd * 259168404Spjd * There's also a small window where we could miss a pending 260168404Spjd * snapshot, because we could set the sync task in the quiescing 261168404Spjd * phase. So this should only be used as a guess. 262168404Spjd */ 263168404Spjd if (ds->ds_trysnap_txg > 264168404Spjd spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 265168404Spjd trysnap = ds->ds_trysnap_txg; 266277585Sdelphij return (MAX(dsl_dataset_phys(ds)->ds_prev_snap_txg, trysnap)); 267168404Spjd} 268168404Spjd 269209962Smmboolean_t 270219089Spjddsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, 271219089Spjd uint64_t blk_birth) 272168404Spjd{ 273263397Sdelphij if (blk_birth <= dsl_dataset_prev_snap_txg(ds) || 274263397Sdelphij (bp != NULL && BP_IS_HOLE(bp))) 275219089Spjd return (B_FALSE); 276219089Spjd 277219089Spjd ddt_prefetch(dsl_dataset_get_spa(ds), bp); 278219089Spjd 279219089Spjd return (B_TRUE); 280168404Spjd} 281168404Spjd 282168404Spjdstatic void 283288549Smavdsl_dataset_evict(void *dbu) 284168404Spjd{ 285288549Smav dsl_dataset_t *ds = dbu; 286168404Spjd 287248571Smm ASSERT(ds->ds_owner == NULL); 288168404Spjd 289288549Smav ds->ds_dbuf = NULL; 290288549Smav 291185029Spjd unique_remove(ds->ds_fsid_guid); 292168404Spjd 293219089Spjd if (ds->ds_objset != NULL) 294219089Spjd dmu_objset_evict(ds->ds_objset); 295168404Spjd 296168404Spjd if (ds->ds_prev) { 297248571Smm dsl_dataset_rele(ds->ds_prev, ds); 298168404Spjd ds->ds_prev = NULL; 299168404Spjd } 300168404Spjd 301219089Spjd bplist_destroy(&ds->ds_pending_deadlist); 302288549Smav if (ds->ds_deadlist.dl_os != NULL) 303219089Spjd dsl_deadlist_close(&ds->ds_deadlist); 304185029Spjd if (ds->ds_dir) 305288549Smav dsl_dir_async_rele(ds->ds_dir, ds); 306168404Spjd 307185029Spjd ASSERT(!list_link_active(&ds->ds_synced_link)); 308168404Spjd 309289100Sdelphij list_destroy(&ds->ds_prop_cbs); 310185029Spjd if (mutex_owned(&ds->ds_lock)) 311185029Spjd mutex_exit(&ds->ds_lock); 312168404Spjd mutex_destroy(&ds->ds_lock); 313185029Spjd if (mutex_owned(&ds->ds_opening_lock)) 314185029Spjd mutex_exit(&ds->ds_opening_lock); 315185029Spjd mutex_destroy(&ds->ds_opening_lock); 316269218Sdelphij mutex_destroy(&ds->ds_sendstream_lock); 317248571Smm refcount_destroy(&ds->ds_longholds); 318308083Smav rrw_destroy(&ds->ds_bp_rwlock); 319168404Spjd 320168404Spjd kmem_free(ds, sizeof (dsl_dataset_t)); 321168404Spjd} 322168404Spjd 323248571Smmint 324168404Spjddsl_dataset_get_snapname(dsl_dataset_t *ds) 325168404Spjd{ 326168404Spjd dsl_dataset_phys_t *headphys; 327168404Spjd int err; 328168404Spjd dmu_buf_t *headdbuf; 329168404Spjd dsl_pool_t *dp = ds->ds_dir->dd_pool; 330168404Spjd objset_t *mos = dp->dp_meta_objset; 331168404Spjd 332168404Spjd if (ds->ds_snapname[0]) 333168404Spjd return (0); 334277585Sdelphij if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0) 335168404Spjd return (0); 336168404Spjd 337277585Sdelphij err = dmu_bonus_hold(mos, dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, 338168404Spjd FTAG, &headdbuf); 339248571Smm if (err != 0) 340168404Spjd return (err); 341168404Spjd headphys = headdbuf->db_data; 342168404Spjd err = zap_value_search(dp->dp_meta_objset, 343185029Spjd headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 344168404Spjd dmu_buf_rele(headdbuf, FTAG); 345168404Spjd return (err); 346168404Spjd} 347168404Spjd 348248571Smmint 349185029Spjddsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) 350168404Spjd{ 351185029Spjd objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 352277585Sdelphij uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; 353185029Spjd matchtype_t mt; 354185029Spjd int err; 355185029Spjd 356277585Sdelphij if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) 357185029Spjd mt = MT_FIRST; 358185029Spjd else 359185029Spjd mt = MT_EXACT; 360185029Spjd 361185029Spjd err = zap_lookup_norm(mos, snapobj, name, 8, 1, 362185029Spjd value, mt, NULL, 0, NULL); 363185029Spjd if (err == ENOTSUP && mt == MT_FIRST) 364185029Spjd err = zap_lookup(mos, snapobj, name, 8, 1, value); 365185029Spjd return (err); 366185029Spjd} 367185029Spjd 368248571Smmint 369265744Sdelphijdsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx, 370265744Sdelphij boolean_t adj_cnt) 371185029Spjd{ 372185029Spjd objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 373277585Sdelphij uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; 374185029Spjd matchtype_t mt; 375185029Spjd int err; 376185029Spjd 377219089Spjd dsl_dir_snap_cmtime_update(ds->ds_dir); 378219089Spjd 379277585Sdelphij if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) 380185029Spjd mt = MT_FIRST; 381185029Spjd else 382185029Spjd mt = MT_EXACT; 383185029Spjd 384185029Spjd err = zap_remove_norm(mos, snapobj, name, mt, tx); 385185029Spjd if (err == ENOTSUP && mt == MT_FIRST) 386185029Spjd err = zap_remove(mos, snapobj, name, tx); 387265744Sdelphij 388265744Sdelphij if (err == 0 && adj_cnt) 389265744Sdelphij dsl_fs_ss_count_adjust(ds->ds_dir, -1, 390265744Sdelphij DD_FIELD_SNAPSHOT_COUNT, tx); 391265744Sdelphij 392185029Spjd return (err); 393185029Spjd} 394185029Spjd 395288538Smavboolean_t 396288538Smavdsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag) 397288538Smav{ 398288539Smav dmu_buf_t *dbuf = ds->ds_dbuf; 399288539Smav boolean_t result = B_FALSE; 400288539Smav 401288539Smav if (dbuf != NULL && dmu_buf_try_add_ref(dbuf, dp->dp_meta_objset, 402288539Smav ds->ds_object, DMU_BONUS_BLKID, tag)) { 403288539Smav 404288539Smav if (ds == dmu_buf_get_user(dbuf)) 405288539Smav result = B_TRUE; 406288539Smav else 407288539Smav dmu_buf_rele(dbuf, tag); 408288539Smav } 409288539Smav 410288539Smav return (result); 411288538Smav} 412288538Smav 413248571Smmint 414248571Smmdsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, 415185029Spjd dsl_dataset_t **dsp) 416185029Spjd{ 417168404Spjd objset_t *mos = dp->dp_meta_objset; 418168404Spjd dmu_buf_t *dbuf; 419168404Spjd dsl_dataset_t *ds; 420168404Spjd int err; 421219089Spjd dmu_object_info_t doi; 422168404Spjd 423248571Smm ASSERT(dsl_pool_config_held(dp)); 424168404Spjd 425168404Spjd err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 426248571Smm if (err != 0) 427168404Spjd return (err); 428219089Spjd 429219089Spjd /* Make sure dsobj has the correct object type. */ 430219089Spjd dmu_object_info_from_db(dbuf, &doi); 431263390Sdelphij if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) { 432251632Sdelphij dmu_buf_rele(dbuf, tag); 433249195Smm return (SET_ERROR(EINVAL)); 434251632Sdelphij } 435219089Spjd 436168404Spjd ds = dmu_buf_get_user(dbuf); 437168404Spjd if (ds == NULL) { 438247187Smm dsl_dataset_t *winner = NULL; 439168404Spjd 440168404Spjd ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 441168404Spjd ds->ds_dbuf = dbuf; 442168404Spjd ds->ds_object = dsobj; 443288549Smav ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0; 444168404Spjd 445168404Spjd mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 446185029Spjd mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 447235222Smm mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); 448308083Smav rrw_init(&ds->ds_bp_rwlock, B_FALSE); 449248571Smm refcount_create(&ds->ds_longholds); 450235222Smm 451219089Spjd bplist_create(&ds->ds_pending_deadlist); 452219089Spjd dsl_deadlist_open(&ds->ds_deadlist, 453277585Sdelphij mos, dsl_dataset_phys(ds)->ds_deadlist_obj); 454219089Spjd 455235222Smm list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), 456235222Smm offsetof(dmu_sendarg_t, dsa_link)); 457235222Smm 458289100Sdelphij list_create(&ds->ds_prop_cbs, sizeof (dsl_prop_cb_record_t), 459289100Sdelphij offsetof(dsl_prop_cb_record_t, cbr_ds_node)); 460289100Sdelphij 461276081Sdelphij if (doi.doi_type == DMU_OTN_ZAP_METADATA) { 462288572Smav for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 463288572Smav if (!(spa_feature_table[f].fi_flags & 464288572Smav ZFEATURE_FLAG_PER_DATASET)) 465288572Smav continue; 466288572Smav err = zap_contains(mos, dsobj, 467288572Smav spa_feature_table[f].fi_guid); 468288572Smav if (err == 0) { 469288572Smav ds->ds_feature_inuse[f] = B_TRUE; 470288572Smav } else { 471288572Smav ASSERT3U(err, ==, ENOENT); 472288572Smav err = 0; 473288572Smav } 474276081Sdelphij } 475276081Sdelphij } 476276081Sdelphij 477288572Smav err = dsl_dir_hold_obj(dp, 478288572Smav dsl_dataset_phys(ds)->ds_dir_obj, NULL, ds, &ds->ds_dir); 479248571Smm if (err != 0) { 480168404Spjd mutex_destroy(&ds->ds_lock); 481185029Spjd mutex_destroy(&ds->ds_opening_lock); 482269218Sdelphij mutex_destroy(&ds->ds_sendstream_lock); 483248571Smm refcount_destroy(&ds->ds_longholds); 484219089Spjd bplist_destroy(&ds->ds_pending_deadlist); 485219089Spjd dsl_deadlist_close(&ds->ds_deadlist); 486168404Spjd kmem_free(ds, sizeof (dsl_dataset_t)); 487168404Spjd dmu_buf_rele(dbuf, tag); 488168404Spjd return (err); 489168404Spjd } 490168404Spjd 491288549Smav if (!ds->ds_is_snapshot) { 492168404Spjd ds->ds_snapname[0] = '\0'; 493277585Sdelphij if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { 494248571Smm err = dsl_dataset_hold_obj(dp, 495277585Sdelphij dsl_dataset_phys(ds)->ds_prev_snap_obj, 496185029Spjd ds, &ds->ds_prev); 497168404Spjd } 498263407Sdelphij if (doi.doi_type == DMU_OTN_ZAP_METADATA) { 499263407Sdelphij int zaperr = zap_lookup(mos, ds->ds_object, 500263407Sdelphij DS_FIELD_BOOKMARK_NAMES, 501263407Sdelphij sizeof (ds->ds_bookmarks), 1, 502263407Sdelphij &ds->ds_bookmarks); 503263407Sdelphij if (zaperr != ENOENT) 504263407Sdelphij VERIFY0(zaperr); 505263407Sdelphij } 506219089Spjd } else { 507219089Spjd if (zfs_flags & ZFS_DEBUG_SNAPNAMES) 508219089Spjd err = dsl_dataset_get_snapname(ds); 509277585Sdelphij if (err == 0 && 510277585Sdelphij dsl_dataset_phys(ds)->ds_userrefs_obj != 0) { 511219089Spjd err = zap_count( 512219089Spjd ds->ds_dir->dd_pool->dp_meta_objset, 513277585Sdelphij dsl_dataset_phys(ds)->ds_userrefs_obj, 514219089Spjd &ds->ds_userrefs); 515168404Spjd } 516168404Spjd } 517168404Spjd 518288549Smav if (err == 0 && !ds->ds_is_snapshot) { 519248571Smm err = dsl_prop_get_int_ds(ds, 520248571Smm zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 521248571Smm &ds->ds_reserved); 522185029Spjd if (err == 0) { 523248571Smm err = dsl_prop_get_int_ds(ds, 524248571Smm zfs_prop_to_name(ZFS_PROP_REFQUOTA), 525248571Smm &ds->ds_quota); 526185029Spjd } 527185029Spjd } else { 528185029Spjd ds->ds_reserved = ds->ds_quota = 0; 529185029Spjd } 530185029Spjd 531288549Smav dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict, &ds->ds_dbuf); 532288549Smav if (err == 0) 533288549Smav winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu); 534288549Smav 535288549Smav if (err != 0 || winner != NULL) { 536219089Spjd bplist_destroy(&ds->ds_pending_deadlist); 537219089Spjd dsl_deadlist_close(&ds->ds_deadlist); 538185029Spjd if (ds->ds_prev) 539248571Smm dsl_dataset_rele(ds->ds_prev, ds); 540248571Smm dsl_dir_rele(ds->ds_dir, ds); 541168404Spjd mutex_destroy(&ds->ds_lock); 542185029Spjd mutex_destroy(&ds->ds_opening_lock); 543269218Sdelphij mutex_destroy(&ds->ds_sendstream_lock); 544248571Smm refcount_destroy(&ds->ds_longholds); 545168404Spjd kmem_free(ds, sizeof (dsl_dataset_t)); 546248571Smm if (err != 0) { 547168404Spjd dmu_buf_rele(dbuf, tag); 548168404Spjd return (err); 549168404Spjd } 550168404Spjd ds = winner; 551168404Spjd } else { 552185029Spjd ds->ds_fsid_guid = 553277585Sdelphij unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid); 554168404Spjd } 555168404Spjd } 556168404Spjd ASSERT3P(ds->ds_dbuf, ==, dbuf); 557277585Sdelphij ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data); 558277585Sdelphij ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 || 559185029Spjd spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || 560185029Spjd dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); 561168404Spjd *dsp = ds; 562168404Spjd return (0); 563168404Spjd} 564168404Spjd 565168404Spjdint 566248571Smmdsl_dataset_hold(dsl_pool_t *dp, const char *name, 567219089Spjd void *tag, dsl_dataset_t **dsp) 568185029Spjd{ 569168404Spjd dsl_dir_t *dd; 570185029Spjd const char *snapname; 571168404Spjd uint64_t obj; 572168404Spjd int err = 0; 573288571Smav dsl_dataset_t *ds; 574168404Spjd 575248571Smm err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname); 576248571Smm if (err != 0) 577168404Spjd return (err); 578168404Spjd 579248571Smm ASSERT(dsl_pool_config_held(dp)); 580277585Sdelphij obj = dsl_dir_phys(dd)->dd_head_dataset_obj; 581248571Smm if (obj != 0) 582288571Smav err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 583185029Spjd else 584249195Smm err = SET_ERROR(ENOENT); 585168404Spjd 586185029Spjd /* we may be looking for a snapshot */ 587185029Spjd if (err == 0 && snapname != NULL) { 588288571Smav dsl_dataset_t *snap_ds; 589168404Spjd 590185029Spjd if (*snapname++ != '@') { 591288571Smav dsl_dataset_rele(ds, tag); 592248571Smm dsl_dir_rele(dd, FTAG); 593249195Smm return (SET_ERROR(ENOENT)); 594168404Spjd } 595168404Spjd 596185029Spjd dprintf("looking for snapshot '%s'\n", snapname); 597288571Smav err = dsl_dataset_snap_lookup(ds, snapname, &obj); 598185029Spjd if (err == 0) 599288571Smav err = dsl_dataset_hold_obj(dp, obj, tag, &snap_ds); 600288571Smav dsl_dataset_rele(ds, tag); 601185029Spjd 602248571Smm if (err == 0) { 603288571Smav mutex_enter(&snap_ds->ds_lock); 604288571Smav if (snap_ds->ds_snapname[0] == 0) 605288571Smav (void) strlcpy(snap_ds->ds_snapname, snapname, 606288571Smav sizeof (snap_ds->ds_snapname)); 607288571Smav mutex_exit(&snap_ds->ds_lock); 608288571Smav ds = snap_ds; 609168404Spjd } 610168404Spjd } 611288571Smav if (err == 0) 612288571Smav *dsp = ds; 613248571Smm dsl_dir_rele(dd, FTAG); 614168404Spjd return (err); 615168404Spjd} 616168404Spjd 617168404Spjdint 618248571Smmdsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, 619219089Spjd void *tag, dsl_dataset_t **dsp) 620168404Spjd{ 621248571Smm int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); 622248571Smm if (err != 0) 623185029Spjd return (err); 624248571Smm if (!dsl_dataset_tryown(*dsp, tag)) { 625219089Spjd dsl_dataset_rele(*dsp, tag); 626248571Smm *dsp = NULL; 627249195Smm return (SET_ERROR(EBUSY)); 628185029Spjd } 629185029Spjd return (0); 630168404Spjd} 631168404Spjd 632248571Smmint 633248571Smmdsl_dataset_own(dsl_pool_t *dp, const char *name, 634248571Smm void *tag, dsl_dataset_t **dsp) 635248571Smm{ 636248571Smm int err = dsl_dataset_hold(dp, name, tag, dsp); 637248571Smm if (err != 0) 638248571Smm return (err); 639248571Smm if (!dsl_dataset_tryown(*dsp, tag)) { 640248571Smm dsl_dataset_rele(*dsp, tag); 641249195Smm return (SET_ERROR(EBUSY)); 642248571Smm } 643248571Smm return (0); 644248571Smm} 645248571Smm 646248571Smm/* 647248571Smm * See the comment above dsl_pool_hold() for details. In summary, a long 648248571Smm * hold is used to prevent destruction of a dataset while the pool hold 649248571Smm * is dropped, allowing other concurrent operations (e.g. spa_sync()). 650248571Smm * 651248571Smm * The dataset and pool must be held when this function is called. After it 652248571Smm * is called, the pool hold may be released while the dataset is still held 653248571Smm * and accessed. 654248571Smm */ 655168404Spjdvoid 656248571Smmdsl_dataset_long_hold(dsl_dataset_t *ds, void *tag) 657248571Smm{ 658248571Smm ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 659248571Smm (void) refcount_add(&ds->ds_longholds, tag); 660248571Smm} 661248571Smm 662248571Smmvoid 663248571Smmdsl_dataset_long_rele(dsl_dataset_t *ds, void *tag) 664248571Smm{ 665248571Smm (void) refcount_remove(&ds->ds_longholds, tag); 666248571Smm} 667248571Smm 668248571Smm/* Return B_TRUE if there are any long holds on this dataset. */ 669248571Smmboolean_t 670248571Smmdsl_dataset_long_held(dsl_dataset_t *ds) 671248571Smm{ 672248571Smm return (!refcount_is_zero(&ds->ds_longholds)); 673248571Smm} 674248571Smm 675248571Smmvoid 676168404Spjddsl_dataset_name(dsl_dataset_t *ds, char *name) 677168404Spjd{ 678168404Spjd if (ds == NULL) { 679168404Spjd (void) strcpy(name, "mos"); 680168404Spjd } else { 681168404Spjd dsl_dir_name(ds->ds_dir, name); 682248571Smm VERIFY0(dsl_dataset_get_snapname(ds)); 683168404Spjd if (ds->ds_snapname[0]) { 684307122Smav VERIFY3U(strlcat(name, "@", ZFS_MAX_DATASET_NAME_LEN), 685307122Smav <, ZFS_MAX_DATASET_NAME_LEN); 686185029Spjd /* 687185029Spjd * We use a "recursive" mutex so that we 688185029Spjd * can call dprintf_ds() with ds_lock held. 689185029Spjd */ 690168404Spjd if (!MUTEX_HELD(&ds->ds_lock)) { 691168404Spjd mutex_enter(&ds->ds_lock); 692307122Smav VERIFY3U(strlcat(name, ds->ds_snapname, 693307122Smav ZFS_MAX_DATASET_NAME_LEN), <, 694307122Smav ZFS_MAX_DATASET_NAME_LEN); 695168404Spjd mutex_exit(&ds->ds_lock); 696168404Spjd } else { 697307122Smav VERIFY3U(strlcat(name, ds->ds_snapname, 698307122Smav ZFS_MAX_DATASET_NAME_LEN), <, 699307122Smav ZFS_MAX_DATASET_NAME_LEN); 700168404Spjd } 701168404Spjd } 702168404Spjd } 703168404Spjd} 704168404Spjd 705307122Smavint 706307122Smavdsl_dataset_namelen(dsl_dataset_t *ds) 707307122Smav{ 708307122Smav VERIFY0(dsl_dataset_get_snapname(ds)); 709307122Smav mutex_enter(&ds->ds_lock); 710307122Smav int len = dsl_dir_namelen(ds->ds_dir) + 1 + strlen(ds->ds_snapname); 711307122Smav mutex_exit(&ds->ds_lock); 712307122Smav return (len); 713307122Smav} 714307122Smav 715168404Spjdvoid 716248571Smmdsl_dataset_rele(dsl_dataset_t *ds, void *tag) 717168404Spjd{ 718185029Spjd dmu_buf_rele(ds->ds_dbuf, tag); 719185029Spjd} 720185029Spjd 721185029Spjdvoid 722219089Spjddsl_dataset_disown(dsl_dataset_t *ds, void *tag) 723185029Spjd{ 724277573Sdelphij ASSERT3P(ds->ds_owner, ==, tag); 725277573Sdelphij ASSERT(ds->ds_dbuf != NULL); 726185029Spjd 727168404Spjd mutex_enter(&ds->ds_lock); 728185029Spjd ds->ds_owner = NULL; 729168404Spjd mutex_exit(&ds->ds_lock); 730248571Smm dsl_dataset_long_rele(ds, tag); 731277573Sdelphij dsl_dataset_rele(ds, tag); 732185029Spjd} 733168404Spjd 734185029Spjdboolean_t 735248571Smmdsl_dataset_tryown(dsl_dataset_t *ds, void *tag) 736185029Spjd{ 737185029Spjd boolean_t gotit = FALSE; 738185029Spjd 739290756Smav ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 740185029Spjd mutex_enter(&ds->ds_lock); 741248571Smm if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) { 742219089Spjd ds->ds_owner = tag; 743248571Smm dsl_dataset_long_hold(ds, tag); 744185029Spjd gotit = TRUE; 745185029Spjd } 746185029Spjd mutex_exit(&ds->ds_lock); 747185029Spjd return (gotit); 748168404Spjd} 749168404Spjd 750290756Smavboolean_t 751290756Smavdsl_dataset_has_owner(dsl_dataset_t *ds) 752290756Smav{ 753290756Smav boolean_t rv; 754290756Smav mutex_enter(&ds->ds_lock); 755290756Smav rv = (ds->ds_owner != NULL); 756290756Smav mutex_exit(&ds->ds_lock); 757290756Smav return (rv); 758290756Smav} 759290756Smav 760288572Smavstatic void 761288572Smavdsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx) 762288572Smav{ 763288572Smav spa_t *spa = dmu_tx_pool(tx)->dp_spa; 764288572Smav objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset; 765288572Smav uint64_t zero = 0; 766288572Smav 767288572Smav VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET); 768288572Smav 769288572Smav spa_feature_incr(spa, f, tx); 770288572Smav dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx); 771288572Smav 772288572Smav VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid, 773288572Smav sizeof (zero), 1, &zero, tx)); 774288572Smav} 775288572Smav 776288572Smavvoid 777288572Smavdsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx) 778288572Smav{ 779288572Smav spa_t *spa = dmu_tx_pool(tx)->dp_spa; 780288572Smav objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset; 781288572Smav 782288572Smav VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET); 783288572Smav 784288572Smav VERIFY0(zap_remove(mos, dsobj, spa_feature_table[f].fi_guid, tx)); 785288572Smav spa_feature_decr(spa, f, tx); 786288572Smav} 787288572Smav 788185029Spjduint64_t 789185029Spjddsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, 790185029Spjd uint64_t flags, dmu_tx_t *tx) 791185029Spjd{ 792185029Spjd dsl_pool_t *dp = dd->dd_pool; 793168404Spjd dmu_buf_t *dbuf; 794168404Spjd dsl_dataset_phys_t *dsphys; 795168404Spjd uint64_t dsobj; 796185029Spjd objset_t *mos = dp->dp_meta_objset; 797168404Spjd 798185029Spjd if (origin == NULL) 799185029Spjd origin = dp->dp_origin_snap; 800168404Spjd 801185029Spjd ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 802277585Sdelphij ASSERT(origin == NULL || dsl_dataset_phys(origin)->ds_num_children > 0); 803185029Spjd ASSERT(dmu_tx_is_syncing(tx)); 804277585Sdelphij ASSERT(dsl_dir_phys(dd)->dd_head_dataset_obj == 0); 805185029Spjd 806168404Spjd dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 807168404Spjd DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 808248571Smm VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 809168404Spjd dmu_buf_will_dirty(dbuf, tx); 810168404Spjd dsphys = dbuf->db_data; 811185029Spjd bzero(dsphys, sizeof (dsl_dataset_phys_t)); 812168404Spjd dsphys->ds_dir_obj = dd->dd_object; 813185029Spjd dsphys->ds_flags = flags; 814168404Spjd dsphys->ds_fsid_guid = unique_create(); 815236823Spjd do { 816236823Spjd (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 817236823Spjd sizeof (dsphys->ds_guid)); 818236823Spjd } while (dsphys->ds_guid == 0); 819168404Spjd dsphys->ds_snapnames_zapobj = 820185029Spjd zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, 821185029Spjd DMU_OT_NONE, 0, tx); 822168404Spjd dsphys->ds_creation_time = gethrestime_sec(); 823185029Spjd dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg; 824185029Spjd 825219089Spjd if (origin == NULL) { 826219089Spjd dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); 827219089Spjd } else { 828248571Smm dsl_dataset_t *ohds; /* head of the origin snapshot */ 829219089Spjd 830185029Spjd dsphys->ds_prev_snap_obj = origin->ds_object; 831185029Spjd dsphys->ds_prev_snap_txg = 832277585Sdelphij dsl_dataset_phys(origin)->ds_creation_txg; 833236884Smm dsphys->ds_referenced_bytes = 834277585Sdelphij dsl_dataset_phys(origin)->ds_referenced_bytes; 835185029Spjd dsphys->ds_compressed_bytes = 836277585Sdelphij dsl_dataset_phys(origin)->ds_compressed_bytes; 837185029Spjd dsphys->ds_uncompressed_bytes = 838277585Sdelphij dsl_dataset_phys(origin)->ds_uncompressed_bytes; 839308083Smav rrw_enter(&origin->ds_bp_rwlock, RW_READER, FTAG); 840277585Sdelphij dsphys->ds_bp = dsl_dataset_phys(origin)->ds_bp; 841308083Smav rrw_exit(&origin->ds_bp_rwlock, FTAG); 842185029Spjd 843273195Sdelphij /* 844273195Sdelphij * Inherit flags that describe the dataset's contents 845273195Sdelphij * (INCONSISTENT) or properties (Case Insensitive). 846273195Sdelphij */ 847277585Sdelphij dsphys->ds_flags |= dsl_dataset_phys(origin)->ds_flags & 848273195Sdelphij (DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET); 849273195Sdelphij 850288572Smav for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 851288572Smav if (origin->ds_feature_inuse[f]) 852288572Smav dsl_dataset_activate_feature(dsobj, f, tx); 853288572Smav } 854276081Sdelphij 855185029Spjd dmu_buf_will_dirty(origin->ds_dbuf, tx); 856277585Sdelphij dsl_dataset_phys(origin)->ds_num_children++; 857185029Spjd 858248571Smm VERIFY0(dsl_dataset_hold_obj(dp, 859277585Sdelphij dsl_dir_phys(origin->ds_dir)->dd_head_dataset_obj, 860277585Sdelphij FTAG, &ohds)); 861219089Spjd dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, 862219089Spjd dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); 863219089Spjd dsl_dataset_rele(ohds, FTAG); 864219089Spjd 865185029Spjd if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { 866277585Sdelphij if (dsl_dataset_phys(origin)->ds_next_clones_obj == 0) { 867277585Sdelphij dsl_dataset_phys(origin)->ds_next_clones_obj = 868185029Spjd zap_create(mos, 869185029Spjd DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 870185029Spjd } 871248571Smm VERIFY0(zap_add_int(mos, 872277585Sdelphij dsl_dataset_phys(origin)->ds_next_clones_obj, 873277585Sdelphij dsobj, tx)); 874185029Spjd } 875185029Spjd 876185029Spjd dmu_buf_will_dirty(dd->dd_dbuf, tx); 877277585Sdelphij dsl_dir_phys(dd)->dd_origin_obj = origin->ds_object; 878219089Spjd if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 879277585Sdelphij if (dsl_dir_phys(origin->ds_dir)->dd_clones == 0) { 880219089Spjd dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); 881277585Sdelphij dsl_dir_phys(origin->ds_dir)->dd_clones = 882219089Spjd zap_create(mos, 883219089Spjd DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 884219089Spjd } 885248571Smm VERIFY0(zap_add_int(mos, 886277585Sdelphij dsl_dir_phys(origin->ds_dir)->dd_clones, 887277585Sdelphij dsobj, tx)); 888219089Spjd } 889185029Spjd } 890185029Spjd 891185029Spjd if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 892185029Spjd dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 893185029Spjd 894168404Spjd dmu_buf_rele(dbuf, FTAG); 895168404Spjd 896168404Spjd dmu_buf_will_dirty(dd->dd_dbuf, tx); 897277585Sdelphij dsl_dir_phys(dd)->dd_head_dataset_obj = dsobj; 898168404Spjd 899185029Spjd return (dsobj); 900168404Spjd} 901168404Spjd 902248571Smmstatic void 903248571Smmdsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) 904248571Smm{ 905248571Smm objset_t *os; 906248571Smm 907248571Smm VERIFY0(dmu_objset_from_ds(ds, &os)); 908310512Savg if (bcmp(&os->os_zil_header, &zero_zil, sizeof (zero_zil)) != 0) { 909310512Savg dsl_pool_t *dp = ds->ds_dir->dd_pool; 910310512Savg zio_t *zio; 911310512Savg 912310512Savg bzero(&os->os_zil_header, sizeof (os->os_zil_header)); 913310512Savg 914310512Savg zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 915310512Savg dsl_dataset_sync(ds, zio, tx); 916310512Savg VERIFY0(zio_wait(zio)); 917310512Savg 918310512Savg /* dsl_dataset_sync_done will drop this reference. */ 919310512Savg dmu_buf_add_ref(ds->ds_dbuf, ds); 920310512Savg dsl_dataset_sync_done(ds, tx); 921310512Savg } 922248571Smm} 923248571Smm 924168404Spjduint64_t 925185029Spjddsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, 926185029Spjd dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) 927168404Spjd{ 928168404Spjd dsl_pool_t *dp = pdd->dd_pool; 929168404Spjd uint64_t dsobj, ddobj; 930168404Spjd dsl_dir_t *dd; 931168404Spjd 932248571Smm ASSERT(dmu_tx_is_syncing(tx)); 933168404Spjd ASSERT(lastname[0] != '@'); 934168404Spjd 935185029Spjd ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); 936248571Smm VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd)); 937168404Spjd 938248571Smm dsobj = dsl_dataset_create_sync_dd(dd, origin, 939248571Smm flags & ~DS_CREATE_FLAG_NODIRTY, tx); 940168404Spjd 941185029Spjd dsl_deleg_set_create_perms(dd, tx, cr); 942168404Spjd 943265744Sdelphij /* 944265744Sdelphij * Since we're creating a new node we know it's a leaf, so we can 945265744Sdelphij * initialize the counts if the limit feature is active. 946265744Sdelphij */ 947265744Sdelphij if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { 948265744Sdelphij uint64_t cnt = 0; 949265744Sdelphij objset_t *os = dd->dd_pool->dp_meta_objset; 950265744Sdelphij 951265744Sdelphij dsl_dir_zapify(dd, tx); 952265744Sdelphij VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, 953265744Sdelphij sizeof (cnt), 1, &cnt, tx)); 954265744Sdelphij VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, 955265744Sdelphij sizeof (cnt), 1, &cnt, tx)); 956265744Sdelphij } 957265744Sdelphij 958248571Smm dsl_dir_rele(dd, FTAG); 959168404Spjd 960219089Spjd /* 961219089Spjd * If we are creating a clone, make sure we zero out any stale 962219089Spjd * data from the origin snapshots zil header. 963219089Spjd */ 964248571Smm if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) { 965219089Spjd dsl_dataset_t *ds; 966219089Spjd 967248571Smm VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); 968248571Smm dsl_dataset_zero_zil(ds, tx); 969219089Spjd dsl_dataset_rele(ds, FTAG); 970219089Spjd } 971219089Spjd 972168404Spjd return (dsobj); 973168404Spjd} 974168404Spjd 975228103Smm#ifdef __FreeBSD__ 976228103Smm/* FreeBSD ioctl compat begin */ 977168404Spjdstruct destroyarg { 978228103Smm nvlist_t *nvl; 979228103Smm const char *snapname; 980168404Spjd}; 981168404Spjd 982168404Spjdstatic int 983228103Smmdsl_check_snap_cb(const char *name, void *arg) 984168404Spjd{ 985168404Spjd struct destroyarg *da = arg; 986168404Spjd dsl_dataset_t *ds; 987219089Spjd char *dsname; 988168404Spjd 989219089Spjd dsname = kmem_asprintf("%s@%s", name, da->snapname); 990248493Smm fnvlist_add_boolean(da->nvl, dsname); 991248493Smm kmem_free(dsname, strlen(dsname) + 1); 992219089Spjd 993228103Smm return (0); 994228103Smm} 995228103Smm 996228103Smmint 997248571Smmdmu_get_recursive_snaps_nvl(char *fsname, const char *snapname, 998228103Smm nvlist_t *snaps) 999228103Smm{ 1000228103Smm struct destroyarg *da; 1001228103Smm int err; 1002228103Smm 1003228103Smm da = kmem_zalloc(sizeof (struct destroyarg), KM_SLEEP); 1004228103Smm da->nvl = snaps; 1005228103Smm da->snapname = snapname; 1006228103Smm err = dmu_objset_find(fsname, dsl_check_snap_cb, da, 1007228103Smm DS_FIND_CHILDREN); 1008228103Smm kmem_free(da, sizeof (struct destroyarg)); 1009228103Smm 1010185029Spjd return (err); 1011168404Spjd} 1012228103Smm/* FreeBSD ioctl compat end */ 1013228103Smm#endif /* __FreeBSD__ */ 1014168404Spjd 1015168404Spjd/* 1016185029Spjd * The unique space in the head dataset can be calculated by subtracting 1017185029Spjd * the space used in the most recent snapshot, that is still being used 1018185029Spjd * in this file system, from the space currently in use. To figure out 1019185029Spjd * the space in the most recent snapshot still in use, we need to take 1020185029Spjd * the total space used in the snapshot and subtract out the space that 1021185029Spjd * has been freed up since the snapshot was taken. 1022185029Spjd */ 1023248571Smmvoid 1024185029Spjddsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) 1025185029Spjd{ 1026185029Spjd uint64_t mrs_used; 1027185029Spjd uint64_t dlused, dlcomp, dluncomp; 1028185029Spjd 1029288549Smav ASSERT(!ds->ds_is_snapshot); 1030185029Spjd 1031277585Sdelphij if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) 1032277585Sdelphij mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes; 1033185029Spjd else 1034185029Spjd mrs_used = 0; 1035185029Spjd 1036219089Spjd dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); 1037185029Spjd 1038185029Spjd ASSERT3U(dlused, <=, mrs_used); 1039277585Sdelphij dsl_dataset_phys(ds)->ds_unique_bytes = 1040277585Sdelphij dsl_dataset_phys(ds)->ds_referenced_bytes - (mrs_used - dlused); 1041185029Spjd 1042219089Spjd if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= 1043185029Spjd SPA_VERSION_UNIQUE_ACCURATE) 1044277585Sdelphij dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1045185029Spjd} 1046185029Spjd 1047248571Smmvoid 1048248571Smmdsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, 1049219089Spjd dmu_tx_t *tx) 1050219089Spjd{ 1051209962Smm objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1052209962Smm uint64_t count; 1053209962Smm int err; 1054209962Smm 1055277585Sdelphij ASSERT(dsl_dataset_phys(ds)->ds_num_children >= 2); 1056277585Sdelphij err = zap_remove_int(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, 1057277585Sdelphij obj, tx); 1058209962Smm /* 1059209962Smm * The err should not be ENOENT, but a bug in a previous version 1060209962Smm * of the code could cause upgrade_clones_cb() to not set 1061209962Smm * ds_next_snap_obj when it should, leading to a missing entry. 1062209962Smm * If we knew that the pool was created after 1063209962Smm * SPA_VERSION_NEXT_CLONES, we could assert that it isn't 1064209962Smm * ENOENT. However, at least we can check that we don't have 1065209962Smm * too many entries in the next_clones_obj even after failing to 1066209962Smm * remove this one. 1067209962Smm */ 1068248571Smm if (err != ENOENT) 1069240415Smm VERIFY0(err); 1070277585Sdelphij ASSERT0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, 1071209962Smm &count)); 1072277585Sdelphij ASSERT3U(count, <=, dsl_dataset_phys(ds)->ds_num_children - 2); 1073209962Smm} 1074209962Smm 1075248571Smm 1076248571Smmblkptr_t * 1077248571Smmdsl_dataset_get_blkptr(dsl_dataset_t *ds) 1078219089Spjd{ 1079277585Sdelphij return (&dsl_dataset_phys(ds)->ds_bp); 1080219089Spjd} 1081219089Spjd 1082248571Smmspa_t * 1083248571Smmdsl_dataset_get_spa(dsl_dataset_t *ds) 1084219089Spjd{ 1085248571Smm return (ds->ds_dir->dd_pool->dp_spa); 1086219089Spjd} 1087219089Spjd 1088185029Spjdvoid 1089248571Smmdsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 1090185029Spjd{ 1091248571Smm dsl_pool_t *dp; 1092168404Spjd 1093248571Smm if (ds == NULL) /* this is the meta-objset */ 1094219089Spjd return; 1095219089Spjd 1096248571Smm ASSERT(ds->ds_objset != NULL); 1097185029Spjd 1098277585Sdelphij if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) 1099248571Smm panic("dirtying snapshot!"); 1100219089Spjd 1101310512Savg /* Must not dirty a dataset in the same txg where it got snapshotted. */ 1102310512Savg ASSERT3U(tx->tx_txg, >, dsl_dataset_phys(ds)->ds_prev_snap_txg); 1103310512Savg 1104248571Smm dp = ds->ds_dir->dd_pool; 1105248571Smm if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) { 1106248571Smm /* up the hold count until we can be written out */ 1107248571Smm dmu_buf_add_ref(ds->ds_dbuf, ds); 1108185029Spjd } 1109248571Smm} 1110185029Spjd 1111248571Smmboolean_t 1112248571Smmdsl_dataset_is_dirty(dsl_dataset_t *ds) 1113248571Smm{ 1114248571Smm for (int t = 0; t < TXG_SIZE; t++) { 1115248571Smm if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, 1116248571Smm ds, t)) 1117248571Smm return (B_TRUE); 1118168404Spjd } 1119248571Smm return (B_FALSE); 1120185029Spjd} 1121168404Spjd 1122185029Spjdstatic int 1123185029Spjddsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) 1124185029Spjd{ 1125185029Spjd uint64_t asize; 1126185029Spjd 1127185029Spjd if (!dmu_tx_is_syncing(tx)) 1128185029Spjd return (0); 1129185029Spjd 1130185029Spjd /* 1131185029Spjd * If there's an fs-only reservation, any blocks that might become 1132185029Spjd * owned by the snapshot dataset must be accommodated by space 1133185029Spjd * outside of the reservation. 1134185029Spjd */ 1135219089Spjd ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); 1136277585Sdelphij asize = MIN(dsl_dataset_phys(ds)->ds_unique_bytes, ds->ds_reserved); 1137219089Spjd if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 1138249195Smm return (SET_ERROR(ENOSPC)); 1139185029Spjd 1140185029Spjd /* 1141248571Smm * Propagate any reserved space for this snapshot to other 1142185029Spjd * snapshot checks in this sync group. 1143185029Spjd */ 1144185029Spjd if (asize > 0) 1145185029Spjd dsl_dir_willuse_space(ds->ds_dir, asize, tx); 1146185029Spjd 1147185029Spjd return (0); 1148168404Spjd} 1149168404Spjd 1150248571Smmtypedef struct dsl_dataset_snapshot_arg { 1151248571Smm nvlist_t *ddsa_snaps; 1152248571Smm nvlist_t *ddsa_props; 1153248571Smm nvlist_t *ddsa_errors; 1154265744Sdelphij cred_t *ddsa_cr; 1155248571Smm} dsl_dataset_snapshot_arg_t; 1156248571Smm 1157168404Spjdint 1158248571Smmdsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname, 1159265744Sdelphij dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr) 1160168404Spjd{ 1161248571Smm int error; 1162168404Spjd uint64_t value; 1163168404Spjd 1164248571Smm ds->ds_trysnap_txg = tx->tx_txg; 1165248571Smm 1166248571Smm if (!dmu_tx_is_syncing(tx)) 1167248571Smm return (0); 1168248571Smm 1169168404Spjd /* 1170168404Spjd * We don't allow multiple snapshots of the same txg. If there 1171168404Spjd * is already one, try again. 1172168404Spjd */ 1173277585Sdelphij if (dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg) 1174249195Smm return (SET_ERROR(EAGAIN)); 1175168404Spjd 1176168404Spjd /* 1177248571Smm * Check for conflicting snapshot name. 1178168404Spjd */ 1179248571Smm error = dsl_dataset_snap_lookup(ds, snapname, &value); 1180248571Smm if (error == 0) 1181249195Smm return (SET_ERROR(EEXIST)); 1182248571Smm if (error != ENOENT) 1183248571Smm return (error); 1184168404Spjd 1185253819Sdelphij /* 1186253819Sdelphij * We don't allow taking snapshots of inconsistent datasets, such as 1187253819Sdelphij * those into which we are currently receiving. However, if we are 1188253819Sdelphij * creating this snapshot as part of a receive, this check will be 1189253819Sdelphij * executed atomically with respect to the completion of the receive 1190253819Sdelphij * itself but prior to the clearing of DS_FLAG_INCONSISTENT; in this 1191253819Sdelphij * case we ignore this, knowing it will be fixed up for us shortly in 1192253819Sdelphij * dmu_recv_end_sync(). 1193253819Sdelphij */ 1194253819Sdelphij if (!recv && DS_IS_INCONSISTENT(ds)) 1195253819Sdelphij return (SET_ERROR(EBUSY)); 1196253819Sdelphij 1197265744Sdelphij /* 1198265744Sdelphij * Skip the check for temporary snapshots or if we have already checked 1199265744Sdelphij * the counts in dsl_dataset_snapshot_check. This means we really only 1200265744Sdelphij * check the count here when we're receiving a stream. 1201265744Sdelphij */ 1202265744Sdelphij if (cnt != 0 && cr != NULL) { 1203265744Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, cnt, 1204265744Sdelphij ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr); 1205265744Sdelphij if (error != 0) 1206265744Sdelphij return (error); 1207265744Sdelphij } 1208265744Sdelphij 1209248571Smm error = dsl_dataset_snapshot_reserve_space(ds, tx); 1210248571Smm if (error != 0) 1211248571Smm return (error); 1212168498Spjd 1213168404Spjd return (0); 1214168404Spjd} 1215168404Spjd 1216248571Smmstatic int 1217248571Smmdsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx) 1218248571Smm{ 1219248571Smm dsl_dataset_snapshot_arg_t *ddsa = arg; 1220248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1221248571Smm nvpair_t *pair; 1222248571Smm int rv = 0; 1223248571Smm 1224265744Sdelphij /* 1225265744Sdelphij * Pre-compute how many total new snapshots will be created for each 1226265744Sdelphij * level in the tree and below. This is needed for validating the 1227265744Sdelphij * snapshot limit when either taking a recursive snapshot or when 1228265744Sdelphij * taking multiple snapshots. 1229265744Sdelphij * 1230265744Sdelphij * The problem is that the counts are not actually adjusted when 1231265744Sdelphij * we are checking, only when we finally sync. For a single snapshot, 1232265744Sdelphij * this is easy, the count will increase by 1 at each node up the tree, 1233265744Sdelphij * but its more complicated for the recursive/multiple snapshot case. 1234265744Sdelphij * 1235265744Sdelphij * The dsl_fs_ss_limit_check function does recursively check the count 1236265744Sdelphij * at each level up the tree but since it is validating each snapshot 1237265744Sdelphij * independently we need to be sure that we are validating the complete 1238265744Sdelphij * count for the entire set of snapshots. We do this by rolling up the 1239265744Sdelphij * counts for each component of the name into an nvlist and then 1240265744Sdelphij * checking each of those cases with the aggregated count. 1241265744Sdelphij * 1242265744Sdelphij * This approach properly handles not only the recursive snapshot 1243265744Sdelphij * case (where we get all of those on the ddsa_snaps list) but also 1244265744Sdelphij * the sibling case (e.g. snapshot a/b and a/c so that we will also 1245265744Sdelphij * validate the limit on 'a' using a count of 2). 1246265744Sdelphij * 1247265744Sdelphij * We validate the snapshot names in the third loop and only report 1248265744Sdelphij * name errors once. 1249265744Sdelphij */ 1250265744Sdelphij if (dmu_tx_is_syncing(tx)) { 1251265744Sdelphij nvlist_t *cnt_track = NULL; 1252265744Sdelphij cnt_track = fnvlist_alloc(); 1253265744Sdelphij 1254265744Sdelphij /* Rollup aggregated counts into the cnt_track list */ 1255265744Sdelphij for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1256265744Sdelphij pair != NULL; 1257265744Sdelphij pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1258265744Sdelphij char *pdelim; 1259265744Sdelphij uint64_t val; 1260265744Sdelphij char nm[MAXPATHLEN]; 1261265744Sdelphij 1262265744Sdelphij (void) strlcpy(nm, nvpair_name(pair), sizeof (nm)); 1263265744Sdelphij pdelim = strchr(nm, '@'); 1264265744Sdelphij if (pdelim == NULL) 1265265744Sdelphij continue; 1266265744Sdelphij *pdelim = '\0'; 1267265744Sdelphij 1268265744Sdelphij do { 1269265744Sdelphij if (nvlist_lookup_uint64(cnt_track, nm, 1270265744Sdelphij &val) == 0) { 1271265744Sdelphij /* update existing entry */ 1272265744Sdelphij fnvlist_add_uint64(cnt_track, nm, 1273265744Sdelphij val + 1); 1274265744Sdelphij } else { 1275265744Sdelphij /* add to list */ 1276265744Sdelphij fnvlist_add_uint64(cnt_track, nm, 1); 1277265744Sdelphij } 1278265744Sdelphij 1279265744Sdelphij pdelim = strrchr(nm, '/'); 1280265744Sdelphij if (pdelim != NULL) 1281265744Sdelphij *pdelim = '\0'; 1282265744Sdelphij } while (pdelim != NULL); 1283265744Sdelphij } 1284265744Sdelphij 1285265744Sdelphij /* Check aggregated counts at each level */ 1286265744Sdelphij for (pair = nvlist_next_nvpair(cnt_track, NULL); 1287265744Sdelphij pair != NULL; pair = nvlist_next_nvpair(cnt_track, pair)) { 1288265744Sdelphij int error = 0; 1289265744Sdelphij char *name; 1290265744Sdelphij uint64_t cnt = 0; 1291265744Sdelphij dsl_dataset_t *ds; 1292265744Sdelphij 1293265744Sdelphij name = nvpair_name(pair); 1294265744Sdelphij cnt = fnvpair_value_uint64(pair); 1295265744Sdelphij ASSERT(cnt > 0); 1296265744Sdelphij 1297265744Sdelphij error = dsl_dataset_hold(dp, name, FTAG, &ds); 1298265744Sdelphij if (error == 0) { 1299265744Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, cnt, 1300265744Sdelphij ZFS_PROP_SNAPSHOT_LIMIT, NULL, 1301265744Sdelphij ddsa->ddsa_cr); 1302265744Sdelphij dsl_dataset_rele(ds, FTAG); 1303265744Sdelphij } 1304265744Sdelphij 1305265744Sdelphij if (error != 0) { 1306265744Sdelphij if (ddsa->ddsa_errors != NULL) 1307265744Sdelphij fnvlist_add_int32(ddsa->ddsa_errors, 1308265744Sdelphij name, error); 1309265744Sdelphij rv = error; 1310265744Sdelphij /* only report one error for this check */ 1311265744Sdelphij break; 1312265744Sdelphij } 1313265744Sdelphij } 1314265744Sdelphij nvlist_free(cnt_track); 1315265744Sdelphij } 1316265744Sdelphij 1317248571Smm for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1318248571Smm pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1319248571Smm int error = 0; 1320248571Smm dsl_dataset_t *ds; 1321248571Smm char *name, *atp; 1322307122Smav char dsname[ZFS_MAX_DATASET_NAME_LEN]; 1323248571Smm 1324248571Smm name = nvpair_name(pair); 1325307122Smav if (strlen(name) >= ZFS_MAX_DATASET_NAME_LEN) 1326249195Smm error = SET_ERROR(ENAMETOOLONG); 1327248571Smm if (error == 0) { 1328248571Smm atp = strchr(name, '@'); 1329248571Smm if (atp == NULL) 1330249195Smm error = SET_ERROR(EINVAL); 1331248571Smm if (error == 0) 1332248571Smm (void) strlcpy(dsname, name, atp - name + 1); 1333248571Smm } 1334248571Smm if (error == 0) 1335248571Smm error = dsl_dataset_hold(dp, dsname, FTAG, &ds); 1336248571Smm if (error == 0) { 1337265744Sdelphij /* passing 0/NULL skips dsl_fs_ss_limit_check */ 1338248571Smm error = dsl_dataset_snapshot_check_impl(ds, 1339265744Sdelphij atp + 1, tx, B_FALSE, 0, NULL); 1340248571Smm dsl_dataset_rele(ds, FTAG); 1341248571Smm } 1342248571Smm 1343248571Smm if (error != 0) { 1344248571Smm if (ddsa->ddsa_errors != NULL) { 1345248571Smm fnvlist_add_int32(ddsa->ddsa_errors, 1346248571Smm name, error); 1347248571Smm } 1348248571Smm rv = error; 1349248571Smm } 1350248571Smm } 1351265744Sdelphij 1352248571Smm return (rv); 1353248571Smm} 1354248571Smm 1355168404Spjdvoid 1356248571Smmdsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, 1357248571Smm dmu_tx_t *tx) 1358168404Spjd{ 1359168404Spjd dsl_pool_t *dp = ds->ds_dir->dd_pool; 1360168404Spjd dmu_buf_t *dbuf; 1361168404Spjd dsl_dataset_phys_t *dsphys; 1362185029Spjd uint64_t dsobj, crtxg; 1363168404Spjd objset_t *mos = dp->dp_meta_objset; 1364248571Smm objset_t *os; 1365168404Spjd 1366248571Smm ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); 1367168404Spjd 1368185029Spjd /* 1369248571Smm * If we are on an old pool, the zil must not be active, in which 1370248571Smm * case it will be zeroed. Usually zil_suspend() accomplishes this. 1371248571Smm */ 1372248571Smm ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP || 1373248571Smm dmu_objset_from_ds(ds, &os) != 0 || 1374248571Smm bcmp(&os->os_phys->os_zil_header, &zero_zil, 1375248571Smm sizeof (zero_zil)) == 0); 1376248571Smm 1377310512Savg /* Should not snapshot a dirty dataset. */ 1378310512Savg ASSERT(!txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, 1379310512Savg ds, tx->tx_txg)); 1380310512Savg 1381265744Sdelphij dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx); 1382248571Smm 1383248571Smm /* 1384185029Spjd * The origin's ds_creation_txg has to be < TXG_INITIAL 1385185029Spjd */ 1386185029Spjd if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) 1387185029Spjd crtxg = 1; 1388185029Spjd else 1389185029Spjd crtxg = tx->tx_txg; 1390185029Spjd 1391168404Spjd dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1392168404Spjd DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1393248571Smm VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1394168404Spjd dmu_buf_will_dirty(dbuf, tx); 1395168404Spjd dsphys = dbuf->db_data; 1396185029Spjd bzero(dsphys, sizeof (dsl_dataset_phys_t)); 1397168404Spjd dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1398168404Spjd dsphys->ds_fsid_guid = unique_create(); 1399236823Spjd do { 1400236823Spjd (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1401236823Spjd sizeof (dsphys->ds_guid)); 1402236823Spjd } while (dsphys->ds_guid == 0); 1403277585Sdelphij dsphys->ds_prev_snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; 1404277585Sdelphij dsphys->ds_prev_snap_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; 1405168404Spjd dsphys->ds_next_snap_obj = ds->ds_object; 1406168404Spjd dsphys->ds_num_children = 1; 1407168404Spjd dsphys->ds_creation_time = gethrestime_sec(); 1408185029Spjd dsphys->ds_creation_txg = crtxg; 1409277585Sdelphij dsphys->ds_deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj; 1410277585Sdelphij dsphys->ds_referenced_bytes = dsl_dataset_phys(ds)->ds_referenced_bytes; 1411277585Sdelphij dsphys->ds_compressed_bytes = dsl_dataset_phys(ds)->ds_compressed_bytes; 1412277585Sdelphij dsphys->ds_uncompressed_bytes = 1413277585Sdelphij dsl_dataset_phys(ds)->ds_uncompressed_bytes; 1414277585Sdelphij dsphys->ds_flags = dsl_dataset_phys(ds)->ds_flags; 1415308083Smav rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 1416277585Sdelphij dsphys->ds_bp = dsl_dataset_phys(ds)->ds_bp; 1417308083Smav rrw_exit(&ds->ds_bp_rwlock, FTAG); 1418168404Spjd dmu_buf_rele(dbuf, FTAG); 1419168404Spjd 1420288572Smav for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 1421288572Smav if (ds->ds_feature_inuse[f]) 1422288572Smav dsl_dataset_activate_feature(dsobj, f, tx); 1423288572Smav } 1424276081Sdelphij 1425277585Sdelphij ASSERT3U(ds->ds_prev != 0, ==, 1426277585Sdelphij dsl_dataset_phys(ds)->ds_prev_snap_obj != 0); 1427168404Spjd if (ds->ds_prev) { 1428185029Spjd uint64_t next_clones_obj = 1429277585Sdelphij dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj; 1430277585Sdelphij ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == 1431168404Spjd ds->ds_object || 1432277585Sdelphij dsl_dataset_phys(ds->ds_prev)->ds_num_children > 1); 1433277585Sdelphij if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == 1434277585Sdelphij ds->ds_object) { 1435168404Spjd dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1436277585Sdelphij ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==, 1437277585Sdelphij dsl_dataset_phys(ds->ds_prev)->ds_creation_txg); 1438277585Sdelphij dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj = dsobj; 1439185029Spjd } else if (next_clones_obj != 0) { 1440248571Smm dsl_dataset_remove_from_next_clones(ds->ds_prev, 1441209962Smm dsphys->ds_next_snap_obj, tx); 1442248571Smm VERIFY0(zap_add_int(mos, 1443185029Spjd next_clones_obj, dsobj, tx)); 1444168404Spjd } 1445168404Spjd } 1446168404Spjd 1447185029Spjd /* 1448185029Spjd * If we have a reference-reservation on this dataset, we will 1449185029Spjd * need to increase the amount of refreservation being charged 1450185029Spjd * since our unique space is going to zero. 1451185029Spjd */ 1452185029Spjd if (ds->ds_reserved) { 1453219089Spjd int64_t delta; 1454219089Spjd ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 1455277585Sdelphij delta = MIN(dsl_dataset_phys(ds)->ds_unique_bytes, 1456277585Sdelphij ds->ds_reserved); 1457185029Spjd dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, 1458219089Spjd delta, 0, 0, tx); 1459185029Spjd } 1460185029Spjd 1461168404Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 1462277585Sdelphij dsl_dataset_phys(ds)->ds_deadlist_obj = 1463277585Sdelphij dsl_deadlist_clone(&ds->ds_deadlist, UINT64_MAX, 1464277585Sdelphij dsl_dataset_phys(ds)->ds_prev_snap_obj, tx); 1465219089Spjd dsl_deadlist_close(&ds->ds_deadlist); 1466277585Sdelphij dsl_deadlist_open(&ds->ds_deadlist, mos, 1467277585Sdelphij dsl_dataset_phys(ds)->ds_deadlist_obj); 1468219089Spjd dsl_deadlist_add_key(&ds->ds_deadlist, 1469277585Sdelphij dsl_dataset_phys(ds)->ds_prev_snap_txg, tx); 1470219089Spjd 1471277585Sdelphij ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, <, tx->tx_txg); 1472277585Sdelphij dsl_dataset_phys(ds)->ds_prev_snap_obj = dsobj; 1473277585Sdelphij dsl_dataset_phys(ds)->ds_prev_snap_txg = crtxg; 1474277585Sdelphij dsl_dataset_phys(ds)->ds_unique_bytes = 0; 1475185029Spjd if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 1476277585Sdelphij dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1477168404Spjd 1478277585Sdelphij VERIFY0(zap_add(mos, dsl_dataset_phys(ds)->ds_snapnames_zapobj, 1479248571Smm snapname, 8, 1, &dsobj, tx)); 1480168404Spjd 1481168404Spjd if (ds->ds_prev) 1482248571Smm dsl_dataset_rele(ds->ds_prev, ds); 1483248571Smm VERIFY0(dsl_dataset_hold_obj(dp, 1484277585Sdelphij dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev)); 1485185029Spjd 1486219089Spjd dsl_scan_ds_snapshotted(ds, tx); 1487185029Spjd 1488219089Spjd dsl_dir_snap_cmtime_update(ds->ds_dir); 1489219089Spjd 1490248571Smm spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, ""); 1491168404Spjd} 1492168404Spjd 1493248571Smmstatic void 1494248571Smmdsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx) 1495248571Smm{ 1496248571Smm dsl_dataset_snapshot_arg_t *ddsa = arg; 1497248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1498248571Smm nvpair_t *pair; 1499248571Smm 1500248571Smm for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1501248571Smm pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1502248571Smm dsl_dataset_t *ds; 1503248571Smm char *name, *atp; 1504307122Smav char dsname[ZFS_MAX_DATASET_NAME_LEN]; 1505248571Smm 1506248571Smm name = nvpair_name(pair); 1507248571Smm atp = strchr(name, '@'); 1508248571Smm (void) strlcpy(dsname, name, atp - name + 1); 1509248571Smm VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds)); 1510248571Smm 1511248571Smm dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx); 1512248571Smm if (ddsa->ddsa_props != NULL) { 1513248571Smm dsl_props_set_sync_impl(ds->ds_prev, 1514248571Smm ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx); 1515248571Smm } 1516248571Smm dsl_dataset_rele(ds, FTAG); 1517248571Smm } 1518248571Smm} 1519248571Smm 1520248571Smm/* 1521248571Smm * The snapshots must all be in the same pool. 1522248571Smm * All-or-nothing: if there are any failures, nothing will be modified. 1523248571Smm */ 1524248571Smmint 1525248571Smmdsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) 1526248571Smm{ 1527248571Smm dsl_dataset_snapshot_arg_t ddsa; 1528248571Smm nvpair_t *pair; 1529248571Smm boolean_t needsuspend; 1530248571Smm int error; 1531248571Smm spa_t *spa; 1532248571Smm char *firstname; 1533248571Smm nvlist_t *suspended = NULL; 1534248571Smm 1535248571Smm pair = nvlist_next_nvpair(snaps, NULL); 1536248571Smm if (pair == NULL) 1537248571Smm return (0); 1538248571Smm firstname = nvpair_name(pair); 1539248571Smm 1540248571Smm error = spa_open(firstname, &spa, FTAG); 1541248571Smm if (error != 0) 1542248571Smm return (error); 1543248571Smm needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1544248571Smm spa_close(spa, FTAG); 1545248571Smm 1546248571Smm if (needsuspend) { 1547248571Smm suspended = fnvlist_alloc(); 1548248571Smm for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; 1549248571Smm pair = nvlist_next_nvpair(snaps, pair)) { 1550307122Smav char fsname[ZFS_MAX_DATASET_NAME_LEN]; 1551248571Smm char *snapname = nvpair_name(pair); 1552248571Smm char *atp; 1553248571Smm void *cookie; 1554248571Smm 1555248571Smm atp = strchr(snapname, '@'); 1556248571Smm if (atp == NULL) { 1557249195Smm error = SET_ERROR(EINVAL); 1558248571Smm break; 1559248571Smm } 1560248571Smm (void) strlcpy(fsname, snapname, atp - snapname + 1); 1561248571Smm 1562248571Smm error = zil_suspend(fsname, &cookie); 1563248571Smm if (error != 0) 1564248571Smm break; 1565248571Smm fnvlist_add_uint64(suspended, fsname, 1566248571Smm (uintptr_t)cookie); 1567248571Smm } 1568248571Smm } 1569248571Smm 1570248571Smm ddsa.ddsa_snaps = snaps; 1571248571Smm ddsa.ddsa_props = props; 1572248571Smm ddsa.ddsa_errors = errors; 1573265744Sdelphij ddsa.ddsa_cr = CRED(); 1574248571Smm 1575248571Smm if (error == 0) { 1576248571Smm error = dsl_sync_task(firstname, dsl_dataset_snapshot_check, 1577248571Smm dsl_dataset_snapshot_sync, &ddsa, 1578269006Sdelphij fnvlist_num_pairs(snaps) * 3, ZFS_SPACE_CHECK_NORMAL); 1579248571Smm } 1580248571Smm 1581248571Smm if (suspended != NULL) { 1582248571Smm for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL; 1583248571Smm pair = nvlist_next_nvpair(suspended, pair)) { 1584248571Smm zil_resume((void *)(uintptr_t) 1585248571Smm fnvpair_value_uint64(pair)); 1586248571Smm } 1587248571Smm fnvlist_free(suspended); 1588248571Smm } 1589248571Smm 1590248571Smm#ifdef __FreeBSD__ 1591248571Smm#ifdef _KERNEL 1592248571Smm if (error == 0) { 1593248571Smm for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; 1594248571Smm pair = nvlist_next_nvpair(snaps, pair)) { 1595248571Smm char *snapname = nvpair_name(pair); 1596248571Smm zvol_create_minors(snapname); 1597248571Smm } 1598248571Smm } 1599248571Smm#endif 1600248571Smm#endif 1601248571Smm return (error); 1602248571Smm} 1603248571Smm 1604248571Smmtypedef struct dsl_dataset_snapshot_tmp_arg { 1605248571Smm const char *ddsta_fsname; 1606248571Smm const char *ddsta_snapname; 1607248571Smm minor_t ddsta_cleanup_minor; 1608248571Smm const char *ddsta_htag; 1609248571Smm} dsl_dataset_snapshot_tmp_arg_t; 1610248571Smm 1611248571Smmstatic int 1612248571Smmdsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx) 1613248571Smm{ 1614248571Smm dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1615248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1616248571Smm dsl_dataset_t *ds; 1617248571Smm int error; 1618248571Smm 1619248571Smm error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds); 1620248571Smm if (error != 0) 1621248571Smm return (error); 1622248571Smm 1623265744Sdelphij /* NULL cred means no limit check for tmp snapshot */ 1624253819Sdelphij error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname, 1625265744Sdelphij tx, B_FALSE, 0, NULL); 1626248571Smm if (error != 0) { 1627248571Smm dsl_dataset_rele(ds, FTAG); 1628248571Smm return (error); 1629248571Smm } 1630248571Smm 1631248571Smm if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) { 1632248571Smm dsl_dataset_rele(ds, FTAG); 1633249195Smm return (SET_ERROR(ENOTSUP)); 1634248571Smm } 1635248571Smm error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag, 1636248571Smm B_TRUE, tx); 1637248571Smm if (error != 0) { 1638248571Smm dsl_dataset_rele(ds, FTAG); 1639248571Smm return (error); 1640248571Smm } 1641248571Smm 1642248571Smm dsl_dataset_rele(ds, FTAG); 1643248571Smm return (0); 1644248571Smm} 1645248571Smm 1646248571Smmstatic void 1647248571Smmdsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx) 1648248571Smm{ 1649248571Smm dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1650248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1651248571Smm dsl_dataset_t *ds; 1652248571Smm 1653248571Smm VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds)); 1654248571Smm 1655248571Smm dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx); 1656248571Smm dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag, 1657248571Smm ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx); 1658248571Smm dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx); 1659248571Smm 1660248571Smm dsl_dataset_rele(ds, FTAG); 1661248571Smm} 1662248571Smm 1663248571Smmint 1664248571Smmdsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, 1665248571Smm minor_t cleanup_minor, const char *htag) 1666248571Smm{ 1667248571Smm dsl_dataset_snapshot_tmp_arg_t ddsta; 1668248571Smm int error; 1669248571Smm spa_t *spa; 1670248571Smm boolean_t needsuspend; 1671248571Smm void *cookie; 1672248571Smm 1673248571Smm ddsta.ddsta_fsname = fsname; 1674248571Smm ddsta.ddsta_snapname = snapname; 1675248571Smm ddsta.ddsta_cleanup_minor = cleanup_minor; 1676248571Smm ddsta.ddsta_htag = htag; 1677248571Smm 1678248571Smm error = spa_open(fsname, &spa, FTAG); 1679248571Smm if (error != 0) 1680248571Smm return (error); 1681248571Smm needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1682248571Smm spa_close(spa, FTAG); 1683248571Smm 1684248571Smm if (needsuspend) { 1685248571Smm error = zil_suspend(fsname, &cookie); 1686248571Smm if (error != 0) 1687248571Smm return (error); 1688248571Smm } 1689248571Smm 1690248571Smm error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check, 1691269006Sdelphij dsl_dataset_snapshot_tmp_sync, &ddsta, 3, ZFS_SPACE_CHECK_RESERVED); 1692248571Smm 1693248571Smm if (needsuspend) 1694248571Smm zil_resume(cookie); 1695248571Smm return (error); 1696248571Smm} 1697248571Smm 1698248571Smm 1699168404Spjdvoid 1700168404Spjddsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1701168404Spjd{ 1702168404Spjd ASSERT(dmu_tx_is_syncing(tx)); 1703219089Spjd ASSERT(ds->ds_objset != NULL); 1704277585Sdelphij ASSERT(dsl_dataset_phys(ds)->ds_next_snap_obj == 0); 1705168404Spjd 1706185029Spjd /* 1707185029Spjd * in case we had to change ds_fsid_guid when we opened it, 1708185029Spjd * sync it out now. 1709185029Spjd */ 1710185029Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 1711277585Sdelphij dsl_dataset_phys(ds)->ds_fsid_guid = ds->ds_fsid_guid; 1712185029Spjd 1713290756Smav if (ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] != 0) { 1714290756Smav VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, 1715290756Smav ds->ds_object, DS_FIELD_RESUME_OBJECT, 8, 1, 1716290756Smav &ds->ds_resume_object[tx->tx_txg & TXG_MASK], tx)); 1717290756Smav VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, 1718290756Smav ds->ds_object, DS_FIELD_RESUME_OFFSET, 8, 1, 1719290756Smav &ds->ds_resume_offset[tx->tx_txg & TXG_MASK], tx)); 1720290756Smav VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, 1721290756Smav ds->ds_object, DS_FIELD_RESUME_BYTES, 8, 1, 1722290756Smav &ds->ds_resume_bytes[tx->tx_txg & TXG_MASK], tx)); 1723290756Smav ds->ds_resume_object[tx->tx_txg & TXG_MASK] = 0; 1724290756Smav ds->ds_resume_offset[tx->tx_txg & TXG_MASK] = 0; 1725290756Smav ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] = 0; 1726290756Smav } 1727290756Smav 1728219089Spjd dmu_objset_sync(ds->ds_objset, zio, tx); 1729276081Sdelphij 1730288572Smav for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 1731288572Smav if (ds->ds_feature_activation_needed[f]) { 1732288572Smav if (ds->ds_feature_inuse[f]) 1733288572Smav continue; 1734288572Smav dsl_dataset_activate_feature(ds->ds_object, f, tx); 1735288572Smav ds->ds_feature_inuse[f] = B_TRUE; 1736288572Smav } 1737276081Sdelphij } 1738168404Spjd} 1739168404Spjd 1740310512Savgstatic int 1741310512Savgdeadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 1742310512Savg{ 1743310512Savg dsl_deadlist_t *dl = arg; 1744310512Savg dsl_deadlist_insert(dl, bp, tx); 1745310512Savg return (0); 1746310512Savg} 1747310512Savg 1748310512Savgvoid 1749310512Savgdsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx) 1750310512Savg{ 1751310512Savg objset_t *os = ds->ds_objset; 1752310512Savg 1753310512Savg bplist_iterate(&ds->ds_pending_deadlist, 1754310512Savg deadlist_enqueue_cb, &ds->ds_deadlist, tx); 1755310512Savg 1756310512Savg ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx))); 1757310512Savg 1758310512Savg dmu_buf_rele(ds->ds_dbuf, ds); 1759310512Savg} 1760310512Savg 1761228103Smmstatic void 1762228103Smmget_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) 1763228103Smm{ 1764228103Smm uint64_t count = 0; 1765228103Smm objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1766228103Smm zap_cursor_t zc; 1767228103Smm zap_attribute_t za; 1768248571Smm nvlist_t *propval = fnvlist_alloc(); 1769315833Savg nvlist_t *val; 1770228103Smm 1771248571Smm ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 1772228103Smm 1773228103Smm /* 1774315833Savg * We use nvlist_alloc() instead of fnvlist_alloc() because the 1775315833Savg * latter would allocate the list with NV_UNIQUE_NAME flag. 1776315833Savg * As a result, every time a clone name is appended to the list 1777315833Savg * it would be (linearly) searched for for a duplicate name. 1778315833Savg * We already know that all clone names must be unique and we 1779315833Savg * want avoid the quadratic complexity of double-checking that 1780315833Savg * because we can have a large number of clones. 1781315833Savg */ 1782315833Savg VERIFY0(nvlist_alloc(&val, 0, KM_SLEEP)); 1783315833Savg 1784315833Savg /* 1785248571Smm * There may be missing entries in ds_next_clones_obj 1786228103Smm * due to a bug in a previous version of the code. 1787228103Smm * Only trust it if it has the right number of entries. 1788228103Smm */ 1789277585Sdelphij if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) { 1790277585Sdelphij VERIFY0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, 1791228103Smm &count)); 1792228103Smm } 1793277585Sdelphij if (count != dsl_dataset_phys(ds)->ds_num_children - 1) 1794228103Smm goto fail; 1795277585Sdelphij for (zap_cursor_init(&zc, mos, 1796277585Sdelphij dsl_dataset_phys(ds)->ds_next_clones_obj); 1797228103Smm zap_cursor_retrieve(&zc, &za) == 0; 1798228103Smm zap_cursor_advance(&zc)) { 1799228103Smm dsl_dataset_t *clone; 1800307122Smav char buf[ZFS_MAX_DATASET_NAME_LEN]; 1801248571Smm VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 1802248571Smm za.za_first_integer, FTAG, &clone)); 1803228103Smm dsl_dir_name(clone->ds_dir, buf); 1804248571Smm fnvlist_add_boolean(val, buf); 1805228103Smm dsl_dataset_rele(clone, FTAG); 1806228103Smm } 1807228103Smm zap_cursor_fini(&zc); 1808248571Smm fnvlist_add_nvlist(propval, ZPROP_VALUE, val); 1809248571Smm fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval); 1810228103Smmfail: 1811228103Smm nvlist_free(val); 1812228103Smm nvlist_free(propval); 1813228103Smm} 1814228103Smm 1815290756Smavstatic void 1816290756Smavget_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv) 1817290756Smav{ 1818290756Smav dsl_pool_t *dp = ds->ds_dir->dd_pool; 1819290756Smav 1820290756Smav if (dsl_dataset_has_resume_receive_state(ds)) { 1821290756Smav char *str; 1822290756Smav void *packed; 1823290756Smav uint8_t *compressed; 1824290756Smav uint64_t val; 1825290756Smav nvlist_t *token_nv = fnvlist_alloc(); 1826290756Smav size_t packed_size, compressed_size; 1827290756Smav 1828290756Smav if (zap_lookup(dp->dp_meta_objset, ds->ds_object, 1829290756Smav DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val) == 0) { 1830290756Smav fnvlist_add_uint64(token_nv, "fromguid", val); 1831290756Smav } 1832290756Smav if (zap_lookup(dp->dp_meta_objset, ds->ds_object, 1833290756Smav DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val) == 0) { 1834290756Smav fnvlist_add_uint64(token_nv, "object", val); 1835290756Smav } 1836290756Smav if (zap_lookup(dp->dp_meta_objset, ds->ds_object, 1837290756Smav DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val) == 0) { 1838290756Smav fnvlist_add_uint64(token_nv, "offset", val); 1839290756Smav } 1840290756Smav if (zap_lookup(dp->dp_meta_objset, ds->ds_object, 1841290756Smav DS_FIELD_RESUME_BYTES, sizeof (val), 1, &val) == 0) { 1842290756Smav fnvlist_add_uint64(token_nv, "bytes", val); 1843290756Smav } 1844290756Smav if (zap_lookup(dp->dp_meta_objset, ds->ds_object, 1845290756Smav DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val) == 0) { 1846290756Smav fnvlist_add_uint64(token_nv, "toguid", val); 1847290756Smav } 1848290756Smav char buf[256]; 1849290756Smav if (zap_lookup(dp->dp_meta_objset, ds->ds_object, 1850290756Smav DS_FIELD_RESUME_TONAME, 1, sizeof (buf), buf) == 0) { 1851290756Smav fnvlist_add_string(token_nv, "toname", buf); 1852290756Smav } 1853290756Smav if (zap_contains(dp->dp_meta_objset, ds->ds_object, 1854290756Smav DS_FIELD_RESUME_EMBEDOK) == 0) { 1855290756Smav fnvlist_add_boolean(token_nv, "embedok"); 1856290756Smav } 1857290756Smav packed = fnvlist_pack(token_nv, &packed_size); 1858290756Smav fnvlist_free(token_nv); 1859290756Smav compressed = kmem_alloc(packed_size, KM_SLEEP); 1860290756Smav 1861290756Smav compressed_size = gzip_compress(packed, compressed, 1862290756Smav packed_size, packed_size, 6); 1863290756Smav 1864290756Smav zio_cksum_t cksum; 1865290757Smav fletcher_4_native(compressed, compressed_size, NULL, &cksum); 1866290756Smav 1867290756Smav str = kmem_alloc(compressed_size * 2 + 1, KM_SLEEP); 1868290756Smav for (int i = 0; i < compressed_size; i++) { 1869290756Smav (void) sprintf(str + i * 2, "%02x", compressed[i]); 1870290756Smav } 1871290756Smav str[compressed_size * 2] = '\0'; 1872290756Smav char *propval = kmem_asprintf("%u-%llx-%llx-%s", 1873290756Smav ZFS_SEND_RESUME_TOKEN_VERSION, 1874290756Smav (longlong_t)cksum.zc_word[0], 1875290756Smav (longlong_t)packed_size, str); 1876290756Smav dsl_prop_nvlist_add_string(nv, 1877290756Smav ZFS_PROP_RECEIVE_RESUME_TOKEN, propval); 1878290756Smav kmem_free(packed, packed_size); 1879290756Smav kmem_free(str, compressed_size * 2 + 1); 1880290756Smav kmem_free(compressed, packed_size); 1881290756Smav strfree(propval); 1882290756Smav } 1883290756Smav} 1884290756Smav 1885168404Spjdvoid 1886168404Spjddsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1887168404Spjd{ 1888248571Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 1889223623Smm uint64_t refd, avail, uobjs, aobjs, ratio; 1890185029Spjd 1891248571Smm ASSERT(dsl_pool_config_held(dp)); 1892168404Spjd 1893277585Sdelphij ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 100 : 1894277585Sdelphij (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 / 1895277585Sdelphij dsl_dataset_phys(ds)->ds_compressed_bytes); 1896248571Smm 1897248571Smm dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); 1898248571Smm dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, 1899277585Sdelphij dsl_dataset_phys(ds)->ds_uncompressed_bytes); 1900248571Smm 1901288549Smav if (ds->ds_is_snapshot) { 1902248571Smm dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); 1903248571Smm dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1904277585Sdelphij dsl_dataset_phys(ds)->ds_unique_bytes); 1905248571Smm get_clones_stat(ds, nv); 1906248571Smm } else { 1907268659Sdelphij if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) { 1908307122Smav char buf[ZFS_MAX_DATASET_NAME_LEN]; 1909268659Sdelphij dsl_dataset_name(ds->ds_prev, buf); 1910268659Sdelphij dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf); 1911268659Sdelphij } 1912268659Sdelphij 1913248571Smm dsl_dir_stats(ds->ds_dir, nv); 1914248571Smm } 1915248571Smm 1916185029Spjd dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 1917185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 1918185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 1919185029Spjd 1920168404Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1921277585Sdelphij dsl_dataset_phys(ds)->ds_creation_time); 1922168404Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1923277585Sdelphij dsl_dataset_phys(ds)->ds_creation_txg); 1924185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 1925185029Spjd ds->ds_quota); 1926185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 1927185029Spjd ds->ds_reserved); 1928185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 1929277585Sdelphij dsl_dataset_phys(ds)->ds_guid); 1930219089Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, 1931277585Sdelphij dsl_dataset_phys(ds)->ds_unique_bytes); 1932219089Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, 1933219089Spjd ds->ds_object); 1934219089Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, 1935219089Spjd ds->ds_userrefs); 1936219089Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, 1937219089Spjd DS_IS_DEFER_DESTROY(ds) ? 1 : 0); 1938168404Spjd 1939277585Sdelphij if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { 1940228103Smm uint64_t written, comp, uncomp; 1941228103Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 1942228103Smm dsl_dataset_t *prev; 1943228103Smm 1944228103Smm int err = dsl_dataset_hold_obj(dp, 1945277585Sdelphij dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); 1946228103Smm if (err == 0) { 1947228103Smm err = dsl_dataset_space_written(prev, ds, &written, 1948228103Smm &comp, &uncomp); 1949228103Smm dsl_dataset_rele(prev, FTAG); 1950228103Smm if (err == 0) { 1951228103Smm dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, 1952228103Smm written); 1953228103Smm } 1954228103Smm } 1955228103Smm } 1956290756Smav 1957290756Smav if (!dsl_dataset_is_snapshot(ds)) { 1958290756Smav /* 1959290756Smav * A failed "newfs" (e.g. full) resumable receive leaves 1960290756Smav * the stats set on this dataset. Check here for the prop. 1961290756Smav */ 1962290756Smav get_receive_resume_stats(ds, nv); 1963290756Smav 1964290756Smav /* 1965290756Smav * A failed incremental resumable receive leaves the 1966290756Smav * stats set on our child named "%recv". Check the child 1967290756Smav * for the prop. 1968290756Smav */ 1969307122Smav /* 6 extra bytes for /%recv */ 1970307122Smav char recvname[ZFS_MAX_DATASET_NAME_LEN + 6]; 1971290756Smav dsl_dataset_t *recv_ds; 1972290756Smav dsl_dataset_name(ds, recvname); 1973307122Smav if (strlcat(recvname, "/", sizeof (recvname)) < 1974307122Smav sizeof (recvname) && 1975307122Smav strlcat(recvname, recv_clone_name, sizeof (recvname)) < 1976307122Smav sizeof (recvname) && 1977307122Smav dsl_dataset_hold(dp, recvname, FTAG, &recv_ds) == 0) { 1978290756Smav get_receive_resume_stats(recv_ds, nv); 1979290756Smav dsl_dataset_rele(recv_ds, FTAG); 1980290756Smav } 1981290756Smav } 1982168404Spjd} 1983168404Spjd 1984168404Spjdvoid 1985168404Spjddsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1986168404Spjd{ 1987248571Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 1988248571Smm ASSERT(dsl_pool_config_held(dp)); 1989248571Smm 1990277585Sdelphij stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg; 1991277585Sdelphij stat->dds_inconsistent = 1992277585Sdelphij dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT; 1993277585Sdelphij stat->dds_guid = dsl_dataset_phys(ds)->ds_guid; 1994248571Smm stat->dds_origin[0] = '\0'; 1995288549Smav if (ds->ds_is_snapshot) { 1996168404Spjd stat->dds_is_snapshot = B_TRUE; 1997277585Sdelphij stat->dds_num_clones = 1998277585Sdelphij dsl_dataset_phys(ds)->ds_num_children - 1; 1999209962Smm } else { 2000209962Smm stat->dds_is_snapshot = B_FALSE; 2001209962Smm stat->dds_num_clones = 0; 2002168404Spjd 2003248571Smm if (dsl_dir_is_clone(ds->ds_dir)) { 2004248571Smm dsl_dataset_t *ods; 2005168404Spjd 2006248571Smm VERIFY0(dsl_dataset_hold_obj(dp, 2007277585Sdelphij dsl_dir_phys(ds->ds_dir)->dd_origin_obj, 2008277585Sdelphij FTAG, &ods)); 2009248571Smm dsl_dataset_name(ods, stat->dds_origin); 2010248571Smm dsl_dataset_rele(ods, FTAG); 2011248571Smm } 2012168404Spjd } 2013168404Spjd} 2014168404Spjd 2015168404Spjduint64_t 2016168404Spjddsl_dataset_fsid_guid(dsl_dataset_t *ds) 2017168404Spjd{ 2018185029Spjd return (ds->ds_fsid_guid); 2019168404Spjd} 2020168404Spjd 2021168404Spjdvoid 2022168404Spjddsl_dataset_space(dsl_dataset_t *ds, 2023168404Spjd uint64_t *refdbytesp, uint64_t *availbytesp, 2024168404Spjd uint64_t *usedobjsp, uint64_t *availobjsp) 2025168404Spjd{ 2026277585Sdelphij *refdbytesp = dsl_dataset_phys(ds)->ds_referenced_bytes; 2027168404Spjd *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 2028277585Sdelphij if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) 2029277585Sdelphij *availbytesp += 2030277585Sdelphij ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes; 2031185029Spjd if (ds->ds_quota != 0) { 2032185029Spjd /* 2033185029Spjd * Adjust available bytes according to refquota 2034185029Spjd */ 2035185029Spjd if (*refdbytesp < ds->ds_quota) 2036185029Spjd *availbytesp = MIN(*availbytesp, 2037185029Spjd ds->ds_quota - *refdbytesp); 2038185029Spjd else 2039185029Spjd *availbytesp = 0; 2040185029Spjd } 2041308083Smav rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 2042277585Sdelphij *usedobjsp = BP_GET_FILL(&dsl_dataset_phys(ds)->ds_bp); 2043308083Smav rrw_exit(&ds->ds_bp_rwlock, FTAG); 2044168404Spjd *availobjsp = DN_MAX_OBJECT - *usedobjsp; 2045168404Spjd} 2046168404Spjd 2047185029Spjdboolean_t 2048253820Sdelphijdsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap) 2049185029Spjd{ 2050185029Spjd dsl_pool_t *dp = ds->ds_dir->dd_pool; 2051308083Smav uint64_t birth; 2052185029Spjd 2053248571Smm ASSERT(dsl_pool_config_held(dp)); 2054253820Sdelphij if (snap == NULL) 2055185029Spjd return (B_FALSE); 2056308083Smav rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 2057308083Smav birth = dsl_dataset_get_blkptr(ds)->blk_birth; 2058308083Smav rrw_exit(&ds->ds_bp_rwlock, FTAG); 2059308083Smav if (birth > dsl_dataset_phys(snap)->ds_creation_txg) { 2060253820Sdelphij objset_t *os, *os_snap; 2061219089Spjd /* 2062219089Spjd * It may be that only the ZIL differs, because it was 2063219089Spjd * reset in the head. Don't count that as being 2064219089Spjd * modified. 2065219089Spjd */ 2066219089Spjd if (dmu_objset_from_ds(ds, &os) != 0) 2067219089Spjd return (B_TRUE); 2068253820Sdelphij if (dmu_objset_from_ds(snap, &os_snap) != 0) 2069219089Spjd return (B_TRUE); 2070219089Spjd return (bcmp(&os->os_phys->os_meta_dnode, 2071253820Sdelphij &os_snap->os_phys->os_meta_dnode, 2072219089Spjd sizeof (os->os_phys->os_meta_dnode)) != 0); 2073219089Spjd } 2074185029Spjd return (B_FALSE); 2075185029Spjd} 2076185029Spjd 2077248571Smmtypedef struct dsl_dataset_rename_snapshot_arg { 2078248571Smm const char *ddrsa_fsname; 2079248571Smm const char *ddrsa_oldsnapname; 2080248571Smm const char *ddrsa_newsnapname; 2081248571Smm boolean_t ddrsa_recursive; 2082248571Smm dmu_tx_t *ddrsa_tx; 2083248571Smm} dsl_dataset_rename_snapshot_arg_t; 2084248571Smm 2085168404Spjd/* ARGSUSED */ 2086168404Spjdstatic int 2087248571Smmdsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp, 2088248571Smm dsl_dataset_t *hds, void *arg) 2089168404Spjd{ 2090248571Smm dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 2091248571Smm int error; 2092168404Spjd uint64_t val; 2093168404Spjd 2094248571Smm error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 2095248571Smm if (error != 0) { 2096248571Smm /* ignore nonexistent snapshots */ 2097248571Smm return (error == ENOENT ? 0 : error); 2098248571Smm } 2099168404Spjd 2100248571Smm /* new name should not exist */ 2101248571Smm error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val); 2102248571Smm if (error == 0) 2103249195Smm error = SET_ERROR(EEXIST); 2104248571Smm else if (error == ENOENT) 2105248571Smm error = 0; 2106168404Spjd 2107168676Spjd /* dataset name + 1 for the "@" + the new snapshot name must fit */ 2108248571Smm if (dsl_dir_namelen(hds->ds_dir) + 1 + 2109307122Smav strlen(ddrsa->ddrsa_newsnapname) >= ZFS_MAX_DATASET_NAME_LEN) 2110249195Smm error = SET_ERROR(ENAMETOOLONG); 2111168676Spjd 2112248571Smm return (error); 2113168404Spjd} 2114168404Spjd 2115248571Smmstatic int 2116248571Smmdsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx) 2117168404Spjd{ 2118248571Smm dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 2119248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 2120168404Spjd dsl_dataset_t *hds; 2121248571Smm int error; 2122168404Spjd 2123248571Smm error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds); 2124248571Smm if (error != 0) 2125248571Smm return (error); 2126168404Spjd 2127248571Smm if (ddrsa->ddrsa_recursive) { 2128248571Smm error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 2129248571Smm dsl_dataset_rename_snapshot_check_impl, ddrsa, 2130248571Smm DS_FIND_CHILDREN); 2131248571Smm } else { 2132248571Smm error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa); 2133248571Smm } 2134248571Smm dsl_dataset_rele(hds, FTAG); 2135248571Smm return (error); 2136248571Smm} 2137168404Spjd 2138248571Smmstatic int 2139248571Smmdsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, 2140248571Smm dsl_dataset_t *hds, void *arg) 2141248571Smm{ 2142248571Smm#ifdef __FreeBSD__ 2143248571Smm#ifdef _KERNEL 2144248571Smm char *oldname, *newname; 2145248571Smm#endif 2146248571Smm#endif 2147248571Smm dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 2148248571Smm dsl_dataset_t *ds; 2149248571Smm uint64_t val; 2150248571Smm dmu_tx_t *tx = ddrsa->ddrsa_tx; 2151248571Smm int error; 2152248571Smm 2153248571Smm error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 2154248571Smm ASSERT(error == 0 || error == ENOENT); 2155248571Smm if (error == ENOENT) { 2156248571Smm /* ignore nonexistent snapshots */ 2157248571Smm return (0); 2158248571Smm } 2159248571Smm 2160248571Smm VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds)); 2161248571Smm 2162248571Smm /* log before we change the name */ 2163248571Smm spa_history_log_internal_ds(ds, "rename", tx, 2164248571Smm "-> @%s", ddrsa->ddrsa_newsnapname); 2165248571Smm 2166265744Sdelphij VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx, 2167265744Sdelphij B_FALSE)); 2168168404Spjd mutex_enter(&ds->ds_lock); 2169248571Smm (void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname); 2170168404Spjd mutex_exit(&ds->ds_lock); 2171277585Sdelphij VERIFY0(zap_add(dp->dp_meta_objset, 2172277585Sdelphij dsl_dataset_phys(hds)->ds_snapnames_zapobj, 2173248571Smm ds->ds_snapname, 8, 1, &ds->ds_object, tx)); 2174248571Smm 2175248571Smm#ifdef __FreeBSD__ 2176219320Spjd#ifdef _KERNEL 2177248571Smm oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2178248571Smm newname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2179248571Smm snprintf(oldname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, 2180248571Smm ddrsa->ddrsa_oldsnapname); 2181248571Smm snprintf(newname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, 2182248571Smm ddrsa->ddrsa_newsnapname); 2183248571Smm zfsvfs_update_fromname(oldname, newname); 2184219317Spjd zvol_rename_minors(oldname, newname); 2185248571Smm kmem_free(newname, MAXPATHLEN); 2186248571Smm kmem_free(oldname, MAXPATHLEN); 2187219320Spjd#endif 2188248571Smm#endif 2189248571Smm dsl_dataset_rele(ds, FTAG); 2190168404Spjd 2191248571Smm return (0); 2192168404Spjd} 2193168404Spjd 2194248571Smmstatic void 2195248571Smmdsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx) 2196168676Spjd{ 2197248571Smm dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 2198248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 2199248571Smm dsl_dataset_t *hds; 2200168676Spjd 2201248571Smm VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds)); 2202248571Smm ddrsa->ddrsa_tx = tx; 2203248571Smm if (ddrsa->ddrsa_recursive) { 2204248571Smm VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 2205248571Smm dsl_dataset_rename_snapshot_sync_impl, ddrsa, 2206248571Smm DS_FIND_CHILDREN)); 2207248571Smm } else { 2208248571Smm VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa)); 2209168676Spjd } 2210248571Smm dsl_dataset_rele(hds, FTAG); 2211248571Smm} 2212168676Spjd 2213248571Smmint 2214248571Smmdsl_dataset_rename_snapshot(const char *fsname, 2215248571Smm const char *oldsnapname, const char *newsnapname, boolean_t recursive) 2216248571Smm{ 2217248571Smm dsl_dataset_rename_snapshot_arg_t ddrsa; 2218168676Spjd 2219248571Smm ddrsa.ddrsa_fsname = fsname; 2220248571Smm ddrsa.ddrsa_oldsnapname = oldsnapname; 2221248571Smm ddrsa.ddrsa_newsnapname = newsnapname; 2222248571Smm ddrsa.ddrsa_recursive = recursive; 2223168676Spjd 2224248571Smm return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check, 2225269006Sdelphij dsl_dataset_rename_snapshot_sync, &ddrsa, 2226269006Sdelphij 1, ZFS_SPACE_CHECK_RESERVED)); 2227168676Spjd} 2228168676Spjd 2229253816Sdelphij/* 2230253816Sdelphij * If we're doing an ownership handoff, we need to make sure that there is 2231253816Sdelphij * only one long hold on the dataset. We're not allowed to change anything here 2232253816Sdelphij * so we don't permanently release the long hold or regular hold here. We want 2233253816Sdelphij * to do this only when syncing to avoid the dataset unexpectedly going away 2234253816Sdelphij * when we release the long hold. 2235253816Sdelphij */ 2236168676Spjdstatic int 2237253816Sdelphijdsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) 2238253816Sdelphij{ 2239253816Sdelphij boolean_t held; 2240253816Sdelphij 2241253816Sdelphij if (!dmu_tx_is_syncing(tx)) 2242253816Sdelphij return (0); 2243253816Sdelphij 2244253816Sdelphij if (owner != NULL) { 2245253816Sdelphij VERIFY3P(ds->ds_owner, ==, owner); 2246253816Sdelphij dsl_dataset_long_rele(ds, owner); 2247253816Sdelphij } 2248253816Sdelphij 2249253816Sdelphij held = dsl_dataset_long_held(ds); 2250253816Sdelphij 2251253816Sdelphij if (owner != NULL) 2252253816Sdelphij dsl_dataset_long_hold(ds, owner); 2253253816Sdelphij 2254253816Sdelphij if (held) 2255253816Sdelphij return (SET_ERROR(EBUSY)); 2256253816Sdelphij 2257253816Sdelphij return (0); 2258253816Sdelphij} 2259253816Sdelphij 2260253816Sdelphijtypedef struct dsl_dataset_rollback_arg { 2261253816Sdelphij const char *ddra_fsname; 2262253816Sdelphij void *ddra_owner; 2263254587Sdelphij nvlist_t *ddra_result; 2264253816Sdelphij} dsl_dataset_rollback_arg_t; 2265253816Sdelphij 2266253816Sdelphijstatic int 2267248571Smmdsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) 2268168676Spjd{ 2269253816Sdelphij dsl_dataset_rollback_arg_t *ddra = arg; 2270248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 2271248571Smm dsl_dataset_t *ds; 2272248571Smm int64_t unused_refres_delta; 2273248571Smm int error; 2274168676Spjd 2275253816Sdelphij error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds); 2276248571Smm if (error != 0) 2277248571Smm return (error); 2278168676Spjd 2279248571Smm /* must not be a snapshot */ 2280288549Smav if (ds->ds_is_snapshot) { 2281248571Smm dsl_dataset_rele(ds, FTAG); 2282249195Smm return (SET_ERROR(EINVAL)); 2283168676Spjd } 2284168676Spjd 2285248571Smm /* must have a most recent snapshot */ 2286277585Sdelphij if (dsl_dataset_phys(ds)->ds_prev_snap_txg < TXG_INITIAL) { 2287248571Smm dsl_dataset_rele(ds, FTAG); 2288249195Smm return (SET_ERROR(EINVAL)); 2289248571Smm } 2290168676Spjd 2291310512Savg /* 2292310512Savg * No rollback to a snapshot created in the current txg, because 2293310512Savg * the rollback may dirty the dataset and create blocks that are 2294310512Savg * not reachable from the rootbp while having a birth txg that 2295310512Savg * falls into the snapshot's range. 2296310512Savg */ 2297310512Savg if (dmu_tx_is_syncing(tx) && 2298310512Savg dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg) { 2299310512Savg dsl_dataset_rele(ds, FTAG); 2300310512Savg return (SET_ERROR(EAGAIN)); 2301310512Savg } 2302310512Savg 2303263407Sdelphij /* must not have any bookmarks after the most recent snapshot */ 2304263407Sdelphij nvlist_t *proprequest = fnvlist_alloc(); 2305263407Sdelphij fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG)); 2306263407Sdelphij nvlist_t *bookmarks = fnvlist_alloc(); 2307263407Sdelphij error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks); 2308263407Sdelphij fnvlist_free(proprequest); 2309263407Sdelphij if (error != 0) 2310263407Sdelphij return (error); 2311263407Sdelphij for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL); 2312263407Sdelphij pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) { 2313263407Sdelphij nvlist_t *valuenv = 2314263407Sdelphij fnvlist_lookup_nvlist(fnvpair_value_nvlist(pair), 2315263407Sdelphij zfs_prop_to_name(ZFS_PROP_CREATETXG)); 2316263407Sdelphij uint64_t createtxg = fnvlist_lookup_uint64(valuenv, "value"); 2317277585Sdelphij if (createtxg > dsl_dataset_phys(ds)->ds_prev_snap_txg) { 2318263407Sdelphij fnvlist_free(bookmarks); 2319263407Sdelphij dsl_dataset_rele(ds, FTAG); 2320263407Sdelphij return (SET_ERROR(EEXIST)); 2321263407Sdelphij } 2322263407Sdelphij } 2323263407Sdelphij fnvlist_free(bookmarks); 2324263407Sdelphij 2325253816Sdelphij error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx); 2326253816Sdelphij if (error != 0) { 2327248571Smm dsl_dataset_rele(ds, FTAG); 2328253816Sdelphij return (error); 2329248571Smm } 2330168676Spjd 2331248571Smm /* 2332248571Smm * Check if the snap we are rolling back to uses more than 2333248571Smm * the refquota. 2334248571Smm */ 2335248571Smm if (ds->ds_quota != 0 && 2336277585Sdelphij dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes > ds->ds_quota) { 2337248571Smm dsl_dataset_rele(ds, FTAG); 2338249195Smm return (SET_ERROR(EDQUOT)); 2339168676Spjd } 2340168676Spjd 2341248571Smm /* 2342248571Smm * When we do the clone swap, we will temporarily use more space 2343248571Smm * due to the refreservation (the head will no longer have any 2344248571Smm * unique space, so the entire amount of the refreservation will need 2345248571Smm * to be free). We will immediately destroy the clone, freeing 2346248571Smm * this space, but the freeing happens over many txg's. 2347248571Smm */ 2348248571Smm unused_refres_delta = (int64_t)MIN(ds->ds_reserved, 2349277585Sdelphij dsl_dataset_phys(ds)->ds_unique_bytes); 2350168676Spjd 2351248571Smm if (unused_refres_delta > 0 && 2352248571Smm unused_refres_delta > 2353248571Smm dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) { 2354248571Smm dsl_dataset_rele(ds, FTAG); 2355249195Smm return (SET_ERROR(ENOSPC)); 2356248571Smm } 2357168676Spjd 2358248571Smm dsl_dataset_rele(ds, FTAG); 2359185029Spjd return (0); 2360185029Spjd} 2361185029Spjd 2362248571Smmstatic void 2363248571Smmdsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) 2364168404Spjd{ 2365253816Sdelphij dsl_dataset_rollback_arg_t *ddra = arg; 2366248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 2367248571Smm dsl_dataset_t *ds, *clone; 2368248571Smm uint64_t cloneobj; 2369307122Smav char namebuf[ZFS_MAX_DATASET_NAME_LEN]; 2370168404Spjd 2371253816Sdelphij VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds)); 2372219089Spjd 2373254587Sdelphij dsl_dataset_name(ds->ds_prev, namebuf); 2374254587Sdelphij fnvlist_add_string(ddra->ddra_result, "target", namebuf); 2375254587Sdelphij 2376248571Smm cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", 2377248571Smm ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); 2378185029Spjd 2379248571Smm VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); 2380185029Spjd 2381248571Smm dsl_dataset_clone_swap_sync_impl(clone, ds, tx); 2382248571Smm dsl_dataset_zero_zil(ds, tx); 2383219089Spjd 2384248571Smm dsl_destroy_head_sync_impl(clone, tx); 2385168404Spjd 2386248571Smm dsl_dataset_rele(clone, FTAG); 2387248571Smm dsl_dataset_rele(ds, FTAG); 2388248571Smm} 2389168404Spjd 2390253816Sdelphij/* 2391254587Sdelphij * Rolls back the given filesystem or volume to the most recent snapshot. 2392254587Sdelphij * The name of the most recent snapshot will be returned under key "target" 2393254587Sdelphij * in the result nvlist. 2394254587Sdelphij * 2395253816Sdelphij * If owner != NULL: 2396253816Sdelphij * - The existing dataset MUST be owned by the specified owner at entry 2397253816Sdelphij * - Upon return, dataset will still be held by the same owner, whether we 2398253816Sdelphij * succeed or not. 2399253816Sdelphij * 2400253816Sdelphij * This mode is required any time the existing filesystem is mounted. See 2401253816Sdelphij * notes above zfs_suspend_fs() for further details. 2402253816Sdelphij */ 2403248571Smmint 2404254587Sdelphijdsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result) 2405248571Smm{ 2406253816Sdelphij dsl_dataset_rollback_arg_t ddra; 2407253816Sdelphij 2408253816Sdelphij ddra.ddra_fsname = fsname; 2409253816Sdelphij ddra.ddra_owner = owner; 2410254587Sdelphij ddra.ddra_result = result; 2411253816Sdelphij 2412248571Smm return (dsl_sync_task(fsname, dsl_dataset_rollback_check, 2413269006Sdelphij dsl_dataset_rollback_sync, &ddra, 2414269006Sdelphij 1, ZFS_SPACE_CHECK_RESERVED)); 2415168404Spjd} 2416168404Spjd 2417185029Spjdstruct promotenode { 2418185029Spjd list_node_t link; 2419185029Spjd dsl_dataset_t *ds; 2420185029Spjd}; 2421185029Spjd 2422248571Smmtypedef struct dsl_dataset_promote_arg { 2423248571Smm const char *ddpa_clonename; 2424248571Smm dsl_dataset_t *ddpa_clone; 2425185029Spjd list_t shared_snaps, origin_snaps, clone_snaps; 2426248571Smm dsl_dataset_t *origin_origin; /* origin of the origin */ 2427185029Spjd uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 2428219089Spjd char *err_ds; 2429265744Sdelphij cred_t *cr; 2430248571Smm} dsl_dataset_promote_arg_t; 2431168404Spjd 2432185029Spjdstatic int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 2433248571Smmstatic int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, 2434248571Smm void *tag); 2435248571Smmstatic void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag); 2436185029Spjd 2437168404Spjdstatic int 2438248571Smmdsl_dataset_promote_check(void *arg, dmu_tx_t *tx) 2439168404Spjd{ 2440248571Smm dsl_dataset_promote_arg_t *ddpa = arg; 2441248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 2442248571Smm dsl_dataset_t *hds; 2443248571Smm struct promotenode *snap; 2444248571Smm dsl_dataset_t *origin_ds; 2445168404Spjd int err; 2446219089Spjd uint64_t unused; 2447265744Sdelphij uint64_t ss_mv_cnt; 2448284028Savg size_t max_snap_len; 2449168404Spjd 2450248571Smm err = promote_hold(ddpa, dp, FTAG); 2451248571Smm if (err != 0) 2452248571Smm return (err); 2453168404Spjd 2454248571Smm hds = ddpa->ddpa_clone; 2455284028Savg max_snap_len = MAXNAMELEN - strlen(ddpa->ddpa_clonename) - 1; 2456168404Spjd 2457277585Sdelphij if (dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE) { 2458248571Smm promote_rele(ddpa, FTAG); 2459249195Smm return (SET_ERROR(EXDEV)); 2460248571Smm } 2461168404Spjd 2462248571Smm /* 2463248571Smm * Compute and check the amount of space to transfer. Since this is 2464248571Smm * so expensive, don't do the preliminary check. 2465248571Smm */ 2466248571Smm if (!dmu_tx_is_syncing(tx)) { 2467248571Smm promote_rele(ddpa, FTAG); 2468248571Smm return (0); 2469248571Smm } 2470248571Smm 2471248571Smm snap = list_head(&ddpa->shared_snaps); 2472248571Smm origin_ds = snap->ds; 2473248571Smm 2474185029Spjd /* compute origin's new unique space */ 2475248571Smm snap = list_tail(&ddpa->clone_snaps); 2476277585Sdelphij ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==, 2477277585Sdelphij origin_ds->ds_object); 2478219089Spjd dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2479277585Sdelphij dsl_dataset_phys(origin_ds)->ds_prev_snap_txg, UINT64_MAX, 2480248571Smm &ddpa->unique, &unused, &unused); 2481168404Spjd 2482185029Spjd /* 2483185029Spjd * Walk the snapshots that we are moving 2484185029Spjd * 2485185029Spjd * Compute space to transfer. Consider the incremental changes 2486248571Smm * to used by each snapshot: 2487185029Spjd * (my used) = (prev's used) + (blocks born) - (blocks killed) 2488185029Spjd * So each snapshot gave birth to: 2489185029Spjd * (blocks born) = (my used) - (prev's used) + (blocks killed) 2490185029Spjd * So a sequence would look like: 2491185029Spjd * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 2492185029Spjd * Which simplifies to: 2493185029Spjd * uN + kN + kN-1 + ... + k1 + k0 2494185029Spjd * Note however, if we stop before we reach the ORIGIN we get: 2495185029Spjd * uN + kN + kN-1 + ... + kM - uM-1 2496185029Spjd */ 2497265744Sdelphij ss_mv_cnt = 0; 2498277585Sdelphij ddpa->used = dsl_dataset_phys(origin_ds)->ds_referenced_bytes; 2499277585Sdelphij ddpa->comp = dsl_dataset_phys(origin_ds)->ds_compressed_bytes; 2500277585Sdelphij ddpa->uncomp = dsl_dataset_phys(origin_ds)->ds_uncompressed_bytes; 2501248571Smm for (snap = list_head(&ddpa->shared_snaps); snap; 2502248571Smm snap = list_next(&ddpa->shared_snaps, snap)) { 2503185029Spjd uint64_t val, dlused, dlcomp, dluncomp; 2504185029Spjd dsl_dataset_t *ds = snap->ds; 2505168404Spjd 2506265744Sdelphij ss_mv_cnt++; 2507265744Sdelphij 2508248571Smm /* 2509248571Smm * If there are long holds, we won't be able to evict 2510248571Smm * the objset. 2511248571Smm */ 2512248571Smm if (dsl_dataset_long_held(ds)) { 2513249195Smm err = SET_ERROR(EBUSY); 2514248571Smm goto out; 2515248571Smm } 2516248571Smm 2517185029Spjd /* Check that the snapshot name does not conflict */ 2518248571Smm VERIFY0(dsl_dataset_get_snapname(ds)); 2519284028Savg if (strlen(ds->ds_snapname) >= max_snap_len) { 2520284028Savg err = SET_ERROR(ENAMETOOLONG); 2521284028Savg goto out; 2522284028Savg } 2523185029Spjd err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2524219089Spjd if (err == 0) { 2525248571Smm (void) strcpy(ddpa->err_ds, snap->ds->ds_snapname); 2526249195Smm err = SET_ERROR(EEXIST); 2527219089Spjd goto out; 2528219089Spjd } 2529185029Spjd if (err != ENOENT) 2530219089Spjd goto out; 2531168404Spjd 2532185029Spjd /* The very first snapshot does not have a deadlist */ 2533277585Sdelphij if (dsl_dataset_phys(ds)->ds_prev_snap_obj == 0) 2534185029Spjd continue; 2535185029Spjd 2536219089Spjd dsl_deadlist_space(&ds->ds_deadlist, 2537219089Spjd &dlused, &dlcomp, &dluncomp); 2538248571Smm ddpa->used += dlused; 2539248571Smm ddpa->comp += dlcomp; 2540248571Smm ddpa->uncomp += dluncomp; 2541168404Spjd } 2542168404Spjd 2543185029Spjd /* 2544185029Spjd * If we are a clone of a clone then we never reached ORIGIN, 2545185029Spjd * so we need to subtract out the clone origin's used space. 2546185029Spjd */ 2547248571Smm if (ddpa->origin_origin) { 2548277585Sdelphij ddpa->used -= 2549277585Sdelphij dsl_dataset_phys(ddpa->origin_origin)->ds_referenced_bytes; 2550277585Sdelphij ddpa->comp -= 2551277585Sdelphij dsl_dataset_phys(ddpa->origin_origin)->ds_compressed_bytes; 2552248571Smm ddpa->uncomp -= 2553277585Sdelphij dsl_dataset_phys(ddpa->origin_origin)-> 2554277585Sdelphij ds_uncompressed_bytes; 2555168404Spjd } 2556168404Spjd 2557265744Sdelphij /* Check that there is enough space and limit headroom here */ 2558185029Spjd err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2559265744Sdelphij 0, ss_mv_cnt, ddpa->used, ddpa->cr); 2560248571Smm if (err != 0) 2561248571Smm goto out; 2562168404Spjd 2563185029Spjd /* 2564185029Spjd * Compute the amounts of space that will be used by snapshots 2565185029Spjd * after the promotion (for both origin and clone). For each, 2566185029Spjd * it is the amount of space that will be on all of their 2567185029Spjd * deadlists (that was not born before their new origin). 2568185029Spjd */ 2569277585Sdelphij if (dsl_dir_phys(hds->ds_dir)->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2570185029Spjd uint64_t space; 2571168404Spjd 2572168404Spjd /* 2573185029Spjd * Note, typically this will not be a clone of a clone, 2574219089Spjd * so dd_origin_txg will be < TXG_INITIAL, so 2575219089Spjd * these snaplist_space() -> dsl_deadlist_space_range() 2576185029Spjd * calls will be fast because they do not have to 2577185029Spjd * iterate over all bps. 2578168404Spjd */ 2579248571Smm snap = list_head(&ddpa->origin_snaps); 2580248571Smm err = snaplist_space(&ddpa->shared_snaps, 2581248571Smm snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap); 2582248571Smm if (err != 0) 2583248571Smm goto out; 2584168404Spjd 2585248571Smm err = snaplist_space(&ddpa->clone_snaps, 2586219089Spjd snap->ds->ds_dir->dd_origin_txg, &space); 2587248571Smm if (err != 0) 2588248571Smm goto out; 2589248571Smm ddpa->cloneusedsnap += space; 2590168404Spjd } 2591277585Sdelphij if (dsl_dir_phys(origin_ds->ds_dir)->dd_flags & 2592277585Sdelphij DD_FLAG_USED_BREAKDOWN) { 2593248571Smm err = snaplist_space(&ddpa->origin_snaps, 2594277585Sdelphij dsl_dataset_phys(origin_ds)->ds_creation_txg, 2595277585Sdelphij &ddpa->originusedsnap); 2596248571Smm if (err != 0) 2597248571Smm goto out; 2598185029Spjd } 2599168404Spjd 2600219089Spjdout: 2601248571Smm promote_rele(ddpa, FTAG); 2602219089Spjd return (err); 2603168404Spjd} 2604168404Spjd 2605168404Spjdstatic void 2606248571Smmdsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) 2607168404Spjd{ 2608248571Smm dsl_dataset_promote_arg_t *ddpa = arg; 2609248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 2610248571Smm dsl_dataset_t *hds; 2611248571Smm struct promotenode *snap; 2612248571Smm dsl_dataset_t *origin_ds; 2613185029Spjd dsl_dataset_t *origin_head; 2614248571Smm dsl_dir_t *dd; 2615185029Spjd dsl_dir_t *odd = NULL; 2616185029Spjd uint64_t oldnext_obj; 2617185029Spjd int64_t delta; 2618272883Ssmh#if defined(__FreeBSD__) && defined(_KERNEL) 2619272883Ssmh char *oldname, *newname; 2620272883Ssmh#endif 2621168404Spjd 2622248571Smm VERIFY0(promote_hold(ddpa, dp, FTAG)); 2623248571Smm hds = ddpa->ddpa_clone; 2624168404Spjd 2625277585Sdelphij ASSERT0(dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE); 2626248571Smm 2627248571Smm snap = list_head(&ddpa->shared_snaps); 2628248571Smm origin_ds = snap->ds; 2629248571Smm dd = hds->ds_dir; 2630248571Smm 2631248571Smm snap = list_head(&ddpa->origin_snaps); 2632185029Spjd origin_head = snap->ds; 2633185029Spjd 2634168404Spjd /* 2635185029Spjd * We need to explicitly open odd, since origin_ds's dd will be 2636168404Spjd * changing. 2637168404Spjd */ 2638248571Smm VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, 2639185029Spjd NULL, FTAG, &odd)); 2640168404Spjd 2641185029Spjd /* change origin's next snap */ 2642185029Spjd dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2643277585Sdelphij oldnext_obj = dsl_dataset_phys(origin_ds)->ds_next_snap_obj; 2644248571Smm snap = list_tail(&ddpa->clone_snaps); 2645277585Sdelphij ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==, 2646277585Sdelphij origin_ds->ds_object); 2647277585Sdelphij dsl_dataset_phys(origin_ds)->ds_next_snap_obj = snap->ds->ds_object; 2648185029Spjd 2649185029Spjd /* change the origin's next clone */ 2650277585Sdelphij if (dsl_dataset_phys(origin_ds)->ds_next_clones_obj) { 2651248571Smm dsl_dataset_remove_from_next_clones(origin_ds, 2652248571Smm snap->ds->ds_object, tx); 2653248571Smm VERIFY0(zap_add_int(dp->dp_meta_objset, 2654277585Sdelphij dsl_dataset_phys(origin_ds)->ds_next_clones_obj, 2655185029Spjd oldnext_obj, tx)); 2656185029Spjd } 2657185029Spjd 2658185029Spjd /* change origin */ 2659185029Spjd dmu_buf_will_dirty(dd->dd_dbuf, tx); 2660277585Sdelphij ASSERT3U(dsl_dir_phys(dd)->dd_origin_obj, ==, origin_ds->ds_object); 2661277585Sdelphij dsl_dir_phys(dd)->dd_origin_obj = dsl_dir_phys(odd)->dd_origin_obj; 2662219089Spjd dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; 2663185029Spjd dmu_buf_will_dirty(odd->dd_dbuf, tx); 2664277585Sdelphij dsl_dir_phys(odd)->dd_origin_obj = origin_ds->ds_object; 2665219089Spjd origin_head->ds_dir->dd_origin_txg = 2666277585Sdelphij dsl_dataset_phys(origin_ds)->ds_creation_txg; 2667185029Spjd 2668219089Spjd /* change dd_clone entries */ 2669219089Spjd if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2670248571Smm VERIFY0(zap_remove_int(dp->dp_meta_objset, 2671277585Sdelphij dsl_dir_phys(odd)->dd_clones, hds->ds_object, tx)); 2672248571Smm VERIFY0(zap_add_int(dp->dp_meta_objset, 2673277585Sdelphij dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones, 2674219089Spjd hds->ds_object, tx)); 2675219089Spjd 2676248571Smm VERIFY0(zap_remove_int(dp->dp_meta_objset, 2677277585Sdelphij dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones, 2678219089Spjd origin_head->ds_object, tx)); 2679277585Sdelphij if (dsl_dir_phys(dd)->dd_clones == 0) { 2680277585Sdelphij dsl_dir_phys(dd)->dd_clones = 2681277585Sdelphij zap_create(dp->dp_meta_objset, DMU_OT_DSL_CLONES, 2682277585Sdelphij DMU_OT_NONE, 0, tx); 2683219089Spjd } 2684248571Smm VERIFY0(zap_add_int(dp->dp_meta_objset, 2685277585Sdelphij dsl_dir_phys(dd)->dd_clones, origin_head->ds_object, tx)); 2686219089Spjd } 2687219089Spjd 2688272883Ssmh#if defined(__FreeBSD__) && defined(_KERNEL) 2689272883Ssmh /* Take the spa_namespace_lock early so zvol renames don't deadlock. */ 2690272883Ssmh mutex_enter(&spa_namespace_lock); 2691272883Ssmh 2692272883Ssmh oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2693272883Ssmh newname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2694272883Ssmh#endif 2695272883Ssmh 2696168404Spjd /* move snapshots to this dir */ 2697248571Smm for (snap = list_head(&ddpa->shared_snaps); snap; 2698248571Smm snap = list_next(&ddpa->shared_snaps, snap)) { 2699185029Spjd dsl_dataset_t *ds = snap->ds; 2700168404Spjd 2701248571Smm /* 2702248571Smm * Property callbacks are registered to a particular 2703248571Smm * dsl_dir. Since ours is changing, evict the objset 2704248571Smm * so that they will be unregistered from the old dsl_dir. 2705248571Smm */ 2706219089Spjd if (ds->ds_objset) { 2707219089Spjd dmu_objset_evict(ds->ds_objset); 2708219089Spjd ds->ds_objset = NULL; 2709185029Spjd } 2710248571Smm 2711168404Spjd /* move snap name entry */ 2712248571Smm VERIFY0(dsl_dataset_get_snapname(ds)); 2713248571Smm VERIFY0(dsl_dataset_snap_remove(origin_head, 2714265744Sdelphij ds->ds_snapname, tx, B_TRUE)); 2715248571Smm VERIFY0(zap_add(dp->dp_meta_objset, 2716277585Sdelphij dsl_dataset_phys(hds)->ds_snapnames_zapobj, ds->ds_snapname, 2717168404Spjd 8, 1, &ds->ds_object, tx)); 2718265744Sdelphij dsl_fs_ss_count_adjust(hds->ds_dir, 1, 2719265744Sdelphij DD_FIELD_SNAPSHOT_COUNT, tx); 2720219089Spjd 2721168404Spjd /* change containing dsl_dir */ 2722168404Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 2723277585Sdelphij ASSERT3U(dsl_dataset_phys(ds)->ds_dir_obj, ==, odd->dd_object); 2724277585Sdelphij dsl_dataset_phys(ds)->ds_dir_obj = dd->dd_object; 2725185029Spjd ASSERT3P(ds->ds_dir, ==, odd); 2726248571Smm dsl_dir_rele(ds->ds_dir, ds); 2727248571Smm VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object, 2728168404Spjd NULL, ds, &ds->ds_dir)); 2729168404Spjd 2730272883Ssmh#if defined(__FreeBSD__) && defined(_KERNEL) 2731272883Ssmh dsl_dataset_name(ds, newname); 2732272883Ssmh zfsvfs_update_fromname(oldname, newname); 2733272883Ssmh zvol_rename_minors(oldname, newname); 2734272883Ssmh#endif 2735272883Ssmh 2736219089Spjd /* move any clone references */ 2737277585Sdelphij if (dsl_dataset_phys(ds)->ds_next_clones_obj && 2738219089Spjd spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2739219089Spjd zap_cursor_t zc; 2740219089Spjd zap_attribute_t za; 2741219089Spjd 2742219089Spjd for (zap_cursor_init(&zc, dp->dp_meta_objset, 2743277585Sdelphij dsl_dataset_phys(ds)->ds_next_clones_obj); 2744219089Spjd zap_cursor_retrieve(&zc, &za) == 0; 2745219089Spjd zap_cursor_advance(&zc)) { 2746219089Spjd dsl_dataset_t *cnds; 2747219089Spjd uint64_t o; 2748219089Spjd 2749219089Spjd if (za.za_first_integer == oldnext_obj) { 2750219089Spjd /* 2751219089Spjd * We've already moved the 2752219089Spjd * origin's reference. 2753219089Spjd */ 2754219089Spjd continue; 2755219089Spjd } 2756219089Spjd 2757248571Smm VERIFY0(dsl_dataset_hold_obj(dp, 2758219089Spjd za.za_first_integer, FTAG, &cnds)); 2759277585Sdelphij o = dsl_dir_phys(cnds->ds_dir)-> 2760277585Sdelphij dd_head_dataset_obj; 2761219089Spjd 2762248571Smm VERIFY0(zap_remove_int(dp->dp_meta_objset, 2763277585Sdelphij dsl_dir_phys(odd)->dd_clones, o, tx)); 2764248571Smm VERIFY0(zap_add_int(dp->dp_meta_objset, 2765277585Sdelphij dsl_dir_phys(dd)->dd_clones, o, tx)); 2766219089Spjd dsl_dataset_rele(cnds, FTAG); 2767219089Spjd } 2768219089Spjd zap_cursor_fini(&zc); 2769219089Spjd } 2770219089Spjd 2771248571Smm ASSERT(!dsl_prop_hascb(ds)); 2772185029Spjd } 2773168404Spjd 2774272883Ssmh#if defined(__FreeBSD__) && defined(_KERNEL) 2775272883Ssmh mutex_exit(&spa_namespace_lock); 2776272883Ssmh 2777272883Ssmh kmem_free(newname, MAXPATHLEN); 2778272883Ssmh kmem_free(oldname, MAXPATHLEN); 2779272883Ssmh#endif 2780185029Spjd /* 2781185029Spjd * Change space accounting. 2782185029Spjd * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 2783185029Spjd * both be valid, or both be 0 (resulting in delta == 0). This 2784185029Spjd * is true for each of {clone,origin} independently. 2785185029Spjd */ 2786168404Spjd 2787248571Smm delta = ddpa->cloneusedsnap - 2788277585Sdelphij dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]; 2789185029Spjd ASSERT3S(delta, >=, 0); 2790248571Smm ASSERT3U(ddpa->used, >=, delta); 2791185029Spjd dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2792185029Spjd dsl_dir_diduse_space(dd, DD_USED_HEAD, 2793248571Smm ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx); 2794168404Spjd 2795248571Smm delta = ddpa->originusedsnap - 2796277585Sdelphij dsl_dir_phys(odd)->dd_used_breakdown[DD_USED_SNAP]; 2797185029Spjd ASSERT3S(delta, <=, 0); 2798248571Smm ASSERT3U(ddpa->used, >=, -delta); 2799185029Spjd dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2800185029Spjd dsl_dir_diduse_space(odd, DD_USED_HEAD, 2801248571Smm -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx); 2802185029Spjd 2803277585Sdelphij dsl_dataset_phys(origin_ds)->ds_unique_bytes = ddpa->unique; 2804185029Spjd 2805185029Spjd /* log history record */ 2806248571Smm spa_history_log_internal_ds(hds, "promote", tx, ""); 2807185029Spjd 2808248571Smm dsl_dir_rele(odd, FTAG); 2809248571Smm promote_rele(ddpa, FTAG); 2810185029Spjd} 2811185029Spjd 2812185029Spjd/* 2813185029Spjd * Make a list of dsl_dataset_t's for the snapshots between first_obj 2814185029Spjd * (exclusive) and last_obj (inclusive). The list will be in reverse 2815185029Spjd * order (last_obj will be the list_head()). If first_obj == 0, do all 2816185029Spjd * snapshots back to this dataset's origin. 2817185029Spjd */ 2818185029Spjdstatic int 2819248571Smmsnaplist_make(dsl_pool_t *dp, 2820248571Smm uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag) 2821185029Spjd{ 2822185029Spjd uint64_t obj = last_obj; 2823185029Spjd 2824185029Spjd list_create(l, sizeof (struct promotenode), 2825185029Spjd offsetof(struct promotenode, link)); 2826185029Spjd 2827185029Spjd while (obj != first_obj) { 2828185029Spjd dsl_dataset_t *ds; 2829185029Spjd struct promotenode *snap; 2830185029Spjd int err; 2831185029Spjd 2832248571Smm err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 2833248571Smm ASSERT(err != ENOENT); 2834248571Smm if (err != 0) 2835185029Spjd return (err); 2836185029Spjd 2837185029Spjd if (first_obj == 0) 2838277585Sdelphij first_obj = dsl_dir_phys(ds->ds_dir)->dd_origin_obj; 2839185029Spjd 2840248571Smm snap = kmem_alloc(sizeof (*snap), KM_SLEEP); 2841185029Spjd snap->ds = ds; 2842185029Spjd list_insert_tail(l, snap); 2843277585Sdelphij obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; 2844168404Spjd } 2845168404Spjd 2846185029Spjd return (0); 2847185029Spjd} 2848168404Spjd 2849185029Spjdstatic int 2850185029Spjdsnaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2851185029Spjd{ 2852185029Spjd struct promotenode *snap; 2853168404Spjd 2854185029Spjd *spacep = 0; 2855185029Spjd for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2856219089Spjd uint64_t used, comp, uncomp; 2857219089Spjd dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2858219089Spjd mintxg, UINT64_MAX, &used, &comp, &uncomp); 2859185029Spjd *spacep += used; 2860185029Spjd } 2861185029Spjd return (0); 2862185029Spjd} 2863168404Spjd 2864185029Spjdstatic void 2865248571Smmsnaplist_destroy(list_t *l, void *tag) 2866185029Spjd{ 2867185029Spjd struct promotenode *snap; 2868185029Spjd 2869248571Smm if (l == NULL || !list_link_active(&l->list_head)) 2870185029Spjd return; 2871185029Spjd 2872185029Spjd while ((snap = list_tail(l)) != NULL) { 2873185029Spjd list_remove(l, snap); 2874248571Smm dsl_dataset_rele(snap->ds, tag); 2875248571Smm kmem_free(snap, sizeof (*snap)); 2876185029Spjd } 2877185029Spjd list_destroy(l); 2878168404Spjd} 2879168404Spjd 2880248571Smmstatic int 2881248571Smmpromote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag) 2882168404Spjd{ 2883248571Smm int error; 2884185029Spjd dsl_dir_t *dd; 2885185029Spjd struct promotenode *snap; 2886168404Spjd 2887248571Smm error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag, 2888248571Smm &ddpa->ddpa_clone); 2889248571Smm if (error != 0) 2890248571Smm return (error); 2891248571Smm dd = ddpa->ddpa_clone->ds_dir; 2892168404Spjd 2893288549Smav if (ddpa->ddpa_clone->ds_is_snapshot || 2894248571Smm !dsl_dir_is_clone(dd)) { 2895248571Smm dsl_dataset_rele(ddpa->ddpa_clone, tag); 2896249195Smm return (SET_ERROR(EINVAL)); 2897185029Spjd } 2898185029Spjd 2899277585Sdelphij error = snaplist_make(dp, 0, dsl_dir_phys(dd)->dd_origin_obj, 2900248571Smm &ddpa->shared_snaps, tag); 2901248571Smm if (error != 0) 2902185029Spjd goto out; 2903185029Spjd 2904248571Smm error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object, 2905248571Smm &ddpa->clone_snaps, tag); 2906248571Smm if (error != 0) 2907185029Spjd goto out; 2908185029Spjd 2909248571Smm snap = list_head(&ddpa->shared_snaps); 2910277585Sdelphij ASSERT3U(snap->ds->ds_object, ==, dsl_dir_phys(dd)->dd_origin_obj); 2911277585Sdelphij error = snaplist_make(dp, dsl_dir_phys(dd)->dd_origin_obj, 2912277585Sdelphij dsl_dir_phys(snap->ds->ds_dir)->dd_head_dataset_obj, 2913248571Smm &ddpa->origin_snaps, tag); 2914248571Smm if (error != 0) 2915185029Spjd goto out; 2916185029Spjd 2917277585Sdelphij if (dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj != 0) { 2918248571Smm error = dsl_dataset_hold_obj(dp, 2919277585Sdelphij dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj, 2920248571Smm tag, &ddpa->origin_origin); 2921248571Smm if (error != 0) 2922185029Spjd goto out; 2923185029Spjd } 2924185029Spjdout: 2925248571Smm if (error != 0) 2926248571Smm promote_rele(ddpa, tag); 2927248571Smm return (error); 2928248571Smm} 2929185029Spjd 2930248571Smmstatic void 2931248571Smmpromote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag) 2932248571Smm{ 2933248571Smm snaplist_destroy(&ddpa->shared_snaps, tag); 2934248571Smm snaplist_destroy(&ddpa->clone_snaps, tag); 2935248571Smm snaplist_destroy(&ddpa->origin_snaps, tag); 2936248571Smm if (ddpa->origin_origin != NULL) 2937248571Smm dsl_dataset_rele(ddpa->origin_origin, tag); 2938248571Smm dsl_dataset_rele(ddpa->ddpa_clone, tag); 2939248571Smm} 2940248571Smm 2941248571Smm/* 2942248571Smm * Promote a clone. 2943248571Smm * 2944248571Smm * If it fails due to a conflicting snapshot name, "conflsnap" will be filled 2945307122Smav * in with the name. (It must be at least ZFS_MAX_DATASET_NAME_LEN bytes long.) 2946248571Smm */ 2947248571Smmint 2948248571Smmdsl_dataset_promote(const char *name, char *conflsnap) 2949248571Smm{ 2950248571Smm dsl_dataset_promote_arg_t ddpa = { 0 }; 2951248571Smm uint64_t numsnaps; 2952248571Smm int error; 2953248571Smm objset_t *os; 2954248571Smm 2955185029Spjd /* 2956248571Smm * We will modify space proportional to the number of 2957248571Smm * snapshots. Compute numsnaps. 2958168404Spjd */ 2959248571Smm error = dmu_objset_hold(name, FTAG, &os); 2960248571Smm if (error != 0) 2961248571Smm return (error); 2962248571Smm error = zap_count(dmu_objset_pool(os)->dp_meta_objset, 2963277585Sdelphij dsl_dataset_phys(dmu_objset_ds(os))->ds_snapnames_zapobj, 2964277585Sdelphij &numsnaps); 2965248571Smm dmu_objset_rele(os, FTAG); 2966248571Smm if (error != 0) 2967248571Smm return (error); 2968185029Spjd 2969248571Smm ddpa.ddpa_clonename = name; 2970248571Smm ddpa.err_ds = conflsnap; 2971265744Sdelphij ddpa.cr = CRED(); 2972248571Smm 2973248571Smm return (dsl_sync_task(name, dsl_dataset_promote_check, 2974269006Sdelphij dsl_dataset_promote_sync, &ddpa, 2975269006Sdelphij 2 + numsnaps, ZFS_SPACE_CHECK_RESERVED)); 2976168404Spjd} 2977168404Spjd 2978248571Smmint 2979248571Smmdsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, 2980253816Sdelphij dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) 2981185029Spjd{ 2982297113Smav /* 2983297113Smav * "slack" factor for received datasets with refquota set on them. 2984297113Smav * See the bottom of this function for details on its use. 2985297113Smav */ 2986297113Smav uint64_t refquota_slack = DMU_MAX_ACCESS * spa_asize_inflation; 2987248571Smm int64_t unused_refres_delta; 2988185029Spjd 2989185029Spjd /* they should both be heads */ 2990288549Smav if (clone->ds_is_snapshot || 2991288549Smav origin_head->ds_is_snapshot) 2992249195Smm return (SET_ERROR(EINVAL)); 2993185029Spjd 2994253820Sdelphij /* if we are not forcing, the branch point should be just before them */ 2995253820Sdelphij if (!force && clone->ds_prev != origin_head->ds_prev) 2996249195Smm return (SET_ERROR(EINVAL)); 2997185029Spjd 2998248571Smm /* clone should be the clone (unless they are unrelated) */ 2999248571Smm if (clone->ds_prev != NULL && 3000248571Smm clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap && 3001253820Sdelphij origin_head->ds_dir != clone->ds_prev->ds_dir) 3002249195Smm return (SET_ERROR(EINVAL)); 3003185029Spjd 3004185029Spjd /* the clone should be a child of the origin */ 3005248571Smm if (clone->ds_dir->dd_parent != origin_head->ds_dir) 3006249195Smm return (SET_ERROR(EINVAL)); 3007185029Spjd 3008248571Smm /* origin_head shouldn't be modified unless 'force' */ 3009253820Sdelphij if (!force && 3010253820Sdelphij dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev)) 3011249195Smm return (SET_ERROR(ETXTBSY)); 3012185029Spjd 3013248571Smm /* origin_head should have no long holds (e.g. is not mounted) */ 3014253816Sdelphij if (dsl_dataset_handoff_check(origin_head, owner, tx)) 3015249195Smm return (SET_ERROR(EBUSY)); 3016185029Spjd 3017248571Smm /* check amount of any unconsumed refreservation */ 3018248571Smm unused_refres_delta = 3019248571Smm (int64_t)MIN(origin_head->ds_reserved, 3020277585Sdelphij dsl_dataset_phys(origin_head)->ds_unique_bytes) - 3021248571Smm (int64_t)MIN(origin_head->ds_reserved, 3022277585Sdelphij dsl_dataset_phys(clone)->ds_unique_bytes); 3023248571Smm 3024248571Smm if (unused_refres_delta > 0 && 3025248571Smm unused_refres_delta > 3026248571Smm dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE)) 3027249195Smm return (SET_ERROR(ENOSPC)); 3028185029Spjd 3029297113Smav /* 3030297113Smav * The clone can't be too much over the head's refquota. 3031297113Smav * 3032297113Smav * To ensure that the entire refquota can be used, we allow one 3033297113Smav * transaction to exceed the the refquota. Therefore, this check 3034297113Smav * needs to also allow for the space referenced to be more than the 3035297113Smav * refquota. The maximum amount of space that one transaction can use 3036297113Smav * on disk is DMU_MAX_ACCESS * spa_asize_inflation. Allowing this 3037297113Smav * overage ensures that we are able to receive a filesystem that 3038297113Smav * exceeds the refquota on the source system. 3039297113Smav * 3040297113Smav * So that overage is the refquota_slack we use below. 3041297113Smav */ 3042248571Smm if (origin_head->ds_quota != 0 && 3043277585Sdelphij dsl_dataset_phys(clone)->ds_referenced_bytes > 3044297113Smav origin_head->ds_quota + refquota_slack) 3045249195Smm return (SET_ERROR(EDQUOT)); 3046219089Spjd 3047185029Spjd return (0); 3048185029Spjd} 3049185029Spjd 3050248571Smmvoid 3051248571Smmdsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, 3052248571Smm dsl_dataset_t *origin_head, dmu_tx_t *tx) 3053185029Spjd{ 3054248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 3055248571Smm int64_t unused_refres_delta; 3056185029Spjd 3057248571Smm ASSERT(clone->ds_reserved == 0); 3058297113Smav /* 3059297113Smav * NOTE: On DEBUG kernels there could be a race between this and 3060297113Smav * the check function if spa_asize_inflation is adjusted... 3061297113Smav */ 3062248571Smm ASSERT(origin_head->ds_quota == 0 || 3063297113Smav dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota + 3064297113Smav DMU_MAX_ACCESS * spa_asize_inflation); 3065253820Sdelphij ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev); 3066185029Spjd 3067288572Smav /* 3068288572Smav * Swap per-dataset feature flags. 3069288572Smav */ 3070288572Smav for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 3071288572Smav if (!(spa_feature_table[f].fi_flags & 3072288572Smav ZFEATURE_FLAG_PER_DATASET)) { 3073288572Smav ASSERT(!clone->ds_feature_inuse[f]); 3074288572Smav ASSERT(!origin_head->ds_feature_inuse[f]); 3075288572Smav continue; 3076288572Smav } 3077288572Smav 3078288572Smav boolean_t clone_inuse = clone->ds_feature_inuse[f]; 3079288572Smav boolean_t origin_head_inuse = origin_head->ds_feature_inuse[f]; 3080288572Smav 3081288572Smav if (clone_inuse) { 3082288572Smav dsl_dataset_deactivate_feature(clone->ds_object, f, tx); 3083288572Smav clone->ds_feature_inuse[f] = B_FALSE; 3084288572Smav } 3085288572Smav if (origin_head_inuse) { 3086288572Smav dsl_dataset_deactivate_feature(origin_head->ds_object, 3087288572Smav f, tx); 3088288572Smav origin_head->ds_feature_inuse[f] = B_FALSE; 3089288572Smav } 3090288572Smav if (clone_inuse) { 3091288572Smav dsl_dataset_activate_feature(origin_head->ds_object, 3092288572Smav f, tx); 3093288572Smav origin_head->ds_feature_inuse[f] = B_TRUE; 3094288572Smav } 3095288572Smav if (origin_head_inuse) { 3096288572Smav dsl_dataset_activate_feature(clone->ds_object, f, tx); 3097288572Smav clone->ds_feature_inuse[f] = B_TRUE; 3098288572Smav } 3099288572Smav } 3100288572Smav 3101248571Smm dmu_buf_will_dirty(clone->ds_dbuf, tx); 3102248571Smm dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 3103185029Spjd 3104248571Smm if (clone->ds_objset != NULL) { 3105248571Smm dmu_objset_evict(clone->ds_objset); 3106248571Smm clone->ds_objset = NULL; 3107185029Spjd } 3108185029Spjd 3109248571Smm if (origin_head->ds_objset != NULL) { 3110248571Smm dmu_objset_evict(origin_head->ds_objset); 3111248571Smm origin_head->ds_objset = NULL; 3112185029Spjd } 3113185029Spjd 3114248571Smm unused_refres_delta = 3115248571Smm (int64_t)MIN(origin_head->ds_reserved, 3116277585Sdelphij dsl_dataset_phys(origin_head)->ds_unique_bytes) - 3117248571Smm (int64_t)MIN(origin_head->ds_reserved, 3118277585Sdelphij dsl_dataset_phys(clone)->ds_unique_bytes); 3119248571Smm 3120219089Spjd /* 3121219089Spjd * Reset origin's unique bytes, if it exists. 3122219089Spjd */ 3123248571Smm if (clone->ds_prev) { 3124248571Smm dsl_dataset_t *origin = clone->ds_prev; 3125219089Spjd uint64_t comp, uncomp; 3126185029Spjd 3127219089Spjd dmu_buf_will_dirty(origin->ds_dbuf, tx); 3128248571Smm dsl_deadlist_space_range(&clone->ds_deadlist, 3129277585Sdelphij dsl_dataset_phys(origin)->ds_prev_snap_txg, UINT64_MAX, 3130277585Sdelphij &dsl_dataset_phys(origin)->ds_unique_bytes, &comp, &uncomp); 3131219089Spjd } 3132219089Spjd 3133185029Spjd /* swap blkptrs */ 3134185029Spjd { 3135308083Smav rrw_enter(&clone->ds_bp_rwlock, RW_WRITER, FTAG); 3136308083Smav rrw_enter(&origin_head->ds_bp_rwlock, RW_WRITER, FTAG); 3137185029Spjd blkptr_t tmp; 3138277585Sdelphij tmp = dsl_dataset_phys(origin_head)->ds_bp; 3139277585Sdelphij dsl_dataset_phys(origin_head)->ds_bp = 3140277585Sdelphij dsl_dataset_phys(clone)->ds_bp; 3141277585Sdelphij dsl_dataset_phys(clone)->ds_bp = tmp; 3142308083Smav rrw_exit(&origin_head->ds_bp_rwlock, FTAG); 3143308083Smav rrw_exit(&clone->ds_bp_rwlock, FTAG); 3144185029Spjd } 3145185029Spjd 3146185029Spjd /* set dd_*_bytes */ 3147185029Spjd { 3148185029Spjd int64_t dused, dcomp, duncomp; 3149185029Spjd uint64_t cdl_used, cdl_comp, cdl_uncomp; 3150185029Spjd uint64_t odl_used, odl_comp, odl_uncomp; 3151185029Spjd 3152277585Sdelphij ASSERT3U(dsl_dir_phys(clone->ds_dir)-> 3153185029Spjd dd_used_breakdown[DD_USED_SNAP], ==, 0); 3154185029Spjd 3155248571Smm dsl_deadlist_space(&clone->ds_deadlist, 3156219089Spjd &cdl_used, &cdl_comp, &cdl_uncomp); 3157248571Smm dsl_deadlist_space(&origin_head->ds_deadlist, 3158219089Spjd &odl_used, &odl_comp, &odl_uncomp); 3159185029Spjd 3160277585Sdelphij dused = dsl_dataset_phys(clone)->ds_referenced_bytes + 3161277585Sdelphij cdl_used - 3162277585Sdelphij (dsl_dataset_phys(origin_head)->ds_referenced_bytes + 3163277585Sdelphij odl_used); 3164277585Sdelphij dcomp = dsl_dataset_phys(clone)->ds_compressed_bytes + 3165277585Sdelphij cdl_comp - 3166277585Sdelphij (dsl_dataset_phys(origin_head)->ds_compressed_bytes + 3167277585Sdelphij odl_comp); 3168277585Sdelphij duncomp = dsl_dataset_phys(clone)->ds_uncompressed_bytes + 3169185029Spjd cdl_uncomp - 3170277585Sdelphij (dsl_dataset_phys(origin_head)->ds_uncompressed_bytes + 3171277585Sdelphij odl_uncomp); 3172185029Spjd 3173248571Smm dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD, 3174185029Spjd dused, dcomp, duncomp, tx); 3175248571Smm dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD, 3176185029Spjd -dused, -dcomp, -duncomp, tx); 3177185029Spjd 3178185029Spjd /* 3179185029Spjd * The difference in the space used by snapshots is the 3180185029Spjd * difference in snapshot space due to the head's 3181185029Spjd * deadlist (since that's the only thing that's 3182185029Spjd * changing that affects the snapused). 3183185029Spjd */ 3184248571Smm dsl_deadlist_space_range(&clone->ds_deadlist, 3185248571Smm origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 3186219089Spjd &cdl_used, &cdl_comp, &cdl_uncomp); 3187248571Smm dsl_deadlist_space_range(&origin_head->ds_deadlist, 3188248571Smm origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 3189219089Spjd &odl_used, &odl_comp, &odl_uncomp); 3190248571Smm dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used, 3191278142Smav DD_USED_HEAD, DD_USED_SNAP, NULL); 3192185029Spjd } 3193185029Spjd 3194185029Spjd /* swap ds_*_bytes */ 3195277585Sdelphij SWITCH64(dsl_dataset_phys(origin_head)->ds_referenced_bytes, 3196277585Sdelphij dsl_dataset_phys(clone)->ds_referenced_bytes); 3197277585Sdelphij SWITCH64(dsl_dataset_phys(origin_head)->ds_compressed_bytes, 3198277585Sdelphij dsl_dataset_phys(clone)->ds_compressed_bytes); 3199277585Sdelphij SWITCH64(dsl_dataset_phys(origin_head)->ds_uncompressed_bytes, 3200277585Sdelphij dsl_dataset_phys(clone)->ds_uncompressed_bytes); 3201277585Sdelphij SWITCH64(dsl_dataset_phys(origin_head)->ds_unique_bytes, 3202277585Sdelphij dsl_dataset_phys(clone)->ds_unique_bytes); 3203185029Spjd 3204185029Spjd /* apply any parent delta for change in unconsumed refreservation */ 3205248571Smm dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV, 3206248571Smm unused_refres_delta, 0, 0, tx); 3207185029Spjd 3208219089Spjd /* 3209219089Spjd * Swap deadlists. 3210219089Spjd */ 3211248571Smm dsl_deadlist_close(&clone->ds_deadlist); 3212248571Smm dsl_deadlist_close(&origin_head->ds_deadlist); 3213277585Sdelphij SWITCH64(dsl_dataset_phys(origin_head)->ds_deadlist_obj, 3214277585Sdelphij dsl_dataset_phys(clone)->ds_deadlist_obj); 3215248571Smm dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, 3216277585Sdelphij dsl_dataset_phys(clone)->ds_deadlist_obj); 3217248571Smm dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, 3218277585Sdelphij dsl_dataset_phys(origin_head)->ds_deadlist_obj); 3219208047Smm 3220248571Smm dsl_scan_ds_clone_swapped(origin_head, clone, tx); 3221185029Spjd 3222248571Smm spa_history_log_internal_ds(clone, "clone swap", tx, 3223248571Smm "parent=%s", origin_head->ds_dir->dd_myname); 3224185029Spjd} 3225185029Spjd 3226185029Spjd/* 3227168404Spjd * Given a pool name and a dataset object number in that pool, 3228168404Spjd * return the name of that dataset. 3229168404Spjd */ 3230168404Spjdint 3231168404Spjddsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 3232168404Spjd{ 3233168404Spjd dsl_pool_t *dp; 3234185029Spjd dsl_dataset_t *ds; 3235168404Spjd int error; 3236168404Spjd 3237248571Smm error = dsl_pool_hold(pname, FTAG, &dp); 3238248571Smm if (error != 0) 3239168404Spjd return (error); 3240248571Smm 3241248571Smm error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); 3242248571Smm if (error == 0) { 3243185029Spjd dsl_dataset_name(ds, buf); 3244185029Spjd dsl_dataset_rele(ds, FTAG); 3245168404Spjd } 3246248571Smm dsl_pool_rele(dp, FTAG); 3247168404Spjd 3248185029Spjd return (error); 3249185029Spjd} 3250185029Spjd 3251185029Spjdint 3252185029Spjddsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 3253185029Spjd uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 3254185029Spjd{ 3255185029Spjd int error = 0; 3256185029Spjd 3257185029Spjd ASSERT3S(asize, >, 0); 3258185029Spjd 3259185029Spjd /* 3260185029Spjd * *ref_rsrv is the portion of asize that will come from any 3261185029Spjd * unconsumed refreservation space. 3262185029Spjd */ 3263185029Spjd *ref_rsrv = 0; 3264185029Spjd 3265185029Spjd mutex_enter(&ds->ds_lock); 3266185029Spjd /* 3267185029Spjd * Make a space adjustment for reserved bytes. 3268185029Spjd */ 3269277585Sdelphij if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) { 3270185029Spjd ASSERT3U(*used, >=, 3271277585Sdelphij ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes); 3272277585Sdelphij *used -= 3273277585Sdelphij (ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes); 3274185029Spjd *ref_rsrv = 3275185029Spjd asize - MIN(asize, parent_delta(ds, asize + inflight)); 3276185029Spjd } 3277185029Spjd 3278185029Spjd if (!check_quota || ds->ds_quota == 0) { 3279185029Spjd mutex_exit(&ds->ds_lock); 3280185029Spjd return (0); 3281185029Spjd } 3282185029Spjd /* 3283185029Spjd * If they are requesting more space, and our current estimate 3284185029Spjd * is over quota, they get to try again unless the actual 3285185029Spjd * on-disk is over quota and there are no pending changes (which 3286185029Spjd * may free up space for us). 3287185029Spjd */ 3288277585Sdelphij if (dsl_dataset_phys(ds)->ds_referenced_bytes + inflight >= 3289277585Sdelphij ds->ds_quota) { 3290236884Smm if (inflight > 0 || 3291277585Sdelphij dsl_dataset_phys(ds)->ds_referenced_bytes < ds->ds_quota) 3292249195Smm error = SET_ERROR(ERESTART); 3293185029Spjd else 3294249195Smm error = SET_ERROR(EDQUOT); 3295185029Spjd } 3296185029Spjd mutex_exit(&ds->ds_lock); 3297185029Spjd 3298185029Spjd return (error); 3299185029Spjd} 3300185029Spjd 3301248571Smmtypedef struct dsl_dataset_set_qr_arg { 3302248571Smm const char *ddsqra_name; 3303248571Smm zprop_source_t ddsqra_source; 3304248571Smm uint64_t ddsqra_value; 3305248571Smm} dsl_dataset_set_qr_arg_t; 3306248571Smm 3307248571Smm 3308185029Spjd/* ARGSUSED */ 3309185029Spjdstatic int 3310248571Smmdsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx) 3311185029Spjd{ 3312248571Smm dsl_dataset_set_qr_arg_t *ddsqra = arg; 3313248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 3314248571Smm dsl_dataset_t *ds; 3315248571Smm int error; 3316248571Smm uint64_t newval; 3317185029Spjd 3318248571Smm if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA) 3319249195Smm return (SET_ERROR(ENOTSUP)); 3320185029Spjd 3321248571Smm error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 3322248571Smm if (error != 0) 3323248571Smm return (error); 3324219089Spjd 3325288549Smav if (ds->ds_is_snapshot) { 3326248571Smm dsl_dataset_rele(ds, FTAG); 3327249195Smm return (SET_ERROR(EINVAL)); 3328248571Smm } 3329248571Smm 3330248571Smm error = dsl_prop_predict(ds->ds_dir, 3331248571Smm zfs_prop_to_name(ZFS_PROP_REFQUOTA), 3332248571Smm ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 3333248571Smm if (error != 0) { 3334248571Smm dsl_dataset_rele(ds, FTAG); 3335248571Smm return (error); 3336248571Smm } 3337248571Smm 3338248571Smm if (newval == 0) { 3339248571Smm dsl_dataset_rele(ds, FTAG); 3340185029Spjd return (0); 3341248571Smm } 3342185029Spjd 3343277585Sdelphij if (newval < dsl_dataset_phys(ds)->ds_referenced_bytes || 3344248571Smm newval < ds->ds_reserved) { 3345248571Smm dsl_dataset_rele(ds, FTAG); 3346249195Smm return (SET_ERROR(ENOSPC)); 3347248571Smm } 3348185029Spjd 3349248571Smm dsl_dataset_rele(ds, FTAG); 3350168404Spjd return (0); 3351168404Spjd} 3352185029Spjd 3353248571Smmstatic void 3354248571Smmdsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx) 3355185029Spjd{ 3356248571Smm dsl_dataset_set_qr_arg_t *ddsqra = arg; 3357248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 3358248571Smm dsl_dataset_t *ds; 3359248571Smm uint64_t newval; 3360185029Spjd 3361248571Smm VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 3362185029Spjd 3363248571Smm dsl_prop_set_sync_impl(ds, 3364248571Smm zfs_prop_to_name(ZFS_PROP_REFQUOTA), 3365248571Smm ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, 3366248571Smm &ddsqra->ddsqra_value, tx); 3367248571Smm 3368248571Smm VERIFY0(dsl_prop_get_int_ds(ds, 3369248571Smm zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval)); 3370248571Smm 3371248571Smm if (ds->ds_quota != newval) { 3372219089Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 3373248571Smm ds->ds_quota = newval; 3374219089Spjd } 3375248571Smm dsl_dataset_rele(ds, FTAG); 3376185029Spjd} 3377185029Spjd 3378185029Spjdint 3379248571Smmdsl_dataset_set_refquota(const char *dsname, zprop_source_t source, 3380248571Smm uint64_t refquota) 3381185029Spjd{ 3382248571Smm dsl_dataset_set_qr_arg_t ddsqra; 3383185029Spjd 3384248571Smm ddsqra.ddsqra_name = dsname; 3385248571Smm ddsqra.ddsqra_source = source; 3386248571Smm ddsqra.ddsqra_value = refquota; 3387219089Spjd 3388248571Smm return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check, 3389269006Sdelphij dsl_dataset_set_refquota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE)); 3390185029Spjd} 3391185029Spjd 3392185029Spjdstatic int 3393248571Smmdsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx) 3394185029Spjd{ 3395248571Smm dsl_dataset_set_qr_arg_t *ddsqra = arg; 3396248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 3397248571Smm dsl_dataset_t *ds; 3398248571Smm int error; 3399248571Smm uint64_t newval, unique; 3400185029Spjd 3401248571Smm if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION) 3402249195Smm return (SET_ERROR(ENOTSUP)); 3403185029Spjd 3404248571Smm error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 3405248571Smm if (error != 0) 3406248571Smm return (error); 3407248571Smm 3408288549Smav if (ds->ds_is_snapshot) { 3409248571Smm dsl_dataset_rele(ds, FTAG); 3410249195Smm return (SET_ERROR(EINVAL)); 3411248571Smm } 3412185029Spjd 3413248571Smm error = dsl_prop_predict(ds->ds_dir, 3414248571Smm zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 3415248571Smm ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 3416248571Smm if (error != 0) { 3417248571Smm dsl_dataset_rele(ds, FTAG); 3418248571Smm return (error); 3419248571Smm } 3420219089Spjd 3421185029Spjd /* 3422185029Spjd * If we are doing the preliminary check in open context, the 3423185029Spjd * space estimates may be inaccurate. 3424185029Spjd */ 3425248571Smm if (!dmu_tx_is_syncing(tx)) { 3426248571Smm dsl_dataset_rele(ds, FTAG); 3427185029Spjd return (0); 3428248571Smm } 3429185029Spjd 3430185029Spjd mutex_enter(&ds->ds_lock); 3431219089Spjd if (!DS_UNIQUE_IS_ACCURATE(ds)) 3432219089Spjd dsl_dataset_recalc_head_uniq(ds); 3433277585Sdelphij unique = dsl_dataset_phys(ds)->ds_unique_bytes; 3434185029Spjd mutex_exit(&ds->ds_lock); 3435185029Spjd 3436248571Smm if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) { 3437248571Smm uint64_t delta = MAX(unique, newval) - 3438209962Smm MAX(unique, ds->ds_reserved); 3439185029Spjd 3440248571Smm if (delta > 3441248571Smm dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) || 3442248571Smm (ds->ds_quota > 0 && newval > ds->ds_quota)) { 3443248571Smm dsl_dataset_rele(ds, FTAG); 3444249195Smm return (SET_ERROR(ENOSPC)); 3445248571Smm } 3446209962Smm } 3447209962Smm 3448248571Smm dsl_dataset_rele(ds, FTAG); 3449185029Spjd return (0); 3450185029Spjd} 3451185029Spjd 3452248571Smmvoid 3453248571Smmdsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, 3454248571Smm zprop_source_t source, uint64_t value, dmu_tx_t *tx) 3455185029Spjd{ 3456248571Smm uint64_t newval; 3457185029Spjd uint64_t unique; 3458185029Spjd int64_t delta; 3459185029Spjd 3460248571Smm dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 3461248571Smm source, sizeof (value), 1, &value, tx); 3462219089Spjd 3463248571Smm VERIFY0(dsl_prop_get_int_ds(ds, 3464248571Smm zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval)); 3465248571Smm 3466185029Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 3467185029Spjd mutex_enter(&ds->ds_dir->dd_lock); 3468185029Spjd mutex_enter(&ds->ds_lock); 3469219089Spjd ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 3470277585Sdelphij unique = dsl_dataset_phys(ds)->ds_unique_bytes; 3471248571Smm delta = MAX(0, (int64_t)(newval - unique)) - 3472185029Spjd MAX(0, (int64_t)(ds->ds_reserved - unique)); 3473248571Smm ds->ds_reserved = newval; 3474185029Spjd mutex_exit(&ds->ds_lock); 3475185029Spjd 3476185029Spjd dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 3477185029Spjd mutex_exit(&ds->ds_dir->dd_lock); 3478185029Spjd} 3479185029Spjd 3480248571Smmstatic void 3481248571Smmdsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx) 3482185029Spjd{ 3483248571Smm dsl_dataset_set_qr_arg_t *ddsqra = arg; 3484248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 3485185029Spjd dsl_dataset_t *ds; 3486185029Spjd 3487248571Smm VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 3488248571Smm dsl_dataset_set_refreservation_sync_impl(ds, 3489248571Smm ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx); 3490185029Spjd dsl_dataset_rele(ds, FTAG); 3491185029Spjd} 3492219089Spjd 3493219089Spjdint 3494248571Smmdsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, 3495248571Smm uint64_t refreservation) 3496219089Spjd{ 3497248571Smm dsl_dataset_set_qr_arg_t ddsqra; 3498219089Spjd 3499248571Smm ddsqra.ddsqra_name = dsname; 3500248571Smm ddsqra.ddsqra_source = source; 3501248571Smm ddsqra.ddsqra_value = refreservation; 3502219089Spjd 3503248571Smm return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check, 3504269006Sdelphij dsl_dataset_set_refreservation_sync, &ddsqra, 3505269006Sdelphij 0, ZFS_SPACE_CHECK_NONE)); 3506219089Spjd} 3507219089Spjd 3508219089Spjd/* 3509228103Smm * Return (in *usedp) the amount of space written in new that is not 3510228103Smm * present in oldsnap. New may be a snapshot or the head. Old must be 3511228103Smm * a snapshot before new, in new's filesystem (or its origin). If not then 3512228103Smm * fail and return EINVAL. 3513228103Smm * 3514228103Smm * The written space is calculated by considering two components: First, we 3515228103Smm * ignore any freed space, and calculate the written as new's used space 3516228103Smm * minus old's used space. Next, we add in the amount of space that was freed 3517228103Smm * between the two snapshots, thus reducing new's used space relative to old's. 3518228103Smm * Specifically, this is the space that was born before old->ds_creation_txg, 3519228103Smm * and freed before new (ie. on new's deadlist or a previous deadlist). 3520228103Smm * 3521228103Smm * space freed [---------------------] 3522228103Smm * snapshots ---O-------O--------O-------O------ 3523228103Smm * oldsnap new 3524228103Smm */ 3525228103Smmint 3526228103Smmdsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, 3527228103Smm uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 3528228103Smm{ 3529228103Smm int err = 0; 3530228103Smm uint64_t snapobj; 3531228103Smm dsl_pool_t *dp = new->ds_dir->dd_pool; 3532228103Smm 3533248571Smm ASSERT(dsl_pool_config_held(dp)); 3534248571Smm 3535228103Smm *usedp = 0; 3536277585Sdelphij *usedp += dsl_dataset_phys(new)->ds_referenced_bytes; 3537277585Sdelphij *usedp -= dsl_dataset_phys(oldsnap)->ds_referenced_bytes; 3538228103Smm 3539228103Smm *compp = 0; 3540277585Sdelphij *compp += dsl_dataset_phys(new)->ds_compressed_bytes; 3541277585Sdelphij *compp -= dsl_dataset_phys(oldsnap)->ds_compressed_bytes; 3542228103Smm 3543228103Smm *uncompp = 0; 3544277585Sdelphij *uncompp += dsl_dataset_phys(new)->ds_uncompressed_bytes; 3545277585Sdelphij *uncompp -= dsl_dataset_phys(oldsnap)->ds_uncompressed_bytes; 3546228103Smm 3547228103Smm snapobj = new->ds_object; 3548228103Smm while (snapobj != oldsnap->ds_object) { 3549228103Smm dsl_dataset_t *snap; 3550228103Smm uint64_t used, comp, uncomp; 3551228103Smm 3552236884Smm if (snapobj == new->ds_object) { 3553236884Smm snap = new; 3554236884Smm } else { 3555236884Smm err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); 3556236884Smm if (err != 0) 3557236884Smm break; 3558236884Smm } 3559228103Smm 3560277585Sdelphij if (dsl_dataset_phys(snap)->ds_prev_snap_txg == 3561277585Sdelphij dsl_dataset_phys(oldsnap)->ds_creation_txg) { 3562228103Smm /* 3563228103Smm * The blocks in the deadlist can not be born after 3564228103Smm * ds_prev_snap_txg, so get the whole deadlist space, 3565228103Smm * which is more efficient (especially for old-format 3566228103Smm * deadlists). Unfortunately the deadlist code 3567228103Smm * doesn't have enough information to make this 3568228103Smm * optimization itself. 3569228103Smm */ 3570228103Smm dsl_deadlist_space(&snap->ds_deadlist, 3571228103Smm &used, &comp, &uncomp); 3572228103Smm } else { 3573228103Smm dsl_deadlist_space_range(&snap->ds_deadlist, 3574277585Sdelphij 0, dsl_dataset_phys(oldsnap)->ds_creation_txg, 3575228103Smm &used, &comp, &uncomp); 3576228103Smm } 3577228103Smm *usedp += used; 3578228103Smm *compp += comp; 3579228103Smm *uncompp += uncomp; 3580228103Smm 3581228103Smm /* 3582228103Smm * If we get to the beginning of the chain of snapshots 3583228103Smm * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap 3584228103Smm * was not a snapshot of/before new. 3585228103Smm */ 3586277585Sdelphij snapobj = dsl_dataset_phys(snap)->ds_prev_snap_obj; 3587236884Smm if (snap != new) 3588236884Smm dsl_dataset_rele(snap, FTAG); 3589228103Smm if (snapobj == 0) { 3590249195Smm err = SET_ERROR(EINVAL); 3591228103Smm break; 3592228103Smm } 3593228103Smm 3594228103Smm } 3595228103Smm return (err); 3596228103Smm} 3597228103Smm 3598228103Smm/* 3599228103Smm * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, 3600228103Smm * lastsnap, and all snapshots in between are deleted. 3601228103Smm * 3602228103Smm * blocks that would be freed [---------------------------] 3603228103Smm * snapshots ---O-------O--------O-------O--------O 3604228103Smm * firstsnap lastsnap 3605228103Smm * 3606228103Smm * This is the set of blocks that were born after the snap before firstsnap, 3607228103Smm * (birth > firstsnap->prev_snap_txg) and died before the snap after the 3608228103Smm * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). 3609228103Smm * We calculate this by iterating over the relevant deadlists (from the snap 3610228103Smm * after lastsnap, backward to the snap after firstsnap), summing up the 3611228103Smm * space on the deadlist that was born after the snap before firstsnap. 3612228103Smm */ 3613228103Smmint 3614228103Smmdsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, 3615228103Smm dsl_dataset_t *lastsnap, 3616228103Smm uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 3617228103Smm{ 3618228103Smm int err = 0; 3619228103Smm uint64_t snapobj; 3620228103Smm dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; 3621228103Smm 3622288549Smav ASSERT(firstsnap->ds_is_snapshot); 3623288549Smav ASSERT(lastsnap->ds_is_snapshot); 3624228103Smm 3625228103Smm /* 3626228103Smm * Check that the snapshots are in the same dsl_dir, and firstsnap 3627228103Smm * is before lastsnap. 3628228103Smm */ 3629228103Smm if (firstsnap->ds_dir != lastsnap->ds_dir || 3630277585Sdelphij dsl_dataset_phys(firstsnap)->ds_creation_txg > 3631277585Sdelphij dsl_dataset_phys(lastsnap)->ds_creation_txg) 3632249195Smm return (SET_ERROR(EINVAL)); 3633228103Smm 3634228103Smm *usedp = *compp = *uncompp = 0; 3635228103Smm 3636277585Sdelphij snapobj = dsl_dataset_phys(lastsnap)->ds_next_snap_obj; 3637228103Smm while (snapobj != firstsnap->ds_object) { 3638228103Smm dsl_dataset_t *ds; 3639228103Smm uint64_t used, comp, uncomp; 3640228103Smm 3641228103Smm err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); 3642228103Smm if (err != 0) 3643228103Smm break; 3644228103Smm 3645228103Smm dsl_deadlist_space_range(&ds->ds_deadlist, 3646277585Sdelphij dsl_dataset_phys(firstsnap)->ds_prev_snap_txg, UINT64_MAX, 3647228103Smm &used, &comp, &uncomp); 3648228103Smm *usedp += used; 3649228103Smm *compp += comp; 3650228103Smm *uncompp += uncomp; 3651228103Smm 3652277585Sdelphij snapobj = dsl_dataset_phys(ds)->ds_prev_snap_obj; 3653228103Smm ASSERT3U(snapobj, !=, 0); 3654228103Smm dsl_dataset_rele(ds, FTAG); 3655228103Smm } 3656228103Smm return (err); 3657228103Smm} 3658248571Smm 3659248571Smm/* 3660248571Smm * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. 3661248571Smm * For example, they could both be snapshots of the same filesystem, and 3662248571Smm * 'earlier' is before 'later'. Or 'earlier' could be the origin of 3663248571Smm * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's 3664248571Smm * filesystem. Or 'earlier' could be the origin's origin. 3665263407Sdelphij * 3666263407Sdelphij * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg. 3667248571Smm */ 3668248571Smmboolean_t 3669263407Sdelphijdsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier, 3670290765Smav uint64_t earlier_txg) 3671248571Smm{ 3672248571Smm dsl_pool_t *dp = later->ds_dir->dd_pool; 3673248571Smm int error; 3674248571Smm boolean_t ret; 3675248571Smm 3676248571Smm ASSERT(dsl_pool_config_held(dp)); 3677288549Smav ASSERT(earlier->ds_is_snapshot || earlier_txg != 0); 3678248571Smm 3679263407Sdelphij if (earlier_txg == 0) 3680277585Sdelphij earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg; 3681263407Sdelphij 3682288549Smav if (later->ds_is_snapshot && 3683277585Sdelphij earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg) 3684248571Smm return (B_FALSE); 3685248571Smm 3686248571Smm if (later->ds_dir == earlier->ds_dir) 3687248571Smm return (B_TRUE); 3688248571Smm if (!dsl_dir_is_clone(later->ds_dir)) 3689248571Smm return (B_FALSE); 3690248571Smm 3691277585Sdelphij if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == earlier->ds_object) 3692248571Smm return (B_TRUE); 3693248571Smm dsl_dataset_t *origin; 3694248571Smm error = dsl_dataset_hold_obj(dp, 3695277585Sdelphij dsl_dir_phys(later->ds_dir)->dd_origin_obj, FTAG, &origin); 3696248571Smm if (error != 0) 3697248571Smm return (B_FALSE); 3698263407Sdelphij ret = dsl_dataset_is_before(origin, earlier, earlier_txg); 3699248571Smm dsl_dataset_rele(origin, FTAG); 3700248571Smm return (ret); 3701248571Smm} 3702263390Sdelphij 3703263390Sdelphijvoid 3704263390Sdelphijdsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx) 3705263390Sdelphij{ 3706263390Sdelphij objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3707263390Sdelphij dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx); 3708263390Sdelphij} 3709290756Smav 3710290756Smavboolean_t 3711290756Smavdsl_dataset_is_zapified(dsl_dataset_t *ds) 3712290756Smav{ 3713290756Smav dmu_object_info_t doi; 3714290756Smav 3715290756Smav dmu_object_info_from_db(ds->ds_dbuf, &doi); 3716290756Smav return (doi.doi_type == DMU_OTN_ZAP_METADATA); 3717290756Smav} 3718290756Smav 3719290756Smavboolean_t 3720290756Smavdsl_dataset_has_resume_receive_state(dsl_dataset_t *ds) 3721290756Smav{ 3722290756Smav return (dsl_dataset_is_zapified(ds) && 3723290756Smav zap_contains(ds->ds_dir->dd_pool->dp_meta_objset, 3724290756Smav ds->ds_object, DS_FIELD_RESUME_TOGUID) == 0); 3725290756Smav} 3726