1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2013 by Delphix. All rights reserved. 24 * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
25 * Portions Copyright (c) 2011 Martin Matuska <mm@FreeBSD.org> 26 */ 27 28#include <sys/dmu_objset.h> 29#include <sys/dsl_dataset.h> 30#include <sys/dsl_dir.h> 31#include <sys/dsl_prop.h> 32#include <sys/dsl_synctask.h> 33#include <sys/dmu_traverse.h> 34#include <sys/dmu_impl.h> 35#include <sys/dmu_tx.h> 36#include <sys/arc.h> 37#include <sys/zio.h> 38#include <sys/zap.h> 39#include <sys/zfeature.h> 40#include <sys/unique.h> 41#include <sys/zfs_context.h> 42#include <sys/zfs_ioctl.h> 43#include <sys/spa.h> 44#include <sys/zfs_znode.h> 45#include <sys/zfs_onexit.h> 46#include <sys/zvol.h> 47#include <sys/dsl_scan.h> 48#include <sys/dsl_deadlist.h> 49#include <sys/dsl_destroy.h> 50#include <sys/dsl_userhold.h> 51 52#define SWITCH64(x, y) \ 53 { \ 54 uint64_t __tmp = (x); \ 55 (x) = (y); \ 56 (y) = __tmp; \ 57 } 58 59#define DS_REF_MAX (1ULL << 62) 60 61#define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 62 63/* 64 * Figure out how much of this delta should be propogated to the dsl_dir 65 * layer. If there's a refreservation, that space has already been 66 * partially accounted for in our ancestors. 
67 */ 68static int64_t 69parent_delta(dsl_dataset_t *ds, int64_t delta) 70{ 71 uint64_t old_bytes, new_bytes; 72 73 if (ds->ds_reserved == 0) 74 return (delta); 75 76 old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 77 new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 78 79 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 80 return (new_bytes - old_bytes); 81} 82 83void 84dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) 85{ 86 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 87 int compressed = BP_GET_PSIZE(bp); 88 int uncompressed = BP_GET_UCSIZE(bp); 89 int64_t delta; 90 91 dprintf_bp(bp, "ds=%p", ds); 92 93 ASSERT(dmu_tx_is_syncing(tx)); 94 /* It could have been compressed away to nothing */ 95 if (BP_IS_HOLE(bp)) 96 return; 97 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 98 ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp))); 99 if (ds == NULL) { 100 dsl_pool_mos_diduse_space(tx->tx_pool, 101 used, compressed, uncompressed); 102 return; 103 } 104 dmu_buf_will_dirty(ds->ds_dbuf, tx); 105 106 mutex_enter(&ds->ds_dir->dd_lock); 107 mutex_enter(&ds->ds_lock); 108 delta = parent_delta(ds, used); 109 ds->ds_phys->ds_referenced_bytes += used; 110 ds->ds_phys->ds_compressed_bytes += compressed; 111 ds->ds_phys->ds_uncompressed_bytes += uncompressed; 112 ds->ds_phys->ds_unique_bytes += used; 113 mutex_exit(&ds->ds_lock); 114 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, 115 compressed, uncompressed, tx); 116 dsl_dir_transfer_space(ds->ds_dir, used - delta, 117 DD_USED_REFRSRV, DD_USED_HEAD, tx); 118 mutex_exit(&ds->ds_dir->dd_lock); 119} 120 121int 122dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, 123 boolean_t async) 124{ 125 if (BP_IS_HOLE(bp)) 126 return (0); 127 128 ASSERT(dmu_tx_is_syncing(tx)); 129 ASSERT(bp->blk_birth <= tx->tx_txg); 130 131 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 132 int compressed = BP_GET_PSIZE(bp); 133 int uncompressed = 
BP_GET_UCSIZE(bp); 134 135 ASSERT(used > 0); 136 if (ds == NULL) { 137 dsl_free(tx->tx_pool, tx->tx_txg, bp); 138 dsl_pool_mos_diduse_space(tx->tx_pool, 139 -used, -compressed, -uncompressed); 140 return (used); 141 } 142 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 143 144 ASSERT(!dsl_dataset_is_snapshot(ds)); 145 dmu_buf_will_dirty(ds->ds_dbuf, tx); 146 147 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 148 int64_t delta; 149 150 dprintf_bp(bp, "freeing ds=%llu", ds->ds_object); 151 dsl_free(tx->tx_pool, tx->tx_txg, bp); 152 153 mutex_enter(&ds->ds_dir->dd_lock); 154 mutex_enter(&ds->ds_lock); 155 ASSERT(ds->ds_phys->ds_unique_bytes >= used || 156 !DS_UNIQUE_IS_ACCURATE(ds)); 157 delta = parent_delta(ds, -used); 158 ds->ds_phys->ds_unique_bytes -= used; 159 mutex_exit(&ds->ds_lock); 160 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 161 delta, -compressed, -uncompressed, tx); 162 dsl_dir_transfer_space(ds->ds_dir, -used - delta, 163 DD_USED_REFRSRV, DD_USED_HEAD, tx); 164 mutex_exit(&ds->ds_dir->dd_lock); 165 } else { 166 dprintf_bp(bp, "putting on dead list: %s", ""); 167 if (async) { 168 /* 169 * We are here as part of zio's write done callback, 170 * which means we're a zio interrupt thread. We can't 171 * call dsl_deadlist_insert() now because it may block 172 * waiting for I/O. Instead, put bp on the deferred 173 * queue and let dsl_pool_sync() finish the job. 
174 */ 175 bplist_append(&ds->ds_pending_deadlist, bp); 176 } else { 177 dsl_deadlist_insert(&ds->ds_deadlist, bp, tx); 178 } 179 ASSERT3U(ds->ds_prev->ds_object, ==, 180 ds->ds_phys->ds_prev_snap_obj); 181 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 182 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 183 if (ds->ds_prev->ds_phys->ds_next_snap_obj == 184 ds->ds_object && bp->blk_birth > 185 ds->ds_prev->ds_phys->ds_prev_snap_txg) { 186 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 187 mutex_enter(&ds->ds_prev->ds_lock); 188 ds->ds_prev->ds_phys->ds_unique_bytes += used; 189 mutex_exit(&ds->ds_prev->ds_lock); 190 } 191 if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { 192 dsl_dir_transfer_space(ds->ds_dir, used, 193 DD_USED_HEAD, DD_USED_SNAP, tx); 194 } 195 } 196 mutex_enter(&ds->ds_lock); 197 ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used); 198 ds->ds_phys->ds_referenced_bytes -= used; 199 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 200 ds->ds_phys->ds_compressed_bytes -= compressed; 201 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 202 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 203 mutex_exit(&ds->ds_lock); 204 205 return (used); 206} 207 208uint64_t 209dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 210{ 211 uint64_t trysnap = 0; 212 213 if (ds == NULL) 214 return (0); 215 /* 216 * The snapshot creation could fail, but that would cause an 217 * incorrect FALSE return, which would only result in an 218 * overestimation of the amount of space that an operation would 219 * consume, which is OK. 220 * 221 * There's also a small window where we could miss a pending 222 * snapshot, because we could set the sync task in the quiescing 223 * phase. So this should only be used as a guess. 
224 */ 225 if (ds->ds_trysnap_txg > 226 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 227 trysnap = ds->ds_trysnap_txg; 228 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 229} 230 231boolean_t 232dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, 233 uint64_t blk_birth) 234{ 235 if (blk_birth <= dsl_dataset_prev_snap_txg(ds)) 236 return (B_FALSE); 237 238 ddt_prefetch(dsl_dataset_get_spa(ds), bp); 239 240 return (B_TRUE); 241} 242 243/* ARGSUSED */ 244static void 245dsl_dataset_evict(dmu_buf_t *db, void *dsv) 246{ 247 dsl_dataset_t *ds = dsv; 248 249 ASSERT(ds->ds_owner == NULL); 250 251 unique_remove(ds->ds_fsid_guid); 252 253 if (ds->ds_objset != NULL) 254 dmu_objset_evict(ds->ds_objset); 255 256 if (ds->ds_prev) { 257 dsl_dataset_rele(ds->ds_prev, ds); 258 ds->ds_prev = NULL; 259 } 260 261 bplist_destroy(&ds->ds_pending_deadlist); 262 if (ds->ds_phys->ds_deadlist_obj != 0) 263 dsl_deadlist_close(&ds->ds_deadlist); 264 if (ds->ds_dir) 265 dsl_dir_rele(ds->ds_dir, ds); 266 267 ASSERT(!list_link_active(&ds->ds_synced_link)); 268 269 if (mutex_owned(&ds->ds_lock)) 270 mutex_exit(&ds->ds_lock); 271 mutex_destroy(&ds->ds_lock); 272 if (mutex_owned(&ds->ds_opening_lock)) 273 mutex_exit(&ds->ds_opening_lock); 274 mutex_destroy(&ds->ds_opening_lock); 275 refcount_destroy(&ds->ds_longholds); 276 277 kmem_free(ds, sizeof (dsl_dataset_t)); 278} 279 280int 281dsl_dataset_get_snapname(dsl_dataset_t *ds) 282{ 283 dsl_dataset_phys_t *headphys; 284 int err; 285 dmu_buf_t *headdbuf; 286 dsl_pool_t *dp = ds->ds_dir->dd_pool; 287 objset_t *mos = dp->dp_meta_objset; 288 289 if (ds->ds_snapname[0]) 290 return (0); 291 if (ds->ds_phys->ds_next_snap_obj == 0) 292 return (0); 293 294 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 295 FTAG, &headdbuf); 296 if (err != 0) 297 return (err); 298 headphys = headdbuf->db_data; 299 err = zap_value_search(dp->dp_meta_objset, 300 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 301 
dmu_buf_rele(headdbuf, FTAG); 302 return (err); 303} 304 305int 306dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) 307{ 308 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 309 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 310 matchtype_t mt; 311 int err; 312 313 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 314 mt = MT_FIRST; 315 else 316 mt = MT_EXACT; 317 318 err = zap_lookup_norm(mos, snapobj, name, 8, 1, 319 value, mt, NULL, 0, NULL); 320 if (err == ENOTSUP && mt == MT_FIRST) 321 err = zap_lookup(mos, snapobj, name, 8, 1, value); 322 return (err); 323} 324 325int 326dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx) 327{ 328 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 329 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 330 matchtype_t mt; 331 int err; 332 333 dsl_dir_snap_cmtime_update(ds->ds_dir); 334 335 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 336 mt = MT_FIRST; 337 else 338 mt = MT_EXACT; 339 340 err = zap_remove_norm(mos, snapobj, name, mt, tx); 341 if (err == ENOTSUP && mt == MT_FIRST) 342 err = zap_remove(mos, snapobj, name, tx); 343 return (err); 344} 345 346int 347dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, 348 dsl_dataset_t **dsp) 349{ 350 objset_t *mos = dp->dp_meta_objset; 351 dmu_buf_t *dbuf; 352 dsl_dataset_t *ds; 353 int err; 354 dmu_object_info_t doi; 355 356 ASSERT(dsl_pool_config_held(dp)); 357 358 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 359 if (err != 0) 360 return (err); 361 362 /* Make sure dsobj has the correct object type. 
*/ 363 dmu_object_info_from_db(dbuf, &doi); 364 if (doi.doi_type != DMU_OT_DSL_DATASET) { 365 dmu_buf_rele(dbuf, tag); 366 return (SET_ERROR(EINVAL)); 367 } 368 369 ds = dmu_buf_get_user(dbuf); 370 if (ds == NULL) { 371 dsl_dataset_t *winner = NULL; 372 373 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 374 ds->ds_dbuf = dbuf; 375 ds->ds_object = dsobj; 376 ds->ds_phys = dbuf->db_data; 377 378 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 379 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 380 mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); 381 refcount_create(&ds->ds_longholds); 382 383 bplist_create(&ds->ds_pending_deadlist); 384 dsl_deadlist_open(&ds->ds_deadlist, 385 mos, ds->ds_phys->ds_deadlist_obj); 386 387 list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), 388 offsetof(dmu_sendarg_t, dsa_link)); 389 390 if (err == 0) { 391 err = dsl_dir_hold_obj(dp, 392 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 393 } 394 if (err != 0) { 395 mutex_destroy(&ds->ds_lock); 396 mutex_destroy(&ds->ds_opening_lock); 397 refcount_destroy(&ds->ds_longholds); 398 bplist_destroy(&ds->ds_pending_deadlist); 399 dsl_deadlist_close(&ds->ds_deadlist); 400 kmem_free(ds, sizeof (dsl_dataset_t)); 401 dmu_buf_rele(dbuf, tag); 402 return (err); 403 } 404 405 if (!dsl_dataset_is_snapshot(ds)) { 406 ds->ds_snapname[0] = '\0'; 407 if (ds->ds_phys->ds_prev_snap_obj != 0) { 408 err = dsl_dataset_hold_obj(dp, 409 ds->ds_phys->ds_prev_snap_obj, 410 ds, &ds->ds_prev); 411 } 412 } else { 413 if (zfs_flags & ZFS_DEBUG_SNAPNAMES) 414 err = dsl_dataset_get_snapname(ds); 415 if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) { 416 err = zap_count( 417 ds->ds_dir->dd_pool->dp_meta_objset, 418 ds->ds_phys->ds_userrefs_obj, 419 &ds->ds_userrefs); 420 } 421 } 422 423 if (err == 0 && !dsl_dataset_is_snapshot(ds)) { 424 err = dsl_prop_get_int_ds(ds, 425 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 426 &ds->ds_reserved); 427 if (err == 0) { 428 err = 
dsl_prop_get_int_ds(ds, 429 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 430 &ds->ds_quota); 431 } 432 } else { 433 ds->ds_reserved = ds->ds_quota = 0; 434 } 435 436 if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds, 437 &ds->ds_phys, dsl_dataset_evict)) != NULL) { 438 bplist_destroy(&ds->ds_pending_deadlist); 439 dsl_deadlist_close(&ds->ds_deadlist); 440 if (ds->ds_prev) 441 dsl_dataset_rele(ds->ds_prev, ds); 442 dsl_dir_rele(ds->ds_dir, ds); 443 mutex_destroy(&ds->ds_lock); 444 mutex_destroy(&ds->ds_opening_lock); 445 refcount_destroy(&ds->ds_longholds); 446 kmem_free(ds, sizeof (dsl_dataset_t)); 447 if (err != 0) { 448 dmu_buf_rele(dbuf, tag); 449 return (err); 450 } 451 ds = winner; 452 } else { 453 ds->ds_fsid_guid = 454 unique_insert(ds->ds_phys->ds_fsid_guid); 455 } 456 } 457 ASSERT3P(ds->ds_dbuf, ==, dbuf); 458 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 459 ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || 460 spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || 461 dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); 462 *dsp = ds; 463 return (0); 464} 465 466int 467dsl_dataset_hold(dsl_pool_t *dp, const char *name, 468 void *tag, dsl_dataset_t **dsp) 469{ 470 dsl_dir_t *dd; 471 const char *snapname; 472 uint64_t obj; 473 int err = 0; 474 475 err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname); 476 if (err != 0) 477 return (err); 478 479 ASSERT(dsl_pool_config_held(dp)); 480 obj = dd->dd_phys->dd_head_dataset_obj; 481 if (obj != 0) 482 err = dsl_dataset_hold_obj(dp, obj, tag, dsp); 483 else 484 err = SET_ERROR(ENOENT); 485 486 /* we may be looking for a snapshot */ 487 if (err == 0 && snapname != NULL) { 488 dsl_dataset_t *ds; 489 490 if (*snapname++ != '@') { 491 dsl_dataset_rele(*dsp, tag); 492 dsl_dir_rele(dd, FTAG); 493 return (SET_ERROR(ENOENT)); 494 } 495 496 dprintf("looking for snapshot '%s'\n", snapname); 497 err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); 498 if (err == 0) 499 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 500 dsl_dataset_rele(*dsp, 
tag); 501 502 if (err == 0) { 503 mutex_enter(&ds->ds_lock); 504 if (ds->ds_snapname[0] == 0) 505 (void) strlcpy(ds->ds_snapname, snapname, 506 sizeof (ds->ds_snapname)); 507 mutex_exit(&ds->ds_lock); 508 *dsp = ds; 509 } 510 } 511 512 dsl_dir_rele(dd, FTAG); 513 return (err); 514} 515 516int 517dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, 518 void *tag, dsl_dataset_t **dsp) 519{ 520 int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); 521 if (err != 0) 522 return (err); 523 if (!dsl_dataset_tryown(*dsp, tag)) { 524 dsl_dataset_rele(*dsp, tag); 525 *dsp = NULL; 526 return (SET_ERROR(EBUSY)); 527 } 528 return (0); 529} 530 531int 532dsl_dataset_own(dsl_pool_t *dp, const char *name, 533 void *tag, dsl_dataset_t **dsp) 534{ 535 int err = dsl_dataset_hold(dp, name, tag, dsp); 536 if (err != 0) 537 return (err); 538 if (!dsl_dataset_tryown(*dsp, tag)) { 539 dsl_dataset_rele(*dsp, tag); 540 return (SET_ERROR(EBUSY)); 541 } 542 return (0); 543} 544 545/* 546 * See the comment above dsl_pool_hold() for details. In summary, a long 547 * hold is used to prevent destruction of a dataset while the pool hold 548 * is dropped, allowing other concurrent operations (e.g. spa_sync()). 549 * 550 * The dataset and pool must be held when this function is called. After it 551 * is called, the pool hold may be released while the dataset is still held 552 * and accessed. 553 */ 554void 555dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag) 556{ 557 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 558 (void) refcount_add(&ds->ds_longholds, tag); 559} 560 561void 562dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag) 563{ 564 (void) refcount_remove(&ds->ds_longholds, tag); 565} 566 567/* Return B_TRUE if there are any long holds on this dataset. 
*/ 568boolean_t 569dsl_dataset_long_held(dsl_dataset_t *ds) 570{ 571 return (!refcount_is_zero(&ds->ds_longholds)); 572} 573 574void 575dsl_dataset_name(dsl_dataset_t *ds, char *name) 576{ 577 if (ds == NULL) { 578 (void) strcpy(name, "mos"); 579 } else { 580 dsl_dir_name(ds->ds_dir, name); 581 VERIFY0(dsl_dataset_get_snapname(ds)); 582 if (ds->ds_snapname[0]) { 583 (void) strcat(name, "@"); 584 /* 585 * We use a "recursive" mutex so that we 586 * can call dprintf_ds() with ds_lock held. 587 */ 588 if (!MUTEX_HELD(&ds->ds_lock)) { 589 mutex_enter(&ds->ds_lock); 590 (void) strcat(name, ds->ds_snapname); 591 mutex_exit(&ds->ds_lock); 592 } else { 593 (void) strcat(name, ds->ds_snapname); 594 } 595 } 596 } 597} 598 599static int 600dsl_dataset_namelen(dsl_dataset_t *ds) 601{ 602 int result; 603 604 if (ds == NULL) { 605 result = 3; /* "mos" */ 606 } else { 607 result = dsl_dir_namelen(ds->ds_dir); 608 VERIFY0(dsl_dataset_get_snapname(ds)); 609 if (ds->ds_snapname[0]) { 610 ++result; /* adding one for the @-sign */ 611 if (!MUTEX_HELD(&ds->ds_lock)) { 612 mutex_enter(&ds->ds_lock); 613 result += strlen(ds->ds_snapname); 614 mutex_exit(&ds->ds_lock); 615 } else { 616 result += strlen(ds->ds_snapname); 617 } 618 } 619 } 620 621 return (result); 622} 623 624void 625dsl_dataset_rele(dsl_dataset_t *ds, void *tag) 626{ 627 dmu_buf_rele(ds->ds_dbuf, tag); 628} 629 630void 631dsl_dataset_disown(dsl_dataset_t *ds, void *tag) 632{ 633 ASSERT(ds->ds_owner == tag && ds->ds_dbuf != NULL); 634 635 mutex_enter(&ds->ds_lock); 636 ds->ds_owner = NULL; 637 mutex_exit(&ds->ds_lock); 638 dsl_dataset_long_rele(ds, tag); 639 if (ds->ds_dbuf != NULL) 640 dsl_dataset_rele(ds, tag); 641 else 642 dsl_dataset_evict(NULL, ds); 643} 644 645boolean_t 646dsl_dataset_tryown(dsl_dataset_t *ds, void *tag) 647{ 648 boolean_t gotit = FALSE; 649 650 mutex_enter(&ds->ds_lock); 651 if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) { 652 ds->ds_owner = tag; 653 dsl_dataset_long_hold(ds, tag); 654 gotit = 
TRUE; 655 } 656 mutex_exit(&ds->ds_lock); 657 return (gotit); 658} 659 660uint64_t 661dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, 662 uint64_t flags, dmu_tx_t *tx) 663{ 664 dsl_pool_t *dp = dd->dd_pool; 665 dmu_buf_t *dbuf; 666 dsl_dataset_phys_t *dsphys; 667 uint64_t dsobj; 668 objset_t *mos = dp->dp_meta_objset; 669 670 if (origin == NULL) 671 origin = dp->dp_origin_snap; 672 673 ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 674 ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); 675 ASSERT(dmu_tx_is_syncing(tx)); 676 ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 677 678 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 679 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 680 VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 681 dmu_buf_will_dirty(dbuf, tx); 682 dsphys = dbuf->db_data; 683 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 684 dsphys->ds_dir_obj = dd->dd_object; 685 dsphys->ds_flags = flags; 686 dsphys->ds_fsid_guid = unique_create(); 687 do { 688 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 689 sizeof (dsphys->ds_guid)); 690 } while (dsphys->ds_guid == 0); 691 dsphys->ds_snapnames_zapobj = 692 zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, 693 DMU_OT_NONE, 0, tx); 694 dsphys->ds_creation_time = gethrestime_sec(); 695 dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 
1 : tx->tx_txg; 696 697 if (origin == NULL) { 698 dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); 699 } else { 700 dsl_dataset_t *ohds; /* head of the origin snapshot */ 701 702 dsphys->ds_prev_snap_obj = origin->ds_object; 703 dsphys->ds_prev_snap_txg = 704 origin->ds_phys->ds_creation_txg; 705 dsphys->ds_referenced_bytes = 706 origin->ds_phys->ds_referenced_bytes; 707 dsphys->ds_compressed_bytes = 708 origin->ds_phys->ds_compressed_bytes; 709 dsphys->ds_uncompressed_bytes = 710 origin->ds_phys->ds_uncompressed_bytes; 711 dsphys->ds_bp = origin->ds_phys->ds_bp; 712 dsphys->ds_flags |= origin->ds_phys->ds_flags; 713 714 dmu_buf_will_dirty(origin->ds_dbuf, tx); 715 origin->ds_phys->ds_num_children++; 716 717 VERIFY0(dsl_dataset_hold_obj(dp, 718 origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds)); 719 dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, 720 dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); 721 dsl_dataset_rele(ohds, FTAG); 722 723 if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { 724 if (origin->ds_phys->ds_next_clones_obj == 0) { 725 origin->ds_phys->ds_next_clones_obj = 726 zap_create(mos, 727 DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 728 } 729 VERIFY0(zap_add_int(mos, 730 origin->ds_phys->ds_next_clones_obj, dsobj, tx)); 731 } 732 733 dmu_buf_will_dirty(dd->dd_dbuf, tx); 734 dd->dd_phys->dd_origin_obj = origin->ds_object; 735 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 736 if (origin->ds_dir->dd_phys->dd_clones == 0) { 737 dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); 738 origin->ds_dir->dd_phys->dd_clones = 739 zap_create(mos, 740 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 741 } 742 VERIFY0(zap_add_int(mos, 743 origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); 744 } 745 } 746 747 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 748 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 749 750 dmu_buf_rele(dbuf, FTAG); 751 752 dmu_buf_will_dirty(dd->dd_dbuf, tx); 753 
dd->dd_phys->dd_head_dataset_obj = dsobj; 754 755 return (dsobj); 756} 757 758static void 759dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) 760{ 761 objset_t *os; 762 763 VERIFY0(dmu_objset_from_ds(ds, &os)); 764 bzero(&os->os_zil_header, sizeof (os->os_zil_header)); 765 dsl_dataset_dirty(ds, tx); 766} 767 768uint64_t 769dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, 770 dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) 771{ 772 dsl_pool_t *dp = pdd->dd_pool; 773 uint64_t dsobj, ddobj; 774 dsl_dir_t *dd; 775 776 ASSERT(dmu_tx_is_syncing(tx)); 777 ASSERT(lastname[0] != '@'); 778 779 ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); 780 VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd)); 781 782 dsobj = dsl_dataset_create_sync_dd(dd, origin, 783 flags & ~DS_CREATE_FLAG_NODIRTY, tx); 784 785 dsl_deleg_set_create_perms(dd, tx, cr); 786 787 dsl_dir_rele(dd, FTAG); 788 789 /* 790 * If we are creating a clone, make sure we zero out any stale 791 * data from the origin snapshots zil header. 
792 */ 793 if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) { 794 dsl_dataset_t *ds; 795 796 VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); 797 dsl_dataset_zero_zil(ds, tx); 798 dsl_dataset_rele(ds, FTAG); 799 } 800 801 return (dsobj); 802} 803 804#ifdef __FreeBSD__ 805/* FreeBSD ioctl compat begin */ 806struct destroyarg { 807 nvlist_t *nvl; 808 const char *snapname; 809}; 810 811static int 812dsl_check_snap_cb(const char *name, void *arg) 813{ 814 struct destroyarg *da = arg; 815 dsl_dataset_t *ds; 816 char *dsname; 817 818 dsname = kmem_asprintf("%s@%s", name, da->snapname); 819 fnvlist_add_boolean(da->nvl, dsname); 820 kmem_free(dsname, strlen(dsname) + 1); 821 822 return (0); 823} 824 825int 826dmu_get_recursive_snaps_nvl(char *fsname, const char *snapname, 827 nvlist_t *snaps) 828{ 829 struct destroyarg *da; 830 int err; 831 832 da = kmem_zalloc(sizeof (struct destroyarg), KM_SLEEP); 833 da->nvl = snaps; 834 da->snapname = snapname; 835 err = dmu_objset_find(fsname, dsl_check_snap_cb, da, 836 DS_FIND_CHILDREN); 837 kmem_free(da, sizeof (struct destroyarg)); 838 839 return (err); 840} 841/* FreeBSD ioctl compat end */ 842#endif /* __FreeBSD__ */ 843 844/* 845 * The unique space in the head dataset can be calculated by subtracting 846 * the space used in the most recent snapshot, that is still being used 847 * in this file system, from the space currently in use. To figure out 848 * the space in the most recent snapshot still in use, we need to take 849 * the total space used in the snapshot and subtract out the space that 850 * has been freed up since the snapshot was taken. 
 */
void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
	uint64_t mrs_used;	/* bytes referenced by most recent snapshot */
	uint64_t dlused, dlcomp, dluncomp;

	ASSERT(!dsl_dataset_is_snapshot(ds));

	if (ds->ds_phys->ds_prev_snap_obj != 0)
		mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
	else
		mrs_used = 0;

	dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);

	ASSERT3U(dlused, <=, mrs_used);
	/* unique = referenced - (snapshot space still in use) */
	ds->ds_phys->ds_unique_bytes =
	    ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);

	if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
	    SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
}

/*
 * Remove obj from ds's next-clones ZAP.  A missing entry (ENOENT) is
 * tolerated; any other failure trips VERIFY0.
 */
void
dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
    dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t count;
	int err;

	ASSERT(ds->ds_phys->ds_num_children >= 2);
	err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx);
	/*
	 * The err should not be ENOENT, but a bug in a previous version
	 * of the code could cause upgrade_clones_cb() to not set
	 * ds_next_snap_obj when it should, leading to a missing entry.
	 * If we knew that the pool was created after
	 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't
	 * ENOENT.  However, at least we can check that we don't have
	 * too many entries in the next_clones_obj even after failing to
	 * remove this one.
	 */
	if (err != ENOENT)
		VERIFY0(err);
	ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj,
	    &count));
	ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2);
}


/* Return a pointer to the dataset's on-disk root block pointer. */
blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
	return (&ds->ds_phys->ds_bp);
}

/*
 * Store *bp as the root block pointer of ds, or of the pool's meta-objset
 * when ds is NULL.  Syncing context only.
 */
void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}

/* Return the spa_t of the pool that ds belongs to; ds must not be NULL. */
spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}

/*
 * Add ds to the pool's dirty-dataset list for tx's txg.  A NULL ds (the
 * meta-objset) is ignored.  Panics if ds has a next snapshot object,
 * i.e. if it is a snapshot.
 */
void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_objset != NULL);

	if (ds->ds_phys->ds_next_snap_obj != 0)
		panic("dirtying snapshot!");

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}

/* Return B_TRUE if ds is on the pool's dirty-dataset list for any txg slot. */
boolean_t
dsl_dataset_is_dirty(dsl_dataset_t *ds)
{
	for (int t = 0; t < TXG_SIZE; t++) {
		if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
		    ds, t))
			return (B_TRUE);
	}
	return (B_FALSE);
}

/*
 * Check (syncing context only) that the dir has room for the space a new
 * snapshot of ds may take over from the refreservation, and reserve it
 * for the rest of this sync group.  Returns 0 outside syncing context or
 * on success, ENOSPC when the space is not available.
 */
static int
dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t asize;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * If there's an fs-only reservation, any blocks that might become
	 * owned by the snapshot dataset must be accommodated by space
	 * outside of the reservation.
	 */
	ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
	asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
		return (SET_ERROR(ENOSPC));

	/*
	 * Propagate any reserved space for this snapshot to other
	 * snapshot checks in this sync group.
	 */
	if (asize > 0)
		dsl_dir_willuse_space(ds->ds_dir, asize, tx);

	return (0);
}

/* Argument bundle for the snapshot sync task. */
typedef struct dsl_dataset_snapshot_arg {
	nvlist_t *ddsa_snaps;
	nvlist_t *ddsa_props;
	nvlist_t *ddsa_errors;
} dsl_dataset_snapshot_arg_t;

int
dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
| 1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2013 by Delphix. All rights reserved. 24 * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
25 * Portions Copyright (c) 2011 Martin Matuska <mm@FreeBSD.org> 26 */ 27 28#include <sys/dmu_objset.h> 29#include <sys/dsl_dataset.h> 30#include <sys/dsl_dir.h> 31#include <sys/dsl_prop.h> 32#include <sys/dsl_synctask.h> 33#include <sys/dmu_traverse.h> 34#include <sys/dmu_impl.h> 35#include <sys/dmu_tx.h> 36#include <sys/arc.h> 37#include <sys/zio.h> 38#include <sys/zap.h> 39#include <sys/zfeature.h> 40#include <sys/unique.h> 41#include <sys/zfs_context.h> 42#include <sys/zfs_ioctl.h> 43#include <sys/spa.h> 44#include <sys/zfs_znode.h> 45#include <sys/zfs_onexit.h> 46#include <sys/zvol.h> 47#include <sys/dsl_scan.h> 48#include <sys/dsl_deadlist.h> 49#include <sys/dsl_destroy.h> 50#include <sys/dsl_userhold.h> 51 52#define SWITCH64(x, y) \ 53 { \ 54 uint64_t __tmp = (x); \ 55 (x) = (y); \ 56 (y) = __tmp; \ 57 } 58 59#define DS_REF_MAX (1ULL << 62) 60 61#define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 62 63/* 64 * Figure out how much of this delta should be propogated to the dsl_dir 65 * layer. If there's a refreservation, that space has already been 66 * partially accounted for in our ancestors. 
 */
static int64_t
parent_delta(dsl_dataset_t *ds, int64_t delta)
{
	uint64_t old_bytes, new_bytes;

	/* Without a refreservation the whole delta is charged to the dir. */
	if (ds->ds_reserved == 0)
		return (delta);

	/*
	 * The dir is charged for max(unique, reserved); only the change in
	 * that maximum is passed up.
	 */
	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);

	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
	return (new_bytes - old_bytes);
}

/*
 * Account for a newly written block.
 *
 * Must be called in syncing context.  Holes are ignored.  When ds is NULL
 * the space is charged to the MOS via dsl_pool_mos_diduse_space();
 * otherwise the dataset's referenced, compressed, uncompressed and unique
 * byte counts are increased and the change is passed on to the dsl_dir.
 */
void
dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);
	int64_t delta;

	dprintf_bp(bp, "ds=%p", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
	if (ds == NULL) {
		dsl_pool_mos_diduse_space(tx->tx_pool,
		    used, compressed, uncompressed);
		return;
	}
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	/* dd_lock is taken before ds_lock and released after it. */
	mutex_enter(&ds->ds_dir->dd_lock);
	mutex_enter(&ds->ds_lock);
	/* parent_delta() must see ds_unique_bytes before it is updated. */
	delta = parent_delta(ds, used);
	ds->ds_phys->ds_referenced_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
	    compressed, uncompressed, tx);
	/* The part not passed up (used - delta) moves REFRSRV -> HEAD. */
	dsl_dir_transfer_space(ds->ds_dir, used - delta,
	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
	mutex_exit(&ds->ds_dir->dd_lock);
}

/*
 * Account for a block that the dataset no longer references.
 *
 * Must be called in syncing context.  Returns 0 for a hole, otherwise the
 * block's size as reported by bp_get_dsize_sync().
 *
 * - ds == NULL: the block is freed and the space is credited to the MOS.
 * - Block born after the previous snapshot: it is freed immediately and
 *   the dataset's unique bytes shrink.
 * - Otherwise the block goes on the dataset's deadlist (or, when async is
 *   set, on ds_pending_deadlist for later processing).
 *
 * In every dataset case the referenced/compressed/uncompressed counters
 * are reduced at the end.
 */
int
dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
    boolean_t async)
{
	if (BP_IS_HOLE(bp))
		return (0);

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(bp->blk_birth <= tx->tx_txg);

	int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	ASSERT(used > 0);
	if (ds == NULL) {
		dsl_free(tx->tx_pool, tx->tx_txg, bp);
		dsl_pool_mos_diduse_space(tx->tx_pool,
		    -used, -compressed, -uncompressed);
		return (used);
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	ASSERT(!dsl_dataset_is_snapshot(ds));
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		int64_t delta;

		dprintf_bp(bp, "freeing ds=%llu", ds->ds_object);
		dsl_free(tx->tx_pool, tx->tx_txg, bp);

		mutex_enter(&ds->ds_dir->dd_lock);
		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
		    !DS_UNIQUE_IS_ACCURATE(ds));
		/* parent_delta() must see ds_unique_bytes before the update. */
		delta = parent_delta(ds, -used);
		ds->ds_phys->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
		    delta, -compressed, -uncompressed, tx);
		dsl_dir_transfer_space(ds->ds_dir, -used - delta,
		    DD_USED_REFRSRV, DD_USED_HEAD, tx);
		mutex_exit(&ds->ds_dir->dd_lock);
	} else {
		dprintf_bp(bp, "putting on dead list: %s", "");
		if (async) {
			/*
			 * We are here as part of zio's write done callback,
			 * which means we're a zio interrupt thread.  We can't
			 * call dsl_deadlist_insert() now because it may block
			 * waiting for I/O.  Instead, put bp on the deferred
			 * queue and let dsl_pool_sync() finish the job.
			 */
			bplist_append(&ds->ds_pending_deadlist, bp);
		} else {
			dsl_deadlist_insert(&ds->ds_deadlist, bp, tx);
		}
		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object && bp->blk_birth >
		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			mutex_enter(&ds->ds_prev->ds_lock);
			ds->ds_prev->ds_phys->ds_unique_bytes += used;
			mutex_exit(&ds->ds_prev->ds_lock);
		}
		if (bp->blk_birth > ds->ds_dir->dd_origin_txg) {
			dsl_dir_transfer_space(ds->ds_dir, used,
			    DD_USED_HEAD, DD_USED_SNAP, tx);
		}
	}
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
	ds->ds_phys->ds_referenced_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);

	return (used);
}

uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	uint64_t trysnap = 0;

	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase.  So this should only be used as a guess.
224 */ 225 if (ds->ds_trysnap_txg > 226 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 227 trysnap = ds->ds_trysnap_txg; 228 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 229} 230 231boolean_t 232dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, 233 uint64_t blk_birth) 234{ 235 if (blk_birth <= dsl_dataset_prev_snap_txg(ds)) 236 return (B_FALSE); 237 238 ddt_prefetch(dsl_dataset_get_spa(ds), bp); 239 240 return (B_TRUE); 241} 242 243/* ARGSUSED */ 244static void 245dsl_dataset_evict(dmu_buf_t *db, void *dsv) 246{ 247 dsl_dataset_t *ds = dsv; 248 249 ASSERT(ds->ds_owner == NULL); 250 251 unique_remove(ds->ds_fsid_guid); 252 253 if (ds->ds_objset != NULL) 254 dmu_objset_evict(ds->ds_objset); 255 256 if (ds->ds_prev) { 257 dsl_dataset_rele(ds->ds_prev, ds); 258 ds->ds_prev = NULL; 259 } 260 261 bplist_destroy(&ds->ds_pending_deadlist); 262 if (ds->ds_phys->ds_deadlist_obj != 0) 263 dsl_deadlist_close(&ds->ds_deadlist); 264 if (ds->ds_dir) 265 dsl_dir_rele(ds->ds_dir, ds); 266 267 ASSERT(!list_link_active(&ds->ds_synced_link)); 268 269 if (mutex_owned(&ds->ds_lock)) 270 mutex_exit(&ds->ds_lock); 271 mutex_destroy(&ds->ds_lock); 272 if (mutex_owned(&ds->ds_opening_lock)) 273 mutex_exit(&ds->ds_opening_lock); 274 mutex_destroy(&ds->ds_opening_lock); 275 refcount_destroy(&ds->ds_longholds); 276 277 kmem_free(ds, sizeof (dsl_dataset_t)); 278} 279 280int 281dsl_dataset_get_snapname(dsl_dataset_t *ds) 282{ 283 dsl_dataset_phys_t *headphys; 284 int err; 285 dmu_buf_t *headdbuf; 286 dsl_pool_t *dp = ds->ds_dir->dd_pool; 287 objset_t *mos = dp->dp_meta_objset; 288 289 if (ds->ds_snapname[0]) 290 return (0); 291 if (ds->ds_phys->ds_next_snap_obj == 0) 292 return (0); 293 294 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 295 FTAG, &headdbuf); 296 if (err != 0) 297 return (err); 298 headphys = headdbuf->db_data; 299 err = zap_value_search(dp->dp_meta_objset, 300 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 301 
dmu_buf_rele(headdbuf, FTAG); 302 return (err); 303} 304 305int 306dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) 307{ 308 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 309 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 310 matchtype_t mt; 311 int err; 312 313 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 314 mt = MT_FIRST; 315 else 316 mt = MT_EXACT; 317 318 err = zap_lookup_norm(mos, snapobj, name, 8, 1, 319 value, mt, NULL, 0, NULL); 320 if (err == ENOTSUP && mt == MT_FIRST) 321 err = zap_lookup(mos, snapobj, name, 8, 1, value); 322 return (err); 323} 324 325int 326dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx) 327{ 328 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 329 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 330 matchtype_t mt; 331 int err; 332 333 dsl_dir_snap_cmtime_update(ds->ds_dir); 334 335 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 336 mt = MT_FIRST; 337 else 338 mt = MT_EXACT; 339 340 err = zap_remove_norm(mos, snapobj, name, mt, tx); 341 if (err == ENOTSUP && mt == MT_FIRST) 342 err = zap_remove(mos, snapobj, name, tx); 343 return (err); 344} 345 346int 347dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, 348 dsl_dataset_t **dsp) 349{ 350 objset_t *mos = dp->dp_meta_objset; 351 dmu_buf_t *dbuf; 352 dsl_dataset_t *ds; 353 int err; 354 dmu_object_info_t doi; 355 356 ASSERT(dsl_pool_config_held(dp)); 357 358 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 359 if (err != 0) 360 return (err); 361 362 /* Make sure dsobj has the correct object type. 
*/ 363 dmu_object_info_from_db(dbuf, &doi); 364 if (doi.doi_type != DMU_OT_DSL_DATASET) { 365 dmu_buf_rele(dbuf, tag); 366 return (SET_ERROR(EINVAL)); 367 } 368 369 ds = dmu_buf_get_user(dbuf); 370 if (ds == NULL) { 371 dsl_dataset_t *winner = NULL; 372 373 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 374 ds->ds_dbuf = dbuf; 375 ds->ds_object = dsobj; 376 ds->ds_phys = dbuf->db_data; 377 378 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 379 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 380 mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); 381 refcount_create(&ds->ds_longholds); 382 383 bplist_create(&ds->ds_pending_deadlist); 384 dsl_deadlist_open(&ds->ds_deadlist, 385 mos, ds->ds_phys->ds_deadlist_obj); 386 387 list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), 388 offsetof(dmu_sendarg_t, dsa_link)); 389 390 if (err == 0) { 391 err = dsl_dir_hold_obj(dp, 392 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 393 } 394 if (err != 0) { 395 mutex_destroy(&ds->ds_lock); 396 mutex_destroy(&ds->ds_opening_lock); 397 refcount_destroy(&ds->ds_longholds); 398 bplist_destroy(&ds->ds_pending_deadlist); 399 dsl_deadlist_close(&ds->ds_deadlist); 400 kmem_free(ds, sizeof (dsl_dataset_t)); 401 dmu_buf_rele(dbuf, tag); 402 return (err); 403 } 404 405 if (!dsl_dataset_is_snapshot(ds)) { 406 ds->ds_snapname[0] = '\0'; 407 if (ds->ds_phys->ds_prev_snap_obj != 0) { 408 err = dsl_dataset_hold_obj(dp, 409 ds->ds_phys->ds_prev_snap_obj, 410 ds, &ds->ds_prev); 411 } 412 } else { 413 if (zfs_flags & ZFS_DEBUG_SNAPNAMES) 414 err = dsl_dataset_get_snapname(ds); 415 if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) { 416 err = zap_count( 417 ds->ds_dir->dd_pool->dp_meta_objset, 418 ds->ds_phys->ds_userrefs_obj, 419 &ds->ds_userrefs); 420 } 421 } 422 423 if (err == 0 && !dsl_dataset_is_snapshot(ds)) { 424 err = dsl_prop_get_int_ds(ds, 425 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 426 &ds->ds_reserved); 427 if (err == 0) { 428 err = 
dsl_prop_get_int_ds(ds, 429 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 430 &ds->ds_quota); 431 } 432 } else { 433 ds->ds_reserved = ds->ds_quota = 0; 434 } 435 436 if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds, 437 &ds->ds_phys, dsl_dataset_evict)) != NULL) { 438 bplist_destroy(&ds->ds_pending_deadlist); 439 dsl_deadlist_close(&ds->ds_deadlist); 440 if (ds->ds_prev) 441 dsl_dataset_rele(ds->ds_prev, ds); 442 dsl_dir_rele(ds->ds_dir, ds); 443 mutex_destroy(&ds->ds_lock); 444 mutex_destroy(&ds->ds_opening_lock); 445 refcount_destroy(&ds->ds_longholds); 446 kmem_free(ds, sizeof (dsl_dataset_t)); 447 if (err != 0) { 448 dmu_buf_rele(dbuf, tag); 449 return (err); 450 } 451 ds = winner; 452 } else { 453 ds->ds_fsid_guid = 454 unique_insert(ds->ds_phys->ds_fsid_guid); 455 } 456 } 457 ASSERT3P(ds->ds_dbuf, ==, dbuf); 458 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 459 ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || 460 spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || 461 dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); 462 *dsp = ds; 463 return (0); 464} 465 466int 467dsl_dataset_hold(dsl_pool_t *dp, const char *name, 468 void *tag, dsl_dataset_t **dsp) 469{ 470 dsl_dir_t *dd; 471 const char *snapname; 472 uint64_t obj; 473 int err = 0; 474 475 err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname); 476 if (err != 0) 477 return (err); 478 479 ASSERT(dsl_pool_config_held(dp)); 480 obj = dd->dd_phys->dd_head_dataset_obj; 481 if (obj != 0) 482 err = dsl_dataset_hold_obj(dp, obj, tag, dsp); 483 else 484 err = SET_ERROR(ENOENT); 485 486 /* we may be looking for a snapshot */ 487 if (err == 0 && snapname != NULL) { 488 dsl_dataset_t *ds; 489 490 if (*snapname++ != '@') { 491 dsl_dataset_rele(*dsp, tag); 492 dsl_dir_rele(dd, FTAG); 493 return (SET_ERROR(ENOENT)); 494 } 495 496 dprintf("looking for snapshot '%s'\n", snapname); 497 err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); 498 if (err == 0) 499 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 500 dsl_dataset_rele(*dsp, 
tag); 501 502 if (err == 0) { 503 mutex_enter(&ds->ds_lock); 504 if (ds->ds_snapname[0] == 0) 505 (void) strlcpy(ds->ds_snapname, snapname, 506 sizeof (ds->ds_snapname)); 507 mutex_exit(&ds->ds_lock); 508 *dsp = ds; 509 } 510 } 511 512 dsl_dir_rele(dd, FTAG); 513 return (err); 514} 515 516int 517dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, 518 void *tag, dsl_dataset_t **dsp) 519{ 520 int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); 521 if (err != 0) 522 return (err); 523 if (!dsl_dataset_tryown(*dsp, tag)) { 524 dsl_dataset_rele(*dsp, tag); 525 *dsp = NULL; 526 return (SET_ERROR(EBUSY)); 527 } 528 return (0); 529} 530 531int 532dsl_dataset_own(dsl_pool_t *dp, const char *name, 533 void *tag, dsl_dataset_t **dsp) 534{ 535 int err = dsl_dataset_hold(dp, name, tag, dsp); 536 if (err != 0) 537 return (err); 538 if (!dsl_dataset_tryown(*dsp, tag)) { 539 dsl_dataset_rele(*dsp, tag); 540 return (SET_ERROR(EBUSY)); 541 } 542 return (0); 543} 544 545/* 546 * See the comment above dsl_pool_hold() for details. In summary, a long 547 * hold is used to prevent destruction of a dataset while the pool hold 548 * is dropped, allowing other concurrent operations (e.g. spa_sync()). 549 * 550 * The dataset and pool must be held when this function is called. After it 551 * is called, the pool hold may be released while the dataset is still held 552 * and accessed. 553 */ 554void 555dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag) 556{ 557 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 558 (void) refcount_add(&ds->ds_longholds, tag); 559} 560 561void 562dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag) 563{ 564 (void) refcount_remove(&ds->ds_longholds, tag); 565} 566 567/* Return B_TRUE if there are any long holds on this dataset. 
*/ 568boolean_t 569dsl_dataset_long_held(dsl_dataset_t *ds) 570{ 571 return (!refcount_is_zero(&ds->ds_longholds)); 572} 573 574void 575dsl_dataset_name(dsl_dataset_t *ds, char *name) 576{ 577 if (ds == NULL) { 578 (void) strcpy(name, "mos"); 579 } else { 580 dsl_dir_name(ds->ds_dir, name); 581 VERIFY0(dsl_dataset_get_snapname(ds)); 582 if (ds->ds_snapname[0]) { 583 (void) strcat(name, "@"); 584 /* 585 * We use a "recursive" mutex so that we 586 * can call dprintf_ds() with ds_lock held. 587 */ 588 if (!MUTEX_HELD(&ds->ds_lock)) { 589 mutex_enter(&ds->ds_lock); 590 (void) strcat(name, ds->ds_snapname); 591 mutex_exit(&ds->ds_lock); 592 } else { 593 (void) strcat(name, ds->ds_snapname); 594 } 595 } 596 } 597} 598 599static int 600dsl_dataset_namelen(dsl_dataset_t *ds) 601{ 602 int result; 603 604 if (ds == NULL) { 605 result = 3; /* "mos" */ 606 } else { 607 result = dsl_dir_namelen(ds->ds_dir); 608 VERIFY0(dsl_dataset_get_snapname(ds)); 609 if (ds->ds_snapname[0]) { 610 ++result; /* adding one for the @-sign */ 611 if (!MUTEX_HELD(&ds->ds_lock)) { 612 mutex_enter(&ds->ds_lock); 613 result += strlen(ds->ds_snapname); 614 mutex_exit(&ds->ds_lock); 615 } else { 616 result += strlen(ds->ds_snapname); 617 } 618 } 619 } 620 621 return (result); 622} 623 624void 625dsl_dataset_rele(dsl_dataset_t *ds, void *tag) 626{ 627 dmu_buf_rele(ds->ds_dbuf, tag); 628} 629 630void 631dsl_dataset_disown(dsl_dataset_t *ds, void *tag) 632{ 633 ASSERT(ds->ds_owner == tag && ds->ds_dbuf != NULL); 634 635 mutex_enter(&ds->ds_lock); 636 ds->ds_owner = NULL; 637 mutex_exit(&ds->ds_lock); 638 dsl_dataset_long_rele(ds, tag); 639 if (ds->ds_dbuf != NULL) 640 dsl_dataset_rele(ds, tag); 641 else 642 dsl_dataset_evict(NULL, ds); 643} 644 645boolean_t 646dsl_dataset_tryown(dsl_dataset_t *ds, void *tag) 647{ 648 boolean_t gotit = FALSE; 649 650 mutex_enter(&ds->ds_lock); 651 if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) { 652 ds->ds_owner = tag; 653 dsl_dataset_long_hold(ds, tag); 654 gotit = 
/*
 * Allocate and initialize the on-disk head dataset object for the
 * dsl_dir `dd' and record it as dd's head dataset.
 *
 * If `origin' is NULL the pool's dp_origin_snap is used when there is
 * one; with an origin the new dataset is set up as a clone of it.
 * `flags' become the initial ds_flags.  Must be called in syncing
 * context, and dd must not have a head dataset yet (both asserted).
 *
 * Returns the object number of the new dataset.
 */
uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
    uint64_t flags, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dd->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;

	/* Fall back to the pool-wide origin snapshot (may still be NULL). */
	if (origin == NULL)
		origin = dp->dp_origin_snap;

	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);

	/* Allocate the dataset object and start from a zeroed phys. */
	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_flags = flags;
	dsphys->ds_fsid_guid = unique_create();
	/* Retry until the random guid is non-zero. */
	do {
		(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
		    sizeof (dsphys->ds_guid));
	} while (dsphys->ds_guid == 0);
	dsphys->ds_snapnames_zapobj =
	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
	    DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	/* A dataset created in TXG_INITIAL is recorded as created in txg 1. */
	dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;

	if (origin == NULL) {
		/* No origin: start with a fresh, empty deadlist. */
		dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);
	} else {
		dsl_dataset_t *ohds; /* head of the origin snapshot */

		/* Inherit the origin's block pointer, space and flags. */
		dsphys->ds_prev_snap_obj = origin->ds_object;
		dsphys->ds_prev_snap_txg =
		    origin->ds_phys->ds_creation_txg;
		dsphys->ds_referenced_bytes =
		    origin->ds_phys->ds_referenced_bytes;
		dsphys->ds_compressed_bytes =
		    origin->ds_phys->ds_compressed_bytes;
		dsphys->ds_uncompressed_bytes =
		    origin->ds_phys->ds_uncompressed_bytes;
		dsphys->ds_bp = origin->ds_phys->ds_bp;
		dsphys->ds_flags |= origin->ds_phys->ds_flags;

		/* The origin gains one more child. */
		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		origin->ds_phys->ds_num_children++;

		/* Clone the deadlist of the origin's head dataset. */
		VERIFY0(dsl_dataset_hold_obj(dp,
		    origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds));
		dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist,
		    dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx);
		dsl_dataset_rele(ohds, FTAG);

		/* Record the clone in the origin's next-clones ZAP. */
		if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
			if (origin->ds_phys->ds_next_clones_obj == 0) {
				origin->ds_phys->ds_next_clones_obj =
				    zap_create(mos,
				    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
			}
			VERIFY0(zap_add_int(mos,
			    origin->ds_phys->ds_next_clones_obj, dsobj, tx));
		}

		/* Link dd to its origin, and the origin's dir to the clone. */
		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		dd->dd_phys->dd_origin_obj = origin->ds_object;
		if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
			if (origin->ds_dir->dd_phys->dd_clones == 0) {
				dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx);
				origin->ds_dir->dd_phys->dd_clones =
				    zap_create(mos,
				    DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
			}
			VERIFY0(zap_add_int(mos,
			    origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
		}
	}

	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

	dmu_buf_rele(dbuf, FTAG);

	/* Finally make the new object dd's head dataset. */
	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;

	return (dsobj);
}
#ifdef __FreeBSD__
/* FreeBSD ioctl compat begin */

/* Argument passed through dmu_objset_find() to dsl_check_snap_cb(). */
struct destroyarg {
	nvlist_t *nvl;		/* list that receives "<fs>@<snap>" names */
	const char *snapname;	/* snapshot name appended to every fs name */
};

/*
 * dmu_objset_find() callback: add "<name>@<snapname>" to the nvlist as
 * a boolean entry.  Always returns 0.
 */
static int
dsl_check_snap_cb(const char *name, void *arg)
{
	struct destroyarg *da = arg;
	char *dsname;

	dsname = kmem_asprintf("%s@%s", name, da->snapname);
	fnvlist_add_boolean(da->nvl, dsname);
	kmem_free(dsname, strlen(dsname) + 1);

	return (0);
}

/*
 * Fill `snaps' with "<fs>@<snapname>" for fsname and every dataset
 * found below it (DS_FIND_CHILDREN).  Returns the result of
 * dmu_objset_find().
 */
int
dmu_get_recursive_snaps_nvl(char *fsname, const char *snapname,
    nvlist_t *snaps)
{
	/* The argument is two pointers; no need to heap-allocate it. */
	struct destroyarg da;

	da.nvl = snaps;
	da.snapname = snapname;

	return (dmu_objset_find(fsname, dsl_check_snap_cb, &da,
	    DS_FIND_CHILDREN));
}
/* FreeBSD ioctl compat end */
#endif /* __FreeBSD__ */
851 */ 852void 853dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) 854{ 855 uint64_t mrs_used; 856 uint64_t dlused, dlcomp, dluncomp; 857 858 ASSERT(!dsl_dataset_is_snapshot(ds)); 859 860 if (ds->ds_phys->ds_prev_snap_obj != 0) 861 mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes; 862 else 863 mrs_used = 0; 864 865 dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); 866 867 ASSERT3U(dlused, <=, mrs_used); 868 ds->ds_phys->ds_unique_bytes = 869 ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused); 870 871 if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= 872 SPA_VERSION_UNIQUE_ACCURATE) 873 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 874} 875 876void 877dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, 878 dmu_tx_t *tx) 879{ 880 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 881 uint64_t count; 882 int err; 883 884 ASSERT(ds->ds_phys->ds_num_children >= 2); 885 err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx); 886 /* 887 * The err should not be ENOENT, but a bug in a previous version 888 * of the code could cause upgrade_clones_cb() to not set 889 * ds_next_snap_obj when it should, leading to a missing entry. 890 * If we knew that the pool was created after 891 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't 892 * ENOENT. However, at least we can check that we don't have 893 * too many entries in the next_clones_obj even after failing to 894 * remove this one. 
895 */ 896 if (err != ENOENT) 897 VERIFY0(err); 898 ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj, 899 &count)); 900 ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); 901} 902 903 904blkptr_t * 905dsl_dataset_get_blkptr(dsl_dataset_t *ds) 906{ 907 return (&ds->ds_phys->ds_bp); 908} 909 910void 911dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 912{ 913 ASSERT(dmu_tx_is_syncing(tx)); 914 /* If it's the meta-objset, set dp_meta_rootbp */ 915 if (ds == NULL) { 916 tx->tx_pool->dp_meta_rootbp = *bp; 917 } else { 918 dmu_buf_will_dirty(ds->ds_dbuf, tx); 919 ds->ds_phys->ds_bp = *bp; 920 } 921} 922 923spa_t * 924dsl_dataset_get_spa(dsl_dataset_t *ds) 925{ 926 return (ds->ds_dir->dd_pool->dp_spa); 927} 928 929void 930dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 931{ 932 dsl_pool_t *dp; 933 934 if (ds == NULL) /* this is the meta-objset */ 935 return; 936 937 ASSERT(ds->ds_objset != NULL); 938 939 if (ds->ds_phys->ds_next_snap_obj != 0) 940 panic("dirtying snapshot!"); 941 942 dp = ds->ds_dir->dd_pool; 943 944 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) { 945 /* up the hold count until we can be written out */ 946 dmu_buf_add_ref(ds->ds_dbuf, ds); 947 } 948} 949 950boolean_t 951dsl_dataset_is_dirty(dsl_dataset_t *ds) 952{ 953 for (int t = 0; t < TXG_SIZE; t++) { 954 if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, 955 ds, t)) 956 return (B_TRUE); 957 } 958 return (B_FALSE); 959} 960 961static int 962dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) 963{ 964 uint64_t asize; 965 966 if (!dmu_tx_is_syncing(tx)) 967 return (0); 968 969 /* 970 * If there's an fs-only reservation, any blocks that might become 971 * owned by the snapshot dataset must be accommodated by space 972 * outside of the reservation. 
973 */ 974 ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); 975 asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 976 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 977 return (SET_ERROR(ENOSPC)); 978 979 /* 980 * Propagate any reserved space for this snapshot to other 981 * snapshot checks in this sync group. 982 */ 983 if (asize > 0) 984 dsl_dir_willuse_space(ds->ds_dir, asize, tx); 985 986 return (0); 987} 988 989typedef struct dsl_dataset_snapshot_arg { 990 nvlist_t *ddsa_snaps; 991 nvlist_t *ddsa_props; 992 nvlist_t *ddsa_errors; 993} dsl_dataset_snapshot_arg_t; 994 995int 996dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
|
1326 if (error != 0) { 1327 dsl_dataset_rele(ds, FTAG); 1328 return (error); 1329 } 1330 1331 if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) { 1332 dsl_dataset_rele(ds, FTAG); 1333 return (SET_ERROR(ENOTSUP)); 1334 } 1335 error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag, 1336 B_TRUE, tx); 1337 if (error != 0) { 1338 dsl_dataset_rele(ds, FTAG); 1339 return (error); 1340 } 1341 1342 dsl_dataset_rele(ds, FTAG); 1343 return (0); 1344} 1345 1346static void 1347dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx) 1348{ 1349 dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1350 dsl_pool_t *dp = dmu_tx_pool(tx); 1351 dsl_dataset_t *ds; 1352 1353 VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds)); 1354 1355 dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx); 1356 dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag, 1357 ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx); 1358 dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx); 1359 1360 dsl_dataset_rele(ds, FTAG); 1361} 1362 1363int 1364dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, 1365 minor_t cleanup_minor, const char *htag) 1366{ 1367 dsl_dataset_snapshot_tmp_arg_t ddsta; 1368 int error; 1369 spa_t *spa; 1370 boolean_t needsuspend; 1371 void *cookie; 1372 1373 ddsta.ddsta_fsname = fsname; 1374 ddsta.ddsta_snapname = snapname; 1375 ddsta.ddsta_cleanup_minor = cleanup_minor; 1376 ddsta.ddsta_htag = htag; 1377 1378 error = spa_open(fsname, &spa, FTAG); 1379 if (error != 0) 1380 return (error); 1381 needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1382 spa_close(spa, FTAG); 1383 1384 if (needsuspend) { 1385 error = zil_suspend(fsname, &cookie); 1386 if (error != 0) 1387 return (error); 1388 } 1389 1390 error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check, 1391 dsl_dataset_snapshot_tmp_sync, &ddsta, 3); 1392 1393 if (needsuspend) 1394 zil_resume(cookie); 1395 return (error); 1396} 1397 1398 1399void 1400dsl_dataset_sync(dsl_dataset_t 
*ds, zio_t *zio, dmu_tx_t *tx) 1401{ 1402 ASSERT(dmu_tx_is_syncing(tx)); 1403 ASSERT(ds->ds_objset != NULL); 1404 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1405 1406 /* 1407 * in case we had to change ds_fsid_guid when we opened it, 1408 * sync it out now. 1409 */ 1410 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1411 ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 1412 1413 dmu_objset_sync(ds->ds_objset, zio, tx); 1414} 1415 1416static void 1417get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) 1418{ 1419 uint64_t count = 0; 1420 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1421 zap_cursor_t zc; 1422 zap_attribute_t za; 1423 nvlist_t *propval = fnvlist_alloc(); 1424 nvlist_t *val = fnvlist_alloc(); 1425 1426 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 1427 1428 /* 1429 * There may be missing entries in ds_next_clones_obj 1430 * due to a bug in a previous version of the code. 1431 * Only trust it if it has the right number of entries. 1432 */ 1433 if (ds->ds_phys->ds_next_clones_obj != 0) { 1434 ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj, 1435 &count)); 1436 } 1437 if (count != ds->ds_phys->ds_num_children - 1) 1438 goto fail; 1439 for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj); 1440 zap_cursor_retrieve(&zc, &za) == 0; 1441 zap_cursor_advance(&zc)) { 1442 dsl_dataset_t *clone; 1443 char buf[ZFS_MAXNAMELEN]; 1444 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 1445 za.za_first_integer, FTAG, &clone)); 1446 dsl_dir_name(clone->ds_dir, buf); 1447 fnvlist_add_boolean(val, buf); 1448 dsl_dataset_rele(clone, FTAG); 1449 } 1450 zap_cursor_fini(&zc); 1451 fnvlist_add_nvlist(propval, ZPROP_VALUE, val); 1452 fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval); 1453fail: 1454 nvlist_free(val); 1455 nvlist_free(propval); 1456} 1457 1458void 1459dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1460{ 1461 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1462 uint64_t refd, avail, uobjs, aobjs, ratio; 1463 1464 
ASSERT(dsl_pool_config_held(dp)); 1465 1466 ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 1467 (ds->ds_phys->ds_uncompressed_bytes * 100 / 1468 ds->ds_phys->ds_compressed_bytes); 1469 1470 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); 1471 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, 1472 ds->ds_phys->ds_uncompressed_bytes); 1473 1474 if (dsl_dataset_is_snapshot(ds)) { 1475 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); 1476 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1477 ds->ds_phys->ds_unique_bytes); 1478 get_clones_stat(ds, nv); 1479 } else { 1480 dsl_dir_stats(ds->ds_dir, nv); 1481 } 1482 1483 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 1484 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 1485 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 1486 1487 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1488 ds->ds_phys->ds_creation_time); 1489 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1490 ds->ds_phys->ds_creation_txg); 1491 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 1492 ds->ds_quota); 1493 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 1494 ds->ds_reserved); 1495 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 1496 ds->ds_phys->ds_guid); 1497 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, 1498 ds->ds_phys->ds_unique_bytes); 1499 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, 1500 ds->ds_object); 1501 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, 1502 ds->ds_userrefs); 1503 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, 1504 DS_IS_DEFER_DESTROY(ds) ? 
1 : 0); 1505 1506 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1507 uint64_t written, comp, uncomp; 1508 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1509 dsl_dataset_t *prev; 1510 1511 int err = dsl_dataset_hold_obj(dp, 1512 ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); 1513 if (err == 0) { 1514 err = dsl_dataset_space_written(prev, ds, &written, 1515 &comp, &uncomp); 1516 dsl_dataset_rele(prev, FTAG); 1517 if (err == 0) { 1518 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, 1519 written); 1520 } 1521 } 1522 } 1523} 1524 1525void 1526dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1527{ 1528 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1529 ASSERT(dsl_pool_config_held(dp)); 1530 1531 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1532 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 1533 stat->dds_guid = ds->ds_phys->ds_guid; 1534 stat->dds_origin[0] = '\0'; 1535 if (dsl_dataset_is_snapshot(ds)) { 1536 stat->dds_is_snapshot = B_TRUE; 1537 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1538 } else { 1539 stat->dds_is_snapshot = B_FALSE; 1540 stat->dds_num_clones = 0; 1541 1542 if (dsl_dir_is_clone(ds->ds_dir)) { 1543 dsl_dataset_t *ods; 1544 1545 VERIFY0(dsl_dataset_hold_obj(dp, 1546 ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); 1547 dsl_dataset_name(ods, stat->dds_origin); 1548 dsl_dataset_rele(ods, FTAG); 1549 } 1550 } 1551} 1552 1553uint64_t 1554dsl_dataset_fsid_guid(dsl_dataset_t *ds) 1555{ 1556 return (ds->ds_fsid_guid); 1557} 1558 1559void 1560dsl_dataset_space(dsl_dataset_t *ds, 1561 uint64_t *refdbytesp, uint64_t *availbytesp, 1562 uint64_t *usedobjsp, uint64_t *availobjsp) 1563{ 1564 *refdbytesp = ds->ds_phys->ds_referenced_bytes; 1565 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 1566 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) 1567 *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; 1568 if (ds->ds_quota != 0) { 1569 /* 1570 * Adjust available bytes according to 
refquota 1571 */ 1572 if (*refdbytesp < ds->ds_quota) 1573 *availbytesp = MIN(*availbytesp, 1574 ds->ds_quota - *refdbytesp); 1575 else 1576 *availbytesp = 0; 1577 } 1578 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 1579 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 1580} 1581 1582boolean_t 1583dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) 1584{ 1585 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1586 1587 ASSERT(dsl_pool_config_held(dp)); 1588 if (ds->ds_prev == NULL) 1589 return (B_FALSE); 1590 if (ds->ds_phys->ds_bp.blk_birth > 1591 ds->ds_prev->ds_phys->ds_creation_txg) { 1592 objset_t *os, *os_prev; 1593 /* 1594 * It may be that only the ZIL differs, because it was 1595 * reset in the head. Don't count that as being 1596 * modified. 1597 */ 1598 if (dmu_objset_from_ds(ds, &os) != 0) 1599 return (B_TRUE); 1600 if (dmu_objset_from_ds(ds->ds_prev, &os_prev) != 0) 1601 return (B_TRUE); 1602 return (bcmp(&os->os_phys->os_meta_dnode, 1603 &os_prev->os_phys->os_meta_dnode, 1604 sizeof (os->os_phys->os_meta_dnode)) != 0); 1605 } 1606 return (B_FALSE); 1607} 1608 1609typedef struct dsl_dataset_rename_snapshot_arg { 1610 const char *ddrsa_fsname; 1611 const char *ddrsa_oldsnapname; 1612 const char *ddrsa_newsnapname; 1613 boolean_t ddrsa_recursive; 1614 dmu_tx_t *ddrsa_tx; 1615} dsl_dataset_rename_snapshot_arg_t; 1616 1617/* ARGSUSED */ 1618static int 1619dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp, 1620 dsl_dataset_t *hds, void *arg) 1621{ 1622 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1623 int error; 1624 uint64_t val; 1625 1626 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 1627 if (error != 0) { 1628 /* ignore nonexistent snapshots */ 1629 return (error == ENOENT ? 
0 : error); 1630 } 1631 1632 /* new name should not exist */ 1633 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val); 1634 if (error == 0) 1635 error = SET_ERROR(EEXIST); 1636 else if (error == ENOENT) 1637 error = 0; 1638 1639 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 1640 if (dsl_dir_namelen(hds->ds_dir) + 1 + 1641 strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN) 1642 error = SET_ERROR(ENAMETOOLONG); 1643 1644 return (error); 1645} 1646 1647static int 1648dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx) 1649{ 1650 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1651 dsl_pool_t *dp = dmu_tx_pool(tx); 1652 dsl_dataset_t *hds; 1653 int error; 1654 1655 error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds); 1656 if (error != 0) 1657 return (error); 1658 1659 if (ddrsa->ddrsa_recursive) { 1660 error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 1661 dsl_dataset_rename_snapshot_check_impl, ddrsa, 1662 DS_FIND_CHILDREN); 1663 } else { 1664 error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa); 1665 } 1666 dsl_dataset_rele(hds, FTAG); 1667 return (error); 1668} 1669 1670static int 1671dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, 1672 dsl_dataset_t *hds, void *arg) 1673{ 1674#ifdef __FreeBSD__ 1675#ifdef _KERNEL 1676 char *oldname, *newname; 1677#endif 1678#endif 1679 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1680 dsl_dataset_t *ds; 1681 uint64_t val; 1682 dmu_tx_t *tx = ddrsa->ddrsa_tx; 1683 int error; 1684 1685 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 1686 ASSERT(error == 0 || error == ENOENT); 1687 if (error == ENOENT) { 1688 /* ignore nonexistent snapshots */ 1689 return (0); 1690 } 1691 1692 VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds)); 1693 1694 /* log before we change the name */ 1695 spa_history_log_internal_ds(ds, "rename", tx, 1696 "-> @%s", ddrsa->ddrsa_newsnapname); 1697 1698 VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, 
tx)); 1699 mutex_enter(&ds->ds_lock); 1700 (void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname); 1701 mutex_exit(&ds->ds_lock); 1702 VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, 1703 ds->ds_snapname, 8, 1, &ds->ds_object, tx)); 1704 1705#ifdef __FreeBSD__ 1706#ifdef _KERNEL 1707 oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1708 newname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1709 snprintf(oldname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, 1710 ddrsa->ddrsa_oldsnapname); 1711 snprintf(newname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, 1712 ddrsa->ddrsa_newsnapname); 1713 zfsvfs_update_fromname(oldname, newname); 1714 zvol_rename_minors(oldname, newname); 1715 kmem_free(newname, MAXPATHLEN); 1716 kmem_free(oldname, MAXPATHLEN); 1717#endif 1718#endif 1719 dsl_dataset_rele(ds, FTAG); 1720 1721 return (0); 1722} 1723 1724static void 1725dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx) 1726{ 1727 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1728 dsl_pool_t *dp = dmu_tx_pool(tx); 1729 dsl_dataset_t *hds; 1730 1731 VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds)); 1732 ddrsa->ddrsa_tx = tx; 1733 if (ddrsa->ddrsa_recursive) { 1734 VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 1735 dsl_dataset_rename_snapshot_sync_impl, ddrsa, 1736 DS_FIND_CHILDREN)); 1737 } else { 1738 VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa)); 1739 } 1740 dsl_dataset_rele(hds, FTAG); 1741} 1742 1743int 1744dsl_dataset_rename_snapshot(const char *fsname, 1745 const char *oldsnapname, const char *newsnapname, boolean_t recursive) 1746{ 1747 dsl_dataset_rename_snapshot_arg_t ddrsa; 1748 1749 ddrsa.ddrsa_fsname = fsname; 1750 ddrsa.ddrsa_oldsnapname = oldsnapname; 1751 ddrsa.ddrsa_newsnapname = newsnapname; 1752 ddrsa.ddrsa_recursive = recursive; 1753 1754 return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check, 1755 dsl_dataset_rename_snapshot_sync, &ddrsa, 1)); 1756} 1757 1758/* 1759 * If we're doing an ownership 
handoff, we need to make sure that there is 1760 * only one long hold on the dataset. We're not allowed to change anything here 1761 * so we don't permanently release the long hold or regular hold here. We want 1762 * to do this only when syncing to avoid the dataset unexpectedly going away 1763 * when we release the long hold. 1764 */ 1765static int 1766dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) 1767{ 1768 boolean_t held; 1769 1770 if (!dmu_tx_is_syncing(tx)) 1771 return (0); 1772 1773 if (owner != NULL) { 1774 VERIFY3P(ds->ds_owner, ==, owner); 1775 dsl_dataset_long_rele(ds, owner); 1776 } 1777 1778 held = dsl_dataset_long_held(ds); 1779 1780 if (owner != NULL) 1781 dsl_dataset_long_hold(ds, owner); 1782 1783 if (held) 1784 return (SET_ERROR(EBUSY)); 1785 1786 return (0); 1787} 1788 1789typedef struct dsl_dataset_rollback_arg { 1790 const char *ddra_fsname; 1791 void *ddra_owner; 1792} dsl_dataset_rollback_arg_t; 1793 1794static int 1795dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) 1796{ 1797 dsl_dataset_rollback_arg_t *ddra = arg; 1798 dsl_pool_t *dp = dmu_tx_pool(tx); 1799 dsl_dataset_t *ds; 1800 int64_t unused_refres_delta; 1801 int error; 1802 1803 error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds); 1804 if (error != 0) 1805 return (error); 1806 1807 /* must not be a snapshot */ 1808 if (dsl_dataset_is_snapshot(ds)) { 1809 dsl_dataset_rele(ds, FTAG); 1810 return (SET_ERROR(EINVAL)); 1811 } 1812 1813 /* must have a most recent snapshot */ 1814 if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { 1815 dsl_dataset_rele(ds, FTAG); 1816 return (SET_ERROR(EINVAL)); 1817 } 1818 1819 error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx); 1820 if (error != 0) { 1821 dsl_dataset_rele(ds, FTAG); 1822 return (error); 1823 } 1824 1825 /* 1826 * Check if the snap we are rolling back to uses more than 1827 * the refquota. 
1828 */ 1829 if (ds->ds_quota != 0 && 1830 ds->ds_prev->ds_phys->ds_referenced_bytes > ds->ds_quota) { 1831 dsl_dataset_rele(ds, FTAG); 1832 return (SET_ERROR(EDQUOT)); 1833 } 1834 1835 /* 1836 * When we do the clone swap, we will temporarily use more space 1837 * due to the refreservation (the head will no longer have any 1838 * unique space, so the entire amount of the refreservation will need 1839 * to be free). We will immediately destroy the clone, freeing 1840 * this space, but the freeing happens over many txg's. 1841 */ 1842 unused_refres_delta = (int64_t)MIN(ds->ds_reserved, 1843 ds->ds_phys->ds_unique_bytes); 1844 1845 if (unused_refres_delta > 0 && 1846 unused_refres_delta > 1847 dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) { 1848 dsl_dataset_rele(ds, FTAG); 1849 return (SET_ERROR(ENOSPC)); 1850 } 1851 1852 dsl_dataset_rele(ds, FTAG); 1853 return (0); 1854} 1855 1856static void 1857dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) 1858{ 1859 dsl_dataset_rollback_arg_t *ddra = arg; 1860 dsl_pool_t *dp = dmu_tx_pool(tx); 1861 dsl_dataset_t *ds, *clone; 1862 uint64_t cloneobj; 1863 1864 VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds)); 1865 1866 cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", 1867 ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); 1868 1869 VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); 1870 1871 dsl_dataset_clone_swap_sync_impl(clone, ds, tx); 1872 dsl_dataset_zero_zil(ds, tx); 1873 1874 dsl_destroy_head_sync_impl(clone, tx); 1875 1876 dsl_dataset_rele(clone, FTAG); 1877 dsl_dataset_rele(ds, FTAG); 1878} 1879 1880/* 1881 * If owner != NULL: 1882 * 1883 * - The existing dataset MUST be owned by the specified owner at entry 1884 * - Upon return, dataset will still be held by the same owner, whether we 1885 * succeed or not. 1886 * 1887 * This mode is required any time the existing filesystem is mounted. See 1888 * notes above zfs_suspend_fs() for further details. 
1889 */ 1890int 1891dsl_dataset_rollback(const char *fsname, void *owner) 1892{ 1893 dsl_dataset_rollback_arg_t ddra; 1894 1895 ddra.ddra_fsname = fsname; 1896 ddra.ddra_owner = owner; 1897 1898 return (dsl_sync_task(fsname, dsl_dataset_rollback_check, 1899 dsl_dataset_rollback_sync, (void *)&ddra, 1)); 1900} 1901 1902struct promotenode { 1903 list_node_t link; 1904 dsl_dataset_t *ds; 1905}; 1906 1907typedef struct dsl_dataset_promote_arg { 1908 const char *ddpa_clonename; 1909 dsl_dataset_t *ddpa_clone; 1910 list_t shared_snaps, origin_snaps, clone_snaps; 1911 dsl_dataset_t *origin_origin; /* origin of the origin */ 1912 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 1913 char *err_ds; 1914} dsl_dataset_promote_arg_t; 1915 1916static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 1917static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, 1918 void *tag); 1919static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag); 1920 1921static int 1922dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) 1923{ 1924 dsl_dataset_promote_arg_t *ddpa = arg; 1925 dsl_pool_t *dp = dmu_tx_pool(tx); 1926 dsl_dataset_t *hds; 1927 struct promotenode *snap; 1928 dsl_dataset_t *origin_ds; 1929 int err; 1930 uint64_t unused; 1931 1932 err = promote_hold(ddpa, dp, FTAG); 1933 if (err != 0) 1934 return (err); 1935 1936 hds = ddpa->ddpa_clone; 1937 1938 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 1939 promote_rele(ddpa, FTAG); 1940 return (SET_ERROR(EXDEV)); 1941 } 1942 1943 /* 1944 * Compute and check the amount of space to transfer. Since this is 1945 * so expensive, don't do the preliminary check. 
1946 */ 1947 if (!dmu_tx_is_syncing(tx)) { 1948 promote_rele(ddpa, FTAG); 1949 return (0); 1950 } 1951 1952 snap = list_head(&ddpa->shared_snaps); 1953 origin_ds = snap->ds; 1954 1955 /* compute origin's new unique space */ 1956 snap = list_tail(&ddpa->clone_snaps); 1957 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 1958 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 1959 origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, 1960 &ddpa->unique, &unused, &unused); 1961 1962 /* 1963 * Walk the snapshots that we are moving 1964 * 1965 * Compute space to transfer. Consider the incremental changes 1966 * to used by each snapshot: 1967 * (my used) = (prev's used) + (blocks born) - (blocks killed) 1968 * So each snapshot gave birth to: 1969 * (blocks born) = (my used) - (prev's used) + (blocks killed) 1970 * So a sequence would look like: 1971 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 1972 * Which simplifies to: 1973 * uN + kN + kN-1 + ... + k1 + k0 1974 * Note however, if we stop before we reach the ORIGIN we get: 1975 * uN + kN + kN-1 + ... + kM - uM-1 1976 */ 1977 ddpa->used = origin_ds->ds_phys->ds_referenced_bytes; 1978 ddpa->comp = origin_ds->ds_phys->ds_compressed_bytes; 1979 ddpa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 1980 for (snap = list_head(&ddpa->shared_snaps); snap; 1981 snap = list_next(&ddpa->shared_snaps, snap)) { 1982 uint64_t val, dlused, dlcomp, dluncomp; 1983 dsl_dataset_t *ds = snap->ds; 1984 1985 /* 1986 * If there are long holds, we won't be able to evict 1987 * the objset. 
1988 */ 1989 if (dsl_dataset_long_held(ds)) { 1990 err = SET_ERROR(EBUSY); 1991 goto out; 1992 } 1993 1994 /* Check that the snapshot name does not conflict */ 1995 VERIFY0(dsl_dataset_get_snapname(ds)); 1996 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 1997 if (err == 0) { 1998 (void) strcpy(ddpa->err_ds, snap->ds->ds_snapname); 1999 err = SET_ERROR(EEXIST); 2000 goto out; 2001 } 2002 if (err != ENOENT) 2003 goto out; 2004 2005 /* The very first snapshot does not have a deadlist */ 2006 if (ds->ds_phys->ds_prev_snap_obj == 0) 2007 continue; 2008 2009 dsl_deadlist_space(&ds->ds_deadlist, 2010 &dlused, &dlcomp, &dluncomp); 2011 ddpa->used += dlused; 2012 ddpa->comp += dlcomp; 2013 ddpa->uncomp += dluncomp; 2014 } 2015 2016 /* 2017 * If we are a clone of a clone then we never reached ORIGIN, 2018 * so we need to subtract out the clone origin's used space. 2019 */ 2020 if (ddpa->origin_origin) { 2021 ddpa->used -= ddpa->origin_origin->ds_phys->ds_referenced_bytes; 2022 ddpa->comp -= ddpa->origin_origin->ds_phys->ds_compressed_bytes; 2023 ddpa->uncomp -= 2024 ddpa->origin_origin->ds_phys->ds_uncompressed_bytes; 2025 } 2026 2027 /* Check that there is enough space here */ 2028 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2029 ddpa->used); 2030 if (err != 0) 2031 goto out; 2032 2033 /* 2034 * Compute the amounts of space that will be used by snapshots 2035 * after the promotion (for both origin and clone). For each, 2036 * it is the amount of space that will be on all of their 2037 * deadlists (that was not born before their new origin). 2038 */ 2039 if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2040 uint64_t space; 2041 2042 /* 2043 * Note, typically this will not be a clone of a clone, 2044 * so dd_origin_txg will be < TXG_INITIAL, so 2045 * these snaplist_space() -> dsl_deadlist_space_range() 2046 * calls will be fast because they do not have to 2047 * iterate over all bps. 
2048 */ 2049 snap = list_head(&ddpa->origin_snaps); 2050 err = snaplist_space(&ddpa->shared_snaps, 2051 snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap); 2052 if (err != 0) 2053 goto out; 2054 2055 err = snaplist_space(&ddpa->clone_snaps, 2056 snap->ds->ds_dir->dd_origin_txg, &space); 2057 if (err != 0) 2058 goto out; 2059 ddpa->cloneusedsnap += space; 2060 } 2061 if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2062 err = snaplist_space(&ddpa->origin_snaps, 2063 origin_ds->ds_phys->ds_creation_txg, &ddpa->originusedsnap); 2064 if (err != 0) 2065 goto out; 2066 } 2067 2068out: 2069 promote_rele(ddpa, FTAG); 2070 return (err); 2071} 2072 2073static void 2074dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) 2075{ 2076 dsl_dataset_promote_arg_t *ddpa = arg; 2077 dsl_pool_t *dp = dmu_tx_pool(tx); 2078 dsl_dataset_t *hds; 2079 struct promotenode *snap; 2080 dsl_dataset_t *origin_ds; 2081 dsl_dataset_t *origin_head; 2082 dsl_dir_t *dd; 2083 dsl_dir_t *odd = NULL; 2084 uint64_t oldnext_obj; 2085 int64_t delta; 2086 2087 VERIFY0(promote_hold(ddpa, dp, FTAG)); 2088 hds = ddpa->ddpa_clone; 2089 2090 ASSERT0(hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE); 2091 2092 snap = list_head(&ddpa->shared_snaps); 2093 origin_ds = snap->ds; 2094 dd = hds->ds_dir; 2095 2096 snap = list_head(&ddpa->origin_snaps); 2097 origin_head = snap->ds; 2098 2099 /* 2100 * We need to explicitly open odd, since origin_ds's dd will be 2101 * changing. 
2102 */ 2103 VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, 2104 NULL, FTAG, &odd)); 2105 2106 /* change origin's next snap */ 2107 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2108 oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; 2109 snap = list_tail(&ddpa->clone_snaps); 2110 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2111 origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; 2112 2113 /* change the origin's next clone */ 2114 if (origin_ds->ds_phys->ds_next_clones_obj) { 2115 dsl_dataset_remove_from_next_clones(origin_ds, 2116 snap->ds->ds_object, tx); 2117 VERIFY0(zap_add_int(dp->dp_meta_objset, 2118 origin_ds->ds_phys->ds_next_clones_obj, 2119 oldnext_obj, tx)); 2120 } 2121 2122 /* change origin */ 2123 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2124 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2125 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2126 dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; 2127 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2128 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2129 origin_head->ds_dir->dd_origin_txg = 2130 origin_ds->ds_phys->ds_creation_txg; 2131 2132 /* change dd_clone entries */ 2133 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2134 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2135 odd->dd_phys->dd_clones, hds->ds_object, tx)); 2136 VERIFY0(zap_add_int(dp->dp_meta_objset, 2137 ddpa->origin_origin->ds_dir->dd_phys->dd_clones, 2138 hds->ds_object, tx)); 2139 2140 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2141 ddpa->origin_origin->ds_dir->dd_phys->dd_clones, 2142 origin_head->ds_object, tx)); 2143 if (dd->dd_phys->dd_clones == 0) { 2144 dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, 2145 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 2146 } 2147 VERIFY0(zap_add_int(dp->dp_meta_objset, 2148 dd->dd_phys->dd_clones, origin_head->ds_object, tx)); 2149 } 2150 2151 /* move snapshots to this dir */ 2152 for (snap = 
list_head(&ddpa->shared_snaps); snap; 2153 snap = list_next(&ddpa->shared_snaps, snap)) { 2154 dsl_dataset_t *ds = snap->ds; 2155 2156 /* 2157 * Property callbacks are registered to a particular 2158 * dsl_dir. Since ours is changing, evict the objset 2159 * so that they will be unregistered from the old dsl_dir. 2160 */ 2161 if (ds->ds_objset) { 2162 dmu_objset_evict(ds->ds_objset); 2163 ds->ds_objset = NULL; 2164 } 2165 2166 /* move snap name entry */ 2167 VERIFY0(dsl_dataset_get_snapname(ds)); 2168 VERIFY0(dsl_dataset_snap_remove(origin_head, 2169 ds->ds_snapname, tx)); 2170 VERIFY0(zap_add(dp->dp_meta_objset, 2171 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2172 8, 1, &ds->ds_object, tx)); 2173 2174 /* change containing dsl_dir */ 2175 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2176 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2177 ds->ds_phys->ds_dir_obj = dd->dd_object; 2178 ASSERT3P(ds->ds_dir, ==, odd); 2179 dsl_dir_rele(ds->ds_dir, ds); 2180 VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object, 2181 NULL, ds, &ds->ds_dir)); 2182 2183 /* move any clone references */ 2184 if (ds->ds_phys->ds_next_clones_obj && 2185 spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2186 zap_cursor_t zc; 2187 zap_attribute_t za; 2188 2189 for (zap_cursor_init(&zc, dp->dp_meta_objset, 2190 ds->ds_phys->ds_next_clones_obj); 2191 zap_cursor_retrieve(&zc, &za) == 0; 2192 zap_cursor_advance(&zc)) { 2193 dsl_dataset_t *cnds; 2194 uint64_t o; 2195 2196 if (za.za_first_integer == oldnext_obj) { 2197 /* 2198 * We've already moved the 2199 * origin's reference. 
2200 */ 2201 continue; 2202 } 2203 2204 VERIFY0(dsl_dataset_hold_obj(dp, 2205 za.za_first_integer, FTAG, &cnds)); 2206 o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; 2207 2208 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2209 odd->dd_phys->dd_clones, o, tx)); 2210 VERIFY0(zap_add_int(dp->dp_meta_objset, 2211 dd->dd_phys->dd_clones, o, tx)); 2212 dsl_dataset_rele(cnds, FTAG); 2213 } 2214 zap_cursor_fini(&zc); 2215 } 2216 2217 ASSERT(!dsl_prop_hascb(ds)); 2218 } 2219 2220 /* 2221 * Change space accounting. 2222 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 2223 * both be valid, or both be 0 (resulting in delta == 0). This 2224 * is true for each of {clone,origin} independently. 2225 */ 2226 2227 delta = ddpa->cloneusedsnap - 2228 dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2229 ASSERT3S(delta, >=, 0); 2230 ASSERT3U(ddpa->used, >=, delta); 2231 dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2232 dsl_dir_diduse_space(dd, DD_USED_HEAD, 2233 ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx); 2234 2235 delta = ddpa->originusedsnap - 2236 odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2237 ASSERT3S(delta, <=, 0); 2238 ASSERT3U(ddpa->used, >=, -delta); 2239 dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2240 dsl_dir_diduse_space(odd, DD_USED_HEAD, 2241 -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx); 2242 2243 origin_ds->ds_phys->ds_unique_bytes = ddpa->unique; 2244 2245 /* log history record */ 2246 spa_history_log_internal_ds(hds, "promote", tx, ""); 2247 2248 dsl_dir_rele(odd, FTAG); 2249 promote_rele(ddpa, FTAG); 2250} 2251 2252/* 2253 * Make a list of dsl_dataset_t's for the snapshots between first_obj 2254 * (exclusive) and last_obj (inclusive). The list will be in reverse 2255 * order (last_obj will be the list_head()). If first_obj == 0, do all 2256 * snapshots back to this dataset's origin. 
2257 */ 2258static int 2259snaplist_make(dsl_pool_t *dp, 2260 uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag) 2261{ 2262 uint64_t obj = last_obj; 2263 2264 list_create(l, sizeof (struct promotenode), 2265 offsetof(struct promotenode, link)); 2266 2267 while (obj != first_obj) { 2268 dsl_dataset_t *ds; 2269 struct promotenode *snap; 2270 int err; 2271 2272 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 2273 ASSERT(err != ENOENT); 2274 if (err != 0) 2275 return (err); 2276 2277 if (first_obj == 0) 2278 first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2279 2280 snap = kmem_alloc(sizeof (*snap), KM_SLEEP); 2281 snap->ds = ds; 2282 list_insert_tail(l, snap); 2283 obj = ds->ds_phys->ds_prev_snap_obj; 2284 } 2285 2286 return (0); 2287} 2288 2289static int 2290snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2291{ 2292 struct promotenode *snap; 2293 2294 *spacep = 0; 2295 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2296 uint64_t used, comp, uncomp; 2297 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2298 mintxg, UINT64_MAX, &used, &comp, &uncomp); 2299 *spacep += used; 2300 } 2301 return (0); 2302} 2303 2304static void 2305snaplist_destroy(list_t *l, void *tag) 2306{ 2307 struct promotenode *snap; 2308 2309 if (l == NULL || !list_link_active(&l->list_head)) 2310 return; 2311 2312 while ((snap = list_tail(l)) != NULL) { 2313 list_remove(l, snap); 2314 dsl_dataset_rele(snap->ds, tag); 2315 kmem_free(snap, sizeof (*snap)); 2316 } 2317 list_destroy(l); 2318} 2319 2320static int 2321promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag) 2322{ 2323 int error; 2324 dsl_dir_t *dd; 2325 struct promotenode *snap; 2326 2327 error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag, 2328 &ddpa->ddpa_clone); 2329 if (error != 0) 2330 return (error); 2331 dd = ddpa->ddpa_clone->ds_dir; 2332 2333 if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) || 2334 !dsl_dir_is_clone(dd)) { 2335 dsl_dataset_rele(ddpa->ddpa_clone, tag); 2336 return 
(SET_ERROR(EINVAL)); 2337 } 2338 2339 error = snaplist_make(dp, 0, dd->dd_phys->dd_origin_obj, 2340 &ddpa->shared_snaps, tag); 2341 if (error != 0) 2342 goto out; 2343 2344 error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object, 2345 &ddpa->clone_snaps, tag); 2346 if (error != 0) 2347 goto out; 2348 2349 snap = list_head(&ddpa->shared_snaps); 2350 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 2351 error = snaplist_make(dp, dd->dd_phys->dd_origin_obj, 2352 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, 2353 &ddpa->origin_snaps, tag); 2354 if (error != 0) 2355 goto out; 2356 2357 if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { 2358 error = dsl_dataset_hold_obj(dp, 2359 snap->ds->ds_dir->dd_phys->dd_origin_obj, 2360 tag, &ddpa->origin_origin); 2361 if (error != 0) 2362 goto out; 2363 } 2364out: 2365 if (error != 0) 2366 promote_rele(ddpa, tag); 2367 return (error); 2368} 2369 2370static void 2371promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag) 2372{ 2373 snaplist_destroy(&ddpa->shared_snaps, tag); 2374 snaplist_destroy(&ddpa->clone_snaps, tag); 2375 snaplist_destroy(&ddpa->origin_snaps, tag); 2376 if (ddpa->origin_origin != NULL) 2377 dsl_dataset_rele(ddpa->origin_origin, tag); 2378 dsl_dataset_rele(ddpa->ddpa_clone, tag); 2379} 2380 2381/* 2382 * Promote a clone. 2383 * 2384 * If it fails due to a conflicting snapshot name, "conflsnap" will be filled 2385 * in with the name. (It must be at least MAXNAMELEN bytes long.) 2386 */ 2387int 2388dsl_dataset_promote(const char *name, char *conflsnap) 2389{ 2390 dsl_dataset_promote_arg_t ddpa = { 0 }; 2391 uint64_t numsnaps; 2392 int error; 2393 objset_t *os; 2394 2395 /* 2396 * We will modify space proportional to the number of 2397 * snapshots. Compute numsnaps. 
2398 */ 2399 error = dmu_objset_hold(name, FTAG, &os); 2400 if (error != 0) 2401 return (error); 2402 error = zap_count(dmu_objset_pool(os)->dp_meta_objset, 2403 dmu_objset_ds(os)->ds_phys->ds_snapnames_zapobj, &numsnaps); 2404 dmu_objset_rele(os, FTAG); 2405 if (error != 0) 2406 return (error); 2407 2408 ddpa.ddpa_clonename = name; 2409 ddpa.err_ds = conflsnap; 2410 2411 return (dsl_sync_task(name, dsl_dataset_promote_check, 2412 dsl_dataset_promote_sync, &ddpa, 2 + numsnaps)); 2413} 2414 2415int 2416dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, 2417 dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) 2418{ 2419 int64_t unused_refres_delta; 2420 2421 /* they should both be heads */ 2422 if (dsl_dataset_is_snapshot(clone) || 2423 dsl_dataset_is_snapshot(origin_head)) 2424 return (SET_ERROR(EINVAL)); 2425 2426 /* the branch point should be just before them */ 2427 if (clone->ds_prev != origin_head->ds_prev) 2428 return (SET_ERROR(EINVAL)); 2429 2430 /* clone should be the clone (unless they are unrelated) */ 2431 if (clone->ds_prev != NULL && 2432 clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap && 2433 origin_head->ds_object != 2434 clone->ds_prev->ds_phys->ds_next_snap_obj) 2435 return (SET_ERROR(EINVAL)); 2436 2437 /* the clone should be a child of the origin */ 2438 if (clone->ds_dir->dd_parent != origin_head->ds_dir) 2439 return (SET_ERROR(EINVAL)); 2440 2441 /* origin_head shouldn't be modified unless 'force' */ 2442 if (!force && dsl_dataset_modified_since_lastsnap(origin_head)) 2443 return (SET_ERROR(ETXTBSY)); 2444 2445 /* origin_head should have no long holds (e.g. 
is not mounted) */ 2446 if (dsl_dataset_handoff_check(origin_head, owner, tx)) 2447 return (SET_ERROR(EBUSY)); 2448 2449 /* check amount of any unconsumed refreservation */ 2450 unused_refres_delta = 2451 (int64_t)MIN(origin_head->ds_reserved, 2452 origin_head->ds_phys->ds_unique_bytes) - 2453 (int64_t)MIN(origin_head->ds_reserved, 2454 clone->ds_phys->ds_unique_bytes); 2455 2456 if (unused_refres_delta > 0 && 2457 unused_refres_delta > 2458 dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE)) 2459 return (SET_ERROR(ENOSPC)); 2460 2461 /* clone can't be over the head's refquota */ 2462 if (origin_head->ds_quota != 0 && 2463 clone->ds_phys->ds_referenced_bytes > origin_head->ds_quota) 2464 return (SET_ERROR(EDQUOT)); 2465 2466 return (0); 2467} 2468 2469void 2470dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, 2471 dsl_dataset_t *origin_head, dmu_tx_t *tx) 2472{ 2473 dsl_pool_t *dp = dmu_tx_pool(tx); 2474 int64_t unused_refres_delta; 2475 2476 ASSERT(clone->ds_reserved == 0); 2477 ASSERT(origin_head->ds_quota == 0 || 2478 clone->ds_phys->ds_unique_bytes <= origin_head->ds_quota); 2479 2480 dmu_buf_will_dirty(clone->ds_dbuf, tx); 2481 dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 2482 2483 if (clone->ds_objset != NULL) { 2484 dmu_objset_evict(clone->ds_objset); 2485 clone->ds_objset = NULL; 2486 } 2487 2488 if (origin_head->ds_objset != NULL) { 2489 dmu_objset_evict(origin_head->ds_objset); 2490 origin_head->ds_objset = NULL; 2491 } 2492 2493 unused_refres_delta = 2494 (int64_t)MIN(origin_head->ds_reserved, 2495 origin_head->ds_phys->ds_unique_bytes) - 2496 (int64_t)MIN(origin_head->ds_reserved, 2497 clone->ds_phys->ds_unique_bytes); 2498 2499 /* 2500 * Reset origin's unique bytes, if it exists. 
2501 */ 2502 if (clone->ds_prev) { 2503 dsl_dataset_t *origin = clone->ds_prev; 2504 uint64_t comp, uncomp; 2505 2506 dmu_buf_will_dirty(origin->ds_dbuf, tx); 2507 dsl_deadlist_space_range(&clone->ds_deadlist, 2508 origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2509 &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); 2510 } 2511 2512 /* swap blkptrs */ 2513 { 2514 blkptr_t tmp; 2515 tmp = origin_head->ds_phys->ds_bp; 2516 origin_head->ds_phys->ds_bp = clone->ds_phys->ds_bp; 2517 clone->ds_phys->ds_bp = tmp; 2518 } 2519 2520 /* set dd_*_bytes */ 2521 { 2522 int64_t dused, dcomp, duncomp; 2523 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2524 uint64_t odl_used, odl_comp, odl_uncomp; 2525 2526 ASSERT3U(clone->ds_dir->dd_phys-> 2527 dd_used_breakdown[DD_USED_SNAP], ==, 0); 2528 2529 dsl_deadlist_space(&clone->ds_deadlist, 2530 &cdl_used, &cdl_comp, &cdl_uncomp); 2531 dsl_deadlist_space(&origin_head->ds_deadlist, 2532 &odl_used, &odl_comp, &odl_uncomp); 2533 2534 dused = clone->ds_phys->ds_referenced_bytes + cdl_used - 2535 (origin_head->ds_phys->ds_referenced_bytes + odl_used); 2536 dcomp = clone->ds_phys->ds_compressed_bytes + cdl_comp - 2537 (origin_head->ds_phys->ds_compressed_bytes + odl_comp); 2538 duncomp = clone->ds_phys->ds_uncompressed_bytes + 2539 cdl_uncomp - 2540 (origin_head->ds_phys->ds_uncompressed_bytes + odl_uncomp); 2541 2542 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD, 2543 dused, dcomp, duncomp, tx); 2544 dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD, 2545 -dused, -dcomp, -duncomp, tx); 2546 2547 /* 2548 * The difference in the space used by snapshots is the 2549 * difference in snapshot space due to the head's 2550 * deadlist (since that's the only thing that's 2551 * changing that affects the snapused). 
2552 */ 2553 dsl_deadlist_space_range(&clone->ds_deadlist, 2554 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2555 &cdl_used, &cdl_comp, &cdl_uncomp); 2556 dsl_deadlist_space_range(&origin_head->ds_deadlist, 2557 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2558 &odl_used, &odl_comp, &odl_uncomp); 2559 dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used, 2560 DD_USED_HEAD, DD_USED_SNAP, tx); 2561 } 2562 2563 /* swap ds_*_bytes */ 2564 SWITCH64(origin_head->ds_phys->ds_referenced_bytes, 2565 clone->ds_phys->ds_referenced_bytes); 2566 SWITCH64(origin_head->ds_phys->ds_compressed_bytes, 2567 clone->ds_phys->ds_compressed_bytes); 2568 SWITCH64(origin_head->ds_phys->ds_uncompressed_bytes, 2569 clone->ds_phys->ds_uncompressed_bytes); 2570 SWITCH64(origin_head->ds_phys->ds_unique_bytes, 2571 clone->ds_phys->ds_unique_bytes); 2572 2573 /* apply any parent delta for change in unconsumed refreservation */ 2574 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV, 2575 unused_refres_delta, 0, 0, tx); 2576 2577 /* 2578 * Swap deadlists. 2579 */ 2580 dsl_deadlist_close(&clone->ds_deadlist); 2581 dsl_deadlist_close(&origin_head->ds_deadlist); 2582 SWITCH64(origin_head->ds_phys->ds_deadlist_obj, 2583 clone->ds_phys->ds_deadlist_obj); 2584 dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, 2585 clone->ds_phys->ds_deadlist_obj); 2586 dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, 2587 origin_head->ds_phys->ds_deadlist_obj); 2588 2589 dsl_scan_ds_clone_swapped(origin_head, clone, tx); 2590 2591 spa_history_log_internal_ds(clone, "clone swap", tx, 2592 "parent=%s", origin_head->ds_dir->dd_myname); 2593} 2594 2595/* 2596 * Given a pool name and a dataset object number in that pool, 2597 * return the name of that dataset. 
2598 */ 2599int 2600dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2601{ 2602 dsl_pool_t *dp; 2603 dsl_dataset_t *ds; 2604 int error; 2605 2606 error = dsl_pool_hold(pname, FTAG, &dp); 2607 if (error != 0) 2608 return (error); 2609 2610 error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); 2611 if (error == 0) { 2612 dsl_dataset_name(ds, buf); 2613 dsl_dataset_rele(ds, FTAG); 2614 } 2615 dsl_pool_rele(dp, FTAG); 2616 2617 return (error); 2618} 2619 2620int 2621dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2622 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 2623{ 2624 int error = 0; 2625 2626 ASSERT3S(asize, >, 0); 2627 2628 /* 2629 * *ref_rsrv is the portion of asize that will come from any 2630 * unconsumed refreservation space. 2631 */ 2632 *ref_rsrv = 0; 2633 2634 mutex_enter(&ds->ds_lock); 2635 /* 2636 * Make a space adjustment for reserved bytes. 2637 */ 2638 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 2639 ASSERT3U(*used, >=, 2640 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2641 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2642 *ref_rsrv = 2643 asize - MIN(asize, parent_delta(ds, asize + inflight)); 2644 } 2645 2646 if (!check_quota || ds->ds_quota == 0) { 2647 mutex_exit(&ds->ds_lock); 2648 return (0); 2649 } 2650 /* 2651 * If they are requesting more space, and our current estimate 2652 * is over quota, they get to try again unless the actual 2653 * on-disk is over quota and there are no pending changes (which 2654 * may free up space for us). 
2655 */ 2656 if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) { 2657 if (inflight > 0 || 2658 ds->ds_phys->ds_referenced_bytes < ds->ds_quota) 2659 error = SET_ERROR(ERESTART); 2660 else 2661 error = SET_ERROR(EDQUOT); 2662 } 2663 mutex_exit(&ds->ds_lock); 2664 2665 return (error); 2666} 2667 2668typedef struct dsl_dataset_set_qr_arg { 2669 const char *ddsqra_name; 2670 zprop_source_t ddsqra_source; 2671 uint64_t ddsqra_value; 2672} dsl_dataset_set_qr_arg_t; 2673 2674 2675/* ARGSUSED */ 2676static int 2677dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx) 2678{ 2679 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2680 dsl_pool_t *dp = dmu_tx_pool(tx); 2681 dsl_dataset_t *ds; 2682 int error; 2683 uint64_t newval; 2684 2685 if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA) 2686 return (SET_ERROR(ENOTSUP)); 2687 2688 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2689 if (error != 0) 2690 return (error); 2691 2692 if (dsl_dataset_is_snapshot(ds)) { 2693 dsl_dataset_rele(ds, FTAG); 2694 return (SET_ERROR(EINVAL)); 2695 } 2696 2697 error = dsl_prop_predict(ds->ds_dir, 2698 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 2699 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 2700 if (error != 0) { 2701 dsl_dataset_rele(ds, FTAG); 2702 return (error); 2703 } 2704 2705 if (newval == 0) { 2706 dsl_dataset_rele(ds, FTAG); 2707 return (0); 2708 } 2709 2710 if (newval < ds->ds_phys->ds_referenced_bytes || 2711 newval < ds->ds_reserved) { 2712 dsl_dataset_rele(ds, FTAG); 2713 return (SET_ERROR(ENOSPC)); 2714 } 2715 2716 dsl_dataset_rele(ds, FTAG); 2717 return (0); 2718} 2719 2720static void 2721dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx) 2722{ 2723 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2724 dsl_pool_t *dp = dmu_tx_pool(tx); 2725 dsl_dataset_t *ds; 2726 uint64_t newval; 2727 2728 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 2729 2730 dsl_prop_set_sync_impl(ds, 2731 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 2732 
ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, 2733 &ddsqra->ddsqra_value, tx); 2734 2735 VERIFY0(dsl_prop_get_int_ds(ds, 2736 zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval)); 2737 2738 if (ds->ds_quota != newval) { 2739 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2740 ds->ds_quota = newval; 2741 } 2742 dsl_dataset_rele(ds, FTAG); 2743} 2744 2745int 2746dsl_dataset_set_refquota(const char *dsname, zprop_source_t source, 2747 uint64_t refquota) 2748{ 2749 dsl_dataset_set_qr_arg_t ddsqra; 2750 2751 ddsqra.ddsqra_name = dsname; 2752 ddsqra.ddsqra_source = source; 2753 ddsqra.ddsqra_value = refquota; 2754 2755 return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check, 2756 dsl_dataset_set_refquota_sync, &ddsqra, 0)); 2757} 2758 2759static int 2760dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx) 2761{ 2762 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2763 dsl_pool_t *dp = dmu_tx_pool(tx); 2764 dsl_dataset_t *ds; 2765 int error; 2766 uint64_t newval, unique; 2767 2768 if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION) 2769 return (SET_ERROR(ENOTSUP)); 2770 2771 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2772 if (error != 0) 2773 return (error); 2774 2775 if (dsl_dataset_is_snapshot(ds)) { 2776 dsl_dataset_rele(ds, FTAG); 2777 return (SET_ERROR(EINVAL)); 2778 } 2779 2780 error = dsl_prop_predict(ds->ds_dir, 2781 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 2782 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 2783 if (error != 0) { 2784 dsl_dataset_rele(ds, FTAG); 2785 return (error); 2786 } 2787 2788 /* 2789 * If we are doing the preliminary check in open context, the 2790 * space estimates may be inaccurate. 
2791 */ 2792 if (!dmu_tx_is_syncing(tx)) { 2793 dsl_dataset_rele(ds, FTAG); 2794 return (0); 2795 } 2796 2797 mutex_enter(&ds->ds_lock); 2798 if (!DS_UNIQUE_IS_ACCURATE(ds)) 2799 dsl_dataset_recalc_head_uniq(ds); 2800 unique = ds->ds_phys->ds_unique_bytes; 2801 mutex_exit(&ds->ds_lock); 2802 2803 if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) { 2804 uint64_t delta = MAX(unique, newval) - 2805 MAX(unique, ds->ds_reserved); 2806 2807 if (delta > 2808 dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) || 2809 (ds->ds_quota > 0 && newval > ds->ds_quota)) { 2810 dsl_dataset_rele(ds, FTAG); 2811 return (SET_ERROR(ENOSPC)); 2812 } 2813 } 2814 2815 dsl_dataset_rele(ds, FTAG); 2816 return (0); 2817} 2818 2819void 2820dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, 2821 zprop_source_t source, uint64_t value, dmu_tx_t *tx) 2822{ 2823 uint64_t newval; 2824 uint64_t unique; 2825 int64_t delta; 2826 2827 dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 2828 source, sizeof (value), 1, &value, tx); 2829 2830 VERIFY0(dsl_prop_get_int_ds(ds, 2831 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval)); 2832 2833 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2834 mutex_enter(&ds->ds_dir->dd_lock); 2835 mutex_enter(&ds->ds_lock); 2836 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 2837 unique = ds->ds_phys->ds_unique_bytes; 2838 delta = MAX(0, (int64_t)(newval - unique)) - 2839 MAX(0, (int64_t)(ds->ds_reserved - unique)); 2840 ds->ds_reserved = newval; 2841 mutex_exit(&ds->ds_lock); 2842 2843 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 2844 mutex_exit(&ds->ds_dir->dd_lock); 2845} 2846 2847static void 2848dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx) 2849{ 2850 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2851 dsl_pool_t *dp = dmu_tx_pool(tx); 2852 dsl_dataset_t *ds; 2853 2854 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 2855 dsl_dataset_set_refreservation_sync_impl(ds, 2856 ddsqra->ddsqra_source, 
ddsqra->ddsqra_value, tx); 2857 dsl_dataset_rele(ds, FTAG); 2858} 2859 2860int 2861dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, 2862 uint64_t refreservation) 2863{ 2864 dsl_dataset_set_qr_arg_t ddsqra; 2865 2866 ddsqra.ddsqra_name = dsname; 2867 ddsqra.ddsqra_source = source; 2868 ddsqra.ddsqra_value = refreservation; 2869 2870 return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check, 2871 dsl_dataset_set_refreservation_sync, &ddsqra, 0)); 2872} 2873 2874/* 2875 * Return (in *usedp) the amount of space written in new that is not 2876 * present in oldsnap. New may be a snapshot or the head. Old must be 2877 * a snapshot before new, in new's filesystem (or its origin). If not then 2878 * fail and return EINVAL. 2879 * 2880 * The written space is calculated by considering two components: First, we 2881 * ignore any freed space, and calculate the written as new's used space 2882 * minus old's used space. Next, we add in the amount of space that was freed 2883 * between the two snapshots, thus reducing new's used space relative to old's. 2884 * Specifically, this is the space that was born before old->ds_creation_txg, 2885 * and freed before new (ie. on new's deadlist or a previous deadlist). 
2886 * 2887 * space freed [---------------------] 2888 * snapshots ---O-------O--------O-------O------ 2889 * oldsnap new 2890 */ 2891int 2892dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, 2893 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 2894{ 2895 int err = 0; 2896 uint64_t snapobj; 2897 dsl_pool_t *dp = new->ds_dir->dd_pool; 2898 2899 ASSERT(dsl_pool_config_held(dp)); 2900 2901 *usedp = 0; 2902 *usedp += new->ds_phys->ds_referenced_bytes; 2903 *usedp -= oldsnap->ds_phys->ds_referenced_bytes; 2904 2905 *compp = 0; 2906 *compp += new->ds_phys->ds_compressed_bytes; 2907 *compp -= oldsnap->ds_phys->ds_compressed_bytes; 2908 2909 *uncompp = 0; 2910 *uncompp += new->ds_phys->ds_uncompressed_bytes; 2911 *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; 2912 2913 snapobj = new->ds_object; 2914 while (snapobj != oldsnap->ds_object) { 2915 dsl_dataset_t *snap; 2916 uint64_t used, comp, uncomp; 2917 2918 if (snapobj == new->ds_object) { 2919 snap = new; 2920 } else { 2921 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); 2922 if (err != 0) 2923 break; 2924 } 2925 2926 if (snap->ds_phys->ds_prev_snap_txg == 2927 oldsnap->ds_phys->ds_creation_txg) { 2928 /* 2929 * The blocks in the deadlist can not be born after 2930 * ds_prev_snap_txg, so get the whole deadlist space, 2931 * which is more efficient (especially for old-format 2932 * deadlists). Unfortunately the deadlist code 2933 * doesn't have enough information to make this 2934 * optimization itself. 2935 */ 2936 dsl_deadlist_space(&snap->ds_deadlist, 2937 &used, &comp, &uncomp); 2938 } else { 2939 dsl_deadlist_space_range(&snap->ds_deadlist, 2940 0, oldsnap->ds_phys->ds_creation_txg, 2941 &used, &comp, &uncomp); 2942 } 2943 *usedp += used; 2944 *compp += comp; 2945 *uncompp += uncomp; 2946 2947 /* 2948 * If we get to the beginning of the chain of snapshots 2949 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap 2950 * was not a snapshot of/before new. 
2951 */ 2952 snapobj = snap->ds_phys->ds_prev_snap_obj; 2953 if (snap != new) 2954 dsl_dataset_rele(snap, FTAG); 2955 if (snapobj == 0) { 2956 err = SET_ERROR(EINVAL); 2957 break; 2958 } 2959 2960 } 2961 return (err); 2962} 2963 2964/* 2965 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, 2966 * lastsnap, and all snapshots in between are deleted. 2967 * 2968 * blocks that would be freed [---------------------------] 2969 * snapshots ---O-------O--------O-------O--------O 2970 * firstsnap lastsnap 2971 * 2972 * This is the set of blocks that were born after the snap before firstsnap, 2973 * (birth > firstsnap->prev_snap_txg) and died before the snap after the 2974 * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). 2975 * We calculate this by iterating over the relevant deadlists (from the snap 2976 * after lastsnap, backward to the snap after firstsnap), summing up the 2977 * space on the deadlist that was born after the snap before firstsnap. 2978 */ 2979int 2980dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, 2981 dsl_dataset_t *lastsnap, 2982 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 2983{ 2984 int err = 0; 2985 uint64_t snapobj; 2986 dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; 2987 2988 ASSERT(dsl_dataset_is_snapshot(firstsnap)); 2989 ASSERT(dsl_dataset_is_snapshot(lastsnap)); 2990 2991 /* 2992 * Check that the snapshots are in the same dsl_dir, and firstsnap 2993 * is before lastsnap. 
2994 */ 2995 if (firstsnap->ds_dir != lastsnap->ds_dir || 2996 firstsnap->ds_phys->ds_creation_txg > 2997 lastsnap->ds_phys->ds_creation_txg) 2998 return (SET_ERROR(EINVAL)); 2999 3000 *usedp = *compp = *uncompp = 0; 3001 3002 snapobj = lastsnap->ds_phys->ds_next_snap_obj; 3003 while (snapobj != firstsnap->ds_object) { 3004 dsl_dataset_t *ds; 3005 uint64_t used, comp, uncomp; 3006 3007 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); 3008 if (err != 0) 3009 break; 3010 3011 dsl_deadlist_space_range(&ds->ds_deadlist, 3012 firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX, 3013 &used, &comp, &uncomp); 3014 *usedp += used; 3015 *compp += comp; 3016 *uncompp += uncomp; 3017 3018 snapobj = ds->ds_phys->ds_prev_snap_obj; 3019 ASSERT3U(snapobj, !=, 0); 3020 dsl_dataset_rele(ds, FTAG); 3021 } 3022 return (err); 3023} 3024 3025/* 3026 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. 3027 * For example, they could both be snapshots of the same filesystem, and 3028 * 'earlier' is before 'later'. Or 'earlier' could be the origin of 3029 * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's 3030 * filesystem. Or 'earlier' could be the origin's origin. 
3031 */ 3032boolean_t 3033dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier) 3034{ 3035 dsl_pool_t *dp = later->ds_dir->dd_pool; 3036 int error; 3037 boolean_t ret; 3038 3039 ASSERT(dsl_pool_config_held(dp)); 3040 3041 if (earlier->ds_phys->ds_creation_txg >= 3042 later->ds_phys->ds_creation_txg) 3043 return (B_FALSE); 3044 3045 if (later->ds_dir == earlier->ds_dir) 3046 return (B_TRUE); 3047 if (!dsl_dir_is_clone(later->ds_dir)) 3048 return (B_FALSE); 3049 3050 if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) 3051 return (B_TRUE); 3052 dsl_dataset_t *origin; 3053 error = dsl_dataset_hold_obj(dp, 3054 later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); 3055 if (error != 0) 3056 return (B_FALSE); 3057 ret = dsl_dataset_is_before(origin, earlier); 3058 dsl_dataset_rele(origin, FTAG); 3059 return (ret); 3060}
| 1339 if (error != 0) { 1340 dsl_dataset_rele(ds, FTAG); 1341 return (error); 1342 } 1343 1344 if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) { 1345 dsl_dataset_rele(ds, FTAG); 1346 return (SET_ERROR(ENOTSUP)); 1347 } 1348 error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag, 1349 B_TRUE, tx); 1350 if (error != 0) { 1351 dsl_dataset_rele(ds, FTAG); 1352 return (error); 1353 } 1354 1355 dsl_dataset_rele(ds, FTAG); 1356 return (0); 1357} 1358 1359static void 1360dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx) 1361{ 1362 dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1363 dsl_pool_t *dp = dmu_tx_pool(tx); 1364 dsl_dataset_t *ds; 1365 1366 VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds)); 1367 1368 dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx); 1369 dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag, 1370 ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx); 1371 dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx); 1372 1373 dsl_dataset_rele(ds, FTAG); 1374} 1375 1376int 1377dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, 1378 minor_t cleanup_minor, const char *htag) 1379{ 1380 dsl_dataset_snapshot_tmp_arg_t ddsta; 1381 int error; 1382 spa_t *spa; 1383 boolean_t needsuspend; 1384 void *cookie; 1385 1386 ddsta.ddsta_fsname = fsname; 1387 ddsta.ddsta_snapname = snapname; 1388 ddsta.ddsta_cleanup_minor = cleanup_minor; 1389 ddsta.ddsta_htag = htag; 1390 1391 error = spa_open(fsname, &spa, FTAG); 1392 if (error != 0) 1393 return (error); 1394 needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1395 spa_close(spa, FTAG); 1396 1397 if (needsuspend) { 1398 error = zil_suspend(fsname, &cookie); 1399 if (error != 0) 1400 return (error); 1401 } 1402 1403 error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check, 1404 dsl_dataset_snapshot_tmp_sync, &ddsta, 3); 1405 1406 if (needsuspend) 1407 zil_resume(cookie); 1408 return (error); 1409} 1410 1411 1412void 
1413dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1414{ 1415 ASSERT(dmu_tx_is_syncing(tx)); 1416 ASSERT(ds->ds_objset != NULL); 1417 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1418 1419 /* 1420 * in case we had to change ds_fsid_guid when we opened it, 1421 * sync it out now. 1422 */ 1423 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1424 ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 1425 1426 dmu_objset_sync(ds->ds_objset, zio, tx); 1427} 1428 1429static void 1430get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) 1431{ 1432 uint64_t count = 0; 1433 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1434 zap_cursor_t zc; 1435 zap_attribute_t za; 1436 nvlist_t *propval = fnvlist_alloc(); 1437 nvlist_t *val = fnvlist_alloc(); 1438 1439 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 1440 1441 /* 1442 * There may be missing entries in ds_next_clones_obj 1443 * due to a bug in a previous version of the code. 1444 * Only trust it if it has the right number of entries. 1445 */ 1446 if (ds->ds_phys->ds_next_clones_obj != 0) { 1447 ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj, 1448 &count)); 1449 } 1450 if (count != ds->ds_phys->ds_num_children - 1) 1451 goto fail; 1452 for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj); 1453 zap_cursor_retrieve(&zc, &za) == 0; 1454 zap_cursor_advance(&zc)) { 1455 dsl_dataset_t *clone; 1456 char buf[ZFS_MAXNAMELEN]; 1457 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 1458 za.za_first_integer, FTAG, &clone)); 1459 dsl_dir_name(clone->ds_dir, buf); 1460 fnvlist_add_boolean(val, buf); 1461 dsl_dataset_rele(clone, FTAG); 1462 } 1463 zap_cursor_fini(&zc); 1464 fnvlist_add_nvlist(propval, ZPROP_VALUE, val); 1465 fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval); 1466fail: 1467 nvlist_free(val); 1468 nvlist_free(propval); 1469} 1470 1471void 1472dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1473{ 1474 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1475 uint64_t refd, avail, uobjs, aobjs, ratio; 
1476 1477 ASSERT(dsl_pool_config_held(dp)); 1478 1479 ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 1480 (ds->ds_phys->ds_uncompressed_bytes * 100 / 1481 ds->ds_phys->ds_compressed_bytes); 1482 1483 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); 1484 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, 1485 ds->ds_phys->ds_uncompressed_bytes); 1486 1487 if (dsl_dataset_is_snapshot(ds)) { 1488 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); 1489 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1490 ds->ds_phys->ds_unique_bytes); 1491 get_clones_stat(ds, nv); 1492 } else { 1493 dsl_dir_stats(ds->ds_dir, nv); 1494 } 1495 1496 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 1497 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 1498 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 1499 1500 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1501 ds->ds_phys->ds_creation_time); 1502 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1503 ds->ds_phys->ds_creation_txg); 1504 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 1505 ds->ds_quota); 1506 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 1507 ds->ds_reserved); 1508 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 1509 ds->ds_phys->ds_guid); 1510 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, 1511 ds->ds_phys->ds_unique_bytes); 1512 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, 1513 ds->ds_object); 1514 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, 1515 ds->ds_userrefs); 1516 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, 1517 DS_IS_DEFER_DESTROY(ds) ? 
1 : 0); 1518 1519 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1520 uint64_t written, comp, uncomp; 1521 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1522 dsl_dataset_t *prev; 1523 1524 int err = dsl_dataset_hold_obj(dp, 1525 ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); 1526 if (err == 0) { 1527 err = dsl_dataset_space_written(prev, ds, &written, 1528 &comp, &uncomp); 1529 dsl_dataset_rele(prev, FTAG); 1530 if (err == 0) { 1531 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, 1532 written); 1533 } 1534 } 1535 } 1536} 1537 1538void 1539dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1540{ 1541 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1542 ASSERT(dsl_pool_config_held(dp)); 1543 1544 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1545 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 1546 stat->dds_guid = ds->ds_phys->ds_guid; 1547 stat->dds_origin[0] = '\0'; 1548 if (dsl_dataset_is_snapshot(ds)) { 1549 stat->dds_is_snapshot = B_TRUE; 1550 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1551 } else { 1552 stat->dds_is_snapshot = B_FALSE; 1553 stat->dds_num_clones = 0; 1554 1555 if (dsl_dir_is_clone(ds->ds_dir)) { 1556 dsl_dataset_t *ods; 1557 1558 VERIFY0(dsl_dataset_hold_obj(dp, 1559 ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); 1560 dsl_dataset_name(ods, stat->dds_origin); 1561 dsl_dataset_rele(ods, FTAG); 1562 } 1563 } 1564} 1565 1566uint64_t 1567dsl_dataset_fsid_guid(dsl_dataset_t *ds) 1568{ 1569 return (ds->ds_fsid_guid); 1570} 1571 1572void 1573dsl_dataset_space(dsl_dataset_t *ds, 1574 uint64_t *refdbytesp, uint64_t *availbytesp, 1575 uint64_t *usedobjsp, uint64_t *availobjsp) 1576{ 1577 *refdbytesp = ds->ds_phys->ds_referenced_bytes; 1578 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 1579 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) 1580 *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; 1581 if (ds->ds_quota != 0) { 1582 /* 1583 * Adjust available bytes according to 
refquota 1584 */ 1585 if (*refdbytesp < ds->ds_quota) 1586 *availbytesp = MIN(*availbytesp, 1587 ds->ds_quota - *refdbytesp); 1588 else 1589 *availbytesp = 0; 1590 } 1591 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 1592 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 1593} 1594 1595boolean_t 1596dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) 1597{ 1598 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1599 1600 ASSERT(dsl_pool_config_held(dp)); 1601 if (ds->ds_prev == NULL) 1602 return (B_FALSE); 1603 if (ds->ds_phys->ds_bp.blk_birth > 1604 ds->ds_prev->ds_phys->ds_creation_txg) { 1605 objset_t *os, *os_prev; 1606 /* 1607 * It may be that only the ZIL differs, because it was 1608 * reset in the head. Don't count that as being 1609 * modified. 1610 */ 1611 if (dmu_objset_from_ds(ds, &os) != 0) 1612 return (B_TRUE); 1613 if (dmu_objset_from_ds(ds->ds_prev, &os_prev) != 0) 1614 return (B_TRUE); 1615 return (bcmp(&os->os_phys->os_meta_dnode, 1616 &os_prev->os_phys->os_meta_dnode, 1617 sizeof (os->os_phys->os_meta_dnode)) != 0); 1618 } 1619 return (B_FALSE); 1620} 1621 1622typedef struct dsl_dataset_rename_snapshot_arg { 1623 const char *ddrsa_fsname; 1624 const char *ddrsa_oldsnapname; 1625 const char *ddrsa_newsnapname; 1626 boolean_t ddrsa_recursive; 1627 dmu_tx_t *ddrsa_tx; 1628} dsl_dataset_rename_snapshot_arg_t; 1629 1630/* ARGSUSED */ 1631static int 1632dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp, 1633 dsl_dataset_t *hds, void *arg) 1634{ 1635 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1636 int error; 1637 uint64_t val; 1638 1639 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 1640 if (error != 0) { 1641 /* ignore nonexistent snapshots */ 1642 return (error == ENOENT ? 
0 : error); 1643 } 1644 1645 /* new name should not exist */ 1646 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val); 1647 if (error == 0) 1648 error = SET_ERROR(EEXIST); 1649 else if (error == ENOENT) 1650 error = 0; 1651 1652 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 1653 if (dsl_dir_namelen(hds->ds_dir) + 1 + 1654 strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN) 1655 error = SET_ERROR(ENAMETOOLONG); 1656 1657 return (error); 1658} 1659 1660static int 1661dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx) 1662{ 1663 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1664 dsl_pool_t *dp = dmu_tx_pool(tx); 1665 dsl_dataset_t *hds; 1666 int error; 1667 1668 error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds); 1669 if (error != 0) 1670 return (error); 1671 1672 if (ddrsa->ddrsa_recursive) { 1673 error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 1674 dsl_dataset_rename_snapshot_check_impl, ddrsa, 1675 DS_FIND_CHILDREN); 1676 } else { 1677 error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa); 1678 } 1679 dsl_dataset_rele(hds, FTAG); 1680 return (error); 1681} 1682 1683static int 1684dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, 1685 dsl_dataset_t *hds, void *arg) 1686{ 1687#ifdef __FreeBSD__ 1688#ifdef _KERNEL 1689 char *oldname, *newname; 1690#endif 1691#endif 1692 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1693 dsl_dataset_t *ds; 1694 uint64_t val; 1695 dmu_tx_t *tx = ddrsa->ddrsa_tx; 1696 int error; 1697 1698 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 1699 ASSERT(error == 0 || error == ENOENT); 1700 if (error == ENOENT) { 1701 /* ignore nonexistent snapshots */ 1702 return (0); 1703 } 1704 1705 VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds)); 1706 1707 /* log before we change the name */ 1708 spa_history_log_internal_ds(ds, "rename", tx, 1709 "-> @%s", ddrsa->ddrsa_newsnapname); 1710 1711 VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, 
tx)); 1712 mutex_enter(&ds->ds_lock); 1713 (void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname); 1714 mutex_exit(&ds->ds_lock); 1715 VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, 1716 ds->ds_snapname, 8, 1, &ds->ds_object, tx)); 1717 1718#ifdef __FreeBSD__ 1719#ifdef _KERNEL 1720 oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1721 newname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1722 snprintf(oldname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, 1723 ddrsa->ddrsa_oldsnapname); 1724 snprintf(newname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, 1725 ddrsa->ddrsa_newsnapname); 1726 zfsvfs_update_fromname(oldname, newname); 1727 zvol_rename_minors(oldname, newname); 1728 kmem_free(newname, MAXPATHLEN); 1729 kmem_free(oldname, MAXPATHLEN); 1730#endif 1731#endif 1732 dsl_dataset_rele(ds, FTAG); 1733 1734 return (0); 1735} 1736 1737static void 1738dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx) 1739{ 1740 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1741 dsl_pool_t *dp = dmu_tx_pool(tx); 1742 dsl_dataset_t *hds; 1743 1744 VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds)); 1745 ddrsa->ddrsa_tx = tx; 1746 if (ddrsa->ddrsa_recursive) { 1747 VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 1748 dsl_dataset_rename_snapshot_sync_impl, ddrsa, 1749 DS_FIND_CHILDREN)); 1750 } else { 1751 VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa)); 1752 } 1753 dsl_dataset_rele(hds, FTAG); 1754} 1755 1756int 1757dsl_dataset_rename_snapshot(const char *fsname, 1758 const char *oldsnapname, const char *newsnapname, boolean_t recursive) 1759{ 1760 dsl_dataset_rename_snapshot_arg_t ddrsa; 1761 1762 ddrsa.ddrsa_fsname = fsname; 1763 ddrsa.ddrsa_oldsnapname = oldsnapname; 1764 ddrsa.ddrsa_newsnapname = newsnapname; 1765 ddrsa.ddrsa_recursive = recursive; 1766 1767 return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check, 1768 dsl_dataset_rename_snapshot_sync, &ddrsa, 1)); 1769} 1770 1771/* 1772 * If we're doing an ownership 
handoff, we need to make sure that there is 1773 * only one long hold on the dataset. We're not allowed to change anything here 1774 * so we don't permanently release the long hold or regular hold here. We want 1775 * to do this only when syncing to avoid the dataset unexpectedly going away 1776 * when we release the long hold. 1777 */ 1778static int 1779dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) 1780{ 1781 boolean_t held; 1782 1783 if (!dmu_tx_is_syncing(tx)) 1784 return (0); 1785 1786 if (owner != NULL) { 1787 VERIFY3P(ds->ds_owner, ==, owner); 1788 dsl_dataset_long_rele(ds, owner); 1789 } 1790 1791 held = dsl_dataset_long_held(ds); 1792 1793 if (owner != NULL) 1794 dsl_dataset_long_hold(ds, owner); 1795 1796 if (held) 1797 return (SET_ERROR(EBUSY)); 1798 1799 return (0); 1800} 1801 1802typedef struct dsl_dataset_rollback_arg { 1803 const char *ddra_fsname; 1804 void *ddra_owner; 1805} dsl_dataset_rollback_arg_t; 1806 1807static int 1808dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) 1809{ 1810 dsl_dataset_rollback_arg_t *ddra = arg; 1811 dsl_pool_t *dp = dmu_tx_pool(tx); 1812 dsl_dataset_t *ds; 1813 int64_t unused_refres_delta; 1814 int error; 1815 1816 error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds); 1817 if (error != 0) 1818 return (error); 1819 1820 /* must not be a snapshot */ 1821 if (dsl_dataset_is_snapshot(ds)) { 1822 dsl_dataset_rele(ds, FTAG); 1823 return (SET_ERROR(EINVAL)); 1824 } 1825 1826 /* must have a most recent snapshot */ 1827 if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { 1828 dsl_dataset_rele(ds, FTAG); 1829 return (SET_ERROR(EINVAL)); 1830 } 1831 1832 error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx); 1833 if (error != 0) { 1834 dsl_dataset_rele(ds, FTAG); 1835 return (error); 1836 } 1837 1838 /* 1839 * Check if the snap we are rolling back to uses more than 1840 * the refquota. 
1841 */ 1842 if (ds->ds_quota != 0 && 1843 ds->ds_prev->ds_phys->ds_referenced_bytes > ds->ds_quota) { 1844 dsl_dataset_rele(ds, FTAG); 1845 return (SET_ERROR(EDQUOT)); 1846 } 1847 1848 /* 1849 * When we do the clone swap, we will temporarily use more space 1850 * due to the refreservation (the head will no longer have any 1851 * unique space, so the entire amount of the refreservation will need 1852 * to be free). We will immediately destroy the clone, freeing 1853 * this space, but the freeing happens over many txg's. 1854 */ 1855 unused_refres_delta = (int64_t)MIN(ds->ds_reserved, 1856 ds->ds_phys->ds_unique_bytes); 1857 1858 if (unused_refres_delta > 0 && 1859 unused_refres_delta > 1860 dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) { 1861 dsl_dataset_rele(ds, FTAG); 1862 return (SET_ERROR(ENOSPC)); 1863 } 1864 1865 dsl_dataset_rele(ds, FTAG); 1866 return (0); 1867} 1868 1869static void 1870dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) 1871{ 1872 dsl_dataset_rollback_arg_t *ddra = arg; 1873 dsl_pool_t *dp = dmu_tx_pool(tx); 1874 dsl_dataset_t *ds, *clone; 1875 uint64_t cloneobj; 1876 1877 VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds)); 1878 1879 cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", 1880 ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); 1881 1882 VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); 1883 1884 dsl_dataset_clone_swap_sync_impl(clone, ds, tx); 1885 dsl_dataset_zero_zil(ds, tx); 1886 1887 dsl_destroy_head_sync_impl(clone, tx); 1888 1889 dsl_dataset_rele(clone, FTAG); 1890 dsl_dataset_rele(ds, FTAG); 1891} 1892 1893/* 1894 * If owner != NULL: 1895 * 1896 * - The existing dataset MUST be owned by the specified owner at entry 1897 * - Upon return, dataset will still be held by the same owner, whether we 1898 * succeed or not. 1899 * 1900 * This mode is required any time the existing filesystem is mounted. See 1901 * notes above zfs_suspend_fs() for further details. 
1902 */ 1903int 1904dsl_dataset_rollback(const char *fsname, void *owner) 1905{ 1906 dsl_dataset_rollback_arg_t ddra; 1907 1908 ddra.ddra_fsname = fsname; 1909 ddra.ddra_owner = owner; 1910 1911 return (dsl_sync_task(fsname, dsl_dataset_rollback_check, 1912 dsl_dataset_rollback_sync, (void *)&ddra, 1)); 1913} 1914 1915struct promotenode { 1916 list_node_t link; 1917 dsl_dataset_t *ds; 1918}; 1919 1920typedef struct dsl_dataset_promote_arg { 1921 const char *ddpa_clonename; 1922 dsl_dataset_t *ddpa_clone; 1923 list_t shared_snaps, origin_snaps, clone_snaps; 1924 dsl_dataset_t *origin_origin; /* origin of the origin */ 1925 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 1926 char *err_ds; 1927} dsl_dataset_promote_arg_t; 1928 1929static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 1930static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, 1931 void *tag); 1932static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag); 1933 1934static int 1935dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) 1936{ 1937 dsl_dataset_promote_arg_t *ddpa = arg; 1938 dsl_pool_t *dp = dmu_tx_pool(tx); 1939 dsl_dataset_t *hds; 1940 struct promotenode *snap; 1941 dsl_dataset_t *origin_ds; 1942 int err; 1943 uint64_t unused; 1944 1945 err = promote_hold(ddpa, dp, FTAG); 1946 if (err != 0) 1947 return (err); 1948 1949 hds = ddpa->ddpa_clone; 1950 1951 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 1952 promote_rele(ddpa, FTAG); 1953 return (SET_ERROR(EXDEV)); 1954 } 1955 1956 /* 1957 * Compute and check the amount of space to transfer. Since this is 1958 * so expensive, don't do the preliminary check. 
1959 */ 1960 if (!dmu_tx_is_syncing(tx)) { 1961 promote_rele(ddpa, FTAG); 1962 return (0); 1963 } 1964 1965 snap = list_head(&ddpa->shared_snaps); 1966 origin_ds = snap->ds; 1967 1968 /* compute origin's new unique space */ 1969 snap = list_tail(&ddpa->clone_snaps); 1970 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 1971 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 1972 origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, 1973 &ddpa->unique, &unused, &unused); 1974 1975 /* 1976 * Walk the snapshots that we are moving 1977 * 1978 * Compute space to transfer. Consider the incremental changes 1979 * to used by each snapshot: 1980 * (my used) = (prev's used) + (blocks born) - (blocks killed) 1981 * So each snapshot gave birth to: 1982 * (blocks born) = (my used) - (prev's used) + (blocks killed) 1983 * So a sequence would look like: 1984 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 1985 * Which simplifies to: 1986 * uN + kN + kN-1 + ... + k1 + k0 1987 * Note however, if we stop before we reach the ORIGIN we get: 1988 * uN + kN + kN-1 + ... + kM - uM-1 1989 */ 1990 ddpa->used = origin_ds->ds_phys->ds_referenced_bytes; 1991 ddpa->comp = origin_ds->ds_phys->ds_compressed_bytes; 1992 ddpa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 1993 for (snap = list_head(&ddpa->shared_snaps); snap; 1994 snap = list_next(&ddpa->shared_snaps, snap)) { 1995 uint64_t val, dlused, dlcomp, dluncomp; 1996 dsl_dataset_t *ds = snap->ds; 1997 1998 /* 1999 * If there are long holds, we won't be able to evict 2000 * the objset. 
2001 */ 2002 if (dsl_dataset_long_held(ds)) { 2003 err = SET_ERROR(EBUSY); 2004 goto out; 2005 } 2006 2007 /* Check that the snapshot name does not conflict */ 2008 VERIFY0(dsl_dataset_get_snapname(ds)); 2009 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2010 if (err == 0) { 2011 (void) strcpy(ddpa->err_ds, snap->ds->ds_snapname); 2012 err = SET_ERROR(EEXIST); 2013 goto out; 2014 } 2015 if (err != ENOENT) 2016 goto out; 2017 2018 /* The very first snapshot does not have a deadlist */ 2019 if (ds->ds_phys->ds_prev_snap_obj == 0) 2020 continue; 2021 2022 dsl_deadlist_space(&ds->ds_deadlist, 2023 &dlused, &dlcomp, &dluncomp); 2024 ddpa->used += dlused; 2025 ddpa->comp += dlcomp; 2026 ddpa->uncomp += dluncomp; 2027 } 2028 2029 /* 2030 * If we are a clone of a clone then we never reached ORIGIN, 2031 * so we need to subtract out the clone origin's used space. 2032 */ 2033 if (ddpa->origin_origin) { 2034 ddpa->used -= ddpa->origin_origin->ds_phys->ds_referenced_bytes; 2035 ddpa->comp -= ddpa->origin_origin->ds_phys->ds_compressed_bytes; 2036 ddpa->uncomp -= 2037 ddpa->origin_origin->ds_phys->ds_uncompressed_bytes; 2038 } 2039 2040 /* Check that there is enough space here */ 2041 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2042 ddpa->used); 2043 if (err != 0) 2044 goto out; 2045 2046 /* 2047 * Compute the amounts of space that will be used by snapshots 2048 * after the promotion (for both origin and clone). For each, 2049 * it is the amount of space that will be on all of their 2050 * deadlists (that was not born before their new origin). 2051 */ 2052 if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2053 uint64_t space; 2054 2055 /* 2056 * Note, typically this will not be a clone of a clone, 2057 * so dd_origin_txg will be < TXG_INITIAL, so 2058 * these snaplist_space() -> dsl_deadlist_space_range() 2059 * calls will be fast because they do not have to 2060 * iterate over all bps. 
2061 */ 2062 snap = list_head(&ddpa->origin_snaps); 2063 err = snaplist_space(&ddpa->shared_snaps, 2064 snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap); 2065 if (err != 0) 2066 goto out; 2067 2068 err = snaplist_space(&ddpa->clone_snaps, 2069 snap->ds->ds_dir->dd_origin_txg, &space); 2070 if (err != 0) 2071 goto out; 2072 ddpa->cloneusedsnap += space; 2073 } 2074 if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2075 err = snaplist_space(&ddpa->origin_snaps, 2076 origin_ds->ds_phys->ds_creation_txg, &ddpa->originusedsnap); 2077 if (err != 0) 2078 goto out; 2079 } 2080 2081out: 2082 promote_rele(ddpa, FTAG); 2083 return (err); 2084} 2085 2086static void 2087dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) 2088{ 2089 dsl_dataset_promote_arg_t *ddpa = arg; 2090 dsl_pool_t *dp = dmu_tx_pool(tx); 2091 dsl_dataset_t *hds; 2092 struct promotenode *snap; 2093 dsl_dataset_t *origin_ds; 2094 dsl_dataset_t *origin_head; 2095 dsl_dir_t *dd; 2096 dsl_dir_t *odd = NULL; 2097 uint64_t oldnext_obj; 2098 int64_t delta; 2099 2100 VERIFY0(promote_hold(ddpa, dp, FTAG)); 2101 hds = ddpa->ddpa_clone; 2102 2103 ASSERT0(hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE); 2104 2105 snap = list_head(&ddpa->shared_snaps); 2106 origin_ds = snap->ds; 2107 dd = hds->ds_dir; 2108 2109 snap = list_head(&ddpa->origin_snaps); 2110 origin_head = snap->ds; 2111 2112 /* 2113 * We need to explicitly open odd, since origin_ds's dd will be 2114 * changing. 
2115 */ 2116 VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, 2117 NULL, FTAG, &odd)); 2118 2119 /* change origin's next snap */ 2120 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2121 oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; 2122 snap = list_tail(&ddpa->clone_snaps); 2123 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2124 origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; 2125 2126 /* change the origin's next clone */ 2127 if (origin_ds->ds_phys->ds_next_clones_obj) { 2128 dsl_dataset_remove_from_next_clones(origin_ds, 2129 snap->ds->ds_object, tx); 2130 VERIFY0(zap_add_int(dp->dp_meta_objset, 2131 origin_ds->ds_phys->ds_next_clones_obj, 2132 oldnext_obj, tx)); 2133 } 2134 2135 /* change origin */ 2136 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2137 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2138 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2139 dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; 2140 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2141 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2142 origin_head->ds_dir->dd_origin_txg = 2143 origin_ds->ds_phys->ds_creation_txg; 2144 2145 /* change dd_clone entries */ 2146 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2147 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2148 odd->dd_phys->dd_clones, hds->ds_object, tx)); 2149 VERIFY0(zap_add_int(dp->dp_meta_objset, 2150 ddpa->origin_origin->ds_dir->dd_phys->dd_clones, 2151 hds->ds_object, tx)); 2152 2153 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2154 ddpa->origin_origin->ds_dir->dd_phys->dd_clones, 2155 origin_head->ds_object, tx)); 2156 if (dd->dd_phys->dd_clones == 0) { 2157 dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, 2158 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 2159 } 2160 VERIFY0(zap_add_int(dp->dp_meta_objset, 2161 dd->dd_phys->dd_clones, origin_head->ds_object, tx)); 2162 } 2163 2164 /* move snapshots to this dir */ 2165 for (snap = 
list_head(&ddpa->shared_snaps); snap; 2166 snap = list_next(&ddpa->shared_snaps, snap)) { 2167 dsl_dataset_t *ds = snap->ds; 2168 2169 /* 2170 * Property callbacks are registered to a particular 2171 * dsl_dir. Since ours is changing, evict the objset 2172 * so that they will be unregistered from the old dsl_dir. 2173 */ 2174 if (ds->ds_objset) { 2175 dmu_objset_evict(ds->ds_objset); 2176 ds->ds_objset = NULL; 2177 } 2178 2179 /* move snap name entry */ 2180 VERIFY0(dsl_dataset_get_snapname(ds)); 2181 VERIFY0(dsl_dataset_snap_remove(origin_head, 2182 ds->ds_snapname, tx)); 2183 VERIFY0(zap_add(dp->dp_meta_objset, 2184 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2185 8, 1, &ds->ds_object, tx)); 2186 2187 /* change containing dsl_dir */ 2188 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2189 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2190 ds->ds_phys->ds_dir_obj = dd->dd_object; 2191 ASSERT3P(ds->ds_dir, ==, odd); 2192 dsl_dir_rele(ds->ds_dir, ds); 2193 VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object, 2194 NULL, ds, &ds->ds_dir)); 2195 2196 /* move any clone references */ 2197 if (ds->ds_phys->ds_next_clones_obj && 2198 spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2199 zap_cursor_t zc; 2200 zap_attribute_t za; 2201 2202 for (zap_cursor_init(&zc, dp->dp_meta_objset, 2203 ds->ds_phys->ds_next_clones_obj); 2204 zap_cursor_retrieve(&zc, &za) == 0; 2205 zap_cursor_advance(&zc)) { 2206 dsl_dataset_t *cnds; 2207 uint64_t o; 2208 2209 if (za.za_first_integer == oldnext_obj) { 2210 /* 2211 * We've already moved the 2212 * origin's reference. 
2213 */ 2214 continue; 2215 } 2216 2217 VERIFY0(dsl_dataset_hold_obj(dp, 2218 za.za_first_integer, FTAG, &cnds)); 2219 o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; 2220 2221 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2222 odd->dd_phys->dd_clones, o, tx)); 2223 VERIFY0(zap_add_int(dp->dp_meta_objset, 2224 dd->dd_phys->dd_clones, o, tx)); 2225 dsl_dataset_rele(cnds, FTAG); 2226 } 2227 zap_cursor_fini(&zc); 2228 } 2229 2230 ASSERT(!dsl_prop_hascb(ds)); 2231 } 2232 2233 /* 2234 * Change space accounting. 2235 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 2236 * both be valid, or both be 0 (resulting in delta == 0). This 2237 * is true for each of {clone,origin} independently. 2238 */ 2239 2240 delta = ddpa->cloneusedsnap - 2241 dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2242 ASSERT3S(delta, >=, 0); 2243 ASSERT3U(ddpa->used, >=, delta); 2244 dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2245 dsl_dir_diduse_space(dd, DD_USED_HEAD, 2246 ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx); 2247 2248 delta = ddpa->originusedsnap - 2249 odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2250 ASSERT3S(delta, <=, 0); 2251 ASSERT3U(ddpa->used, >=, -delta); 2252 dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2253 dsl_dir_diduse_space(odd, DD_USED_HEAD, 2254 -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx); 2255 2256 origin_ds->ds_phys->ds_unique_bytes = ddpa->unique; 2257 2258 /* log history record */ 2259 spa_history_log_internal_ds(hds, "promote", tx, ""); 2260 2261 dsl_dir_rele(odd, FTAG); 2262 promote_rele(ddpa, FTAG); 2263} 2264 2265/* 2266 * Make a list of dsl_dataset_t's for the snapshots between first_obj 2267 * (exclusive) and last_obj (inclusive). The list will be in reverse 2268 * order (last_obj will be the list_head()). If first_obj == 0, do all 2269 * snapshots back to this dataset's origin. 
2270 */ 2271static int 2272snaplist_make(dsl_pool_t *dp, 2273 uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag) 2274{ 2275 uint64_t obj = last_obj; 2276 2277 list_create(l, sizeof (struct promotenode), 2278 offsetof(struct promotenode, link)); 2279 2280 while (obj != first_obj) { 2281 dsl_dataset_t *ds; 2282 struct promotenode *snap; 2283 int err; 2284 2285 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 2286 ASSERT(err != ENOENT); 2287 if (err != 0) 2288 return (err); 2289 2290 if (first_obj == 0) 2291 first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2292 2293 snap = kmem_alloc(sizeof (*snap), KM_SLEEP); 2294 snap->ds = ds; 2295 list_insert_tail(l, snap); 2296 obj = ds->ds_phys->ds_prev_snap_obj; 2297 } 2298 2299 return (0); 2300} 2301 2302static int 2303snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2304{ 2305 struct promotenode *snap; 2306 2307 *spacep = 0; 2308 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2309 uint64_t used, comp, uncomp; 2310 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2311 mintxg, UINT64_MAX, &used, &comp, &uncomp); 2312 *spacep += used; 2313 } 2314 return (0); 2315} 2316 2317static void 2318snaplist_destroy(list_t *l, void *tag) 2319{ 2320 struct promotenode *snap; 2321 2322 if (l == NULL || !list_link_active(&l->list_head)) 2323 return; 2324 2325 while ((snap = list_tail(l)) != NULL) { 2326 list_remove(l, snap); 2327 dsl_dataset_rele(snap->ds, tag); 2328 kmem_free(snap, sizeof (*snap)); 2329 } 2330 list_destroy(l); 2331} 2332 2333static int 2334promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag) 2335{ 2336 int error; 2337 dsl_dir_t *dd; 2338 struct promotenode *snap; 2339 2340 error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag, 2341 &ddpa->ddpa_clone); 2342 if (error != 0) 2343 return (error); 2344 dd = ddpa->ddpa_clone->ds_dir; 2345 2346 if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) || 2347 !dsl_dir_is_clone(dd)) { 2348 dsl_dataset_rele(ddpa->ddpa_clone, tag); 2349 return 
(SET_ERROR(EINVAL)); 2350 } 2351 2352 error = snaplist_make(dp, 0, dd->dd_phys->dd_origin_obj, 2353 &ddpa->shared_snaps, tag); 2354 if (error != 0) 2355 goto out; 2356 2357 error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object, 2358 &ddpa->clone_snaps, tag); 2359 if (error != 0) 2360 goto out; 2361 2362 snap = list_head(&ddpa->shared_snaps); 2363 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 2364 error = snaplist_make(dp, dd->dd_phys->dd_origin_obj, 2365 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, 2366 &ddpa->origin_snaps, tag); 2367 if (error != 0) 2368 goto out; 2369 2370 if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { 2371 error = dsl_dataset_hold_obj(dp, 2372 snap->ds->ds_dir->dd_phys->dd_origin_obj, 2373 tag, &ddpa->origin_origin); 2374 if (error != 0) 2375 goto out; 2376 } 2377out: 2378 if (error != 0) 2379 promote_rele(ddpa, tag); 2380 return (error); 2381} 2382 2383static void 2384promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag) 2385{ 2386 snaplist_destroy(&ddpa->shared_snaps, tag); 2387 snaplist_destroy(&ddpa->clone_snaps, tag); 2388 snaplist_destroy(&ddpa->origin_snaps, tag); 2389 if (ddpa->origin_origin != NULL) 2390 dsl_dataset_rele(ddpa->origin_origin, tag); 2391 dsl_dataset_rele(ddpa->ddpa_clone, tag); 2392} 2393 2394/* 2395 * Promote a clone. 2396 * 2397 * If it fails due to a conflicting snapshot name, "conflsnap" will be filled 2398 * in with the name. (It must be at least MAXNAMELEN bytes long.) 2399 */ 2400int 2401dsl_dataset_promote(const char *name, char *conflsnap) 2402{ 2403 dsl_dataset_promote_arg_t ddpa = { 0 }; 2404 uint64_t numsnaps; 2405 int error; 2406 objset_t *os; 2407 2408 /* 2409 * We will modify space proportional to the number of 2410 * snapshots. Compute numsnaps. 
2411 */ 2412 error = dmu_objset_hold(name, FTAG, &os); 2413 if (error != 0) 2414 return (error); 2415 error = zap_count(dmu_objset_pool(os)->dp_meta_objset, 2416 dmu_objset_ds(os)->ds_phys->ds_snapnames_zapobj, &numsnaps); 2417 dmu_objset_rele(os, FTAG); 2418 if (error != 0) 2419 return (error); 2420 2421 ddpa.ddpa_clonename = name; 2422 ddpa.err_ds = conflsnap; 2423 2424 return (dsl_sync_task(name, dsl_dataset_promote_check, 2425 dsl_dataset_promote_sync, &ddpa, 2 + numsnaps)); 2426} 2427 2428int 2429dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, 2430 dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) 2431{ 2432 int64_t unused_refres_delta; 2433 2434 /* they should both be heads */ 2435 if (dsl_dataset_is_snapshot(clone) || 2436 dsl_dataset_is_snapshot(origin_head)) 2437 return (SET_ERROR(EINVAL)); 2438 2439 /* the branch point should be just before them */ 2440 if (clone->ds_prev != origin_head->ds_prev) 2441 return (SET_ERROR(EINVAL)); 2442 2443 /* clone should be the clone (unless they are unrelated) */ 2444 if (clone->ds_prev != NULL && 2445 clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap && 2446 origin_head->ds_object != 2447 clone->ds_prev->ds_phys->ds_next_snap_obj) 2448 return (SET_ERROR(EINVAL)); 2449 2450 /* the clone should be a child of the origin */ 2451 if (clone->ds_dir->dd_parent != origin_head->ds_dir) 2452 return (SET_ERROR(EINVAL)); 2453 2454 /* origin_head shouldn't be modified unless 'force' */ 2455 if (!force && dsl_dataset_modified_since_lastsnap(origin_head)) 2456 return (SET_ERROR(ETXTBSY)); 2457 2458 /* origin_head should have no long holds (e.g. 
is not mounted) */ 2459 if (dsl_dataset_handoff_check(origin_head, owner, tx)) 2460 return (SET_ERROR(EBUSY)); 2461 2462 /* check amount of any unconsumed refreservation */ 2463 unused_refres_delta = 2464 (int64_t)MIN(origin_head->ds_reserved, 2465 origin_head->ds_phys->ds_unique_bytes) - 2466 (int64_t)MIN(origin_head->ds_reserved, 2467 clone->ds_phys->ds_unique_bytes); 2468 2469 if (unused_refres_delta > 0 && 2470 unused_refres_delta > 2471 dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE)) 2472 return (SET_ERROR(ENOSPC)); 2473 2474 /* clone can't be over the head's refquota */ 2475 if (origin_head->ds_quota != 0 && 2476 clone->ds_phys->ds_referenced_bytes > origin_head->ds_quota) 2477 return (SET_ERROR(EDQUOT)); 2478 2479 return (0); 2480} 2481 2482void 2483dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, 2484 dsl_dataset_t *origin_head, dmu_tx_t *tx) 2485{ 2486 dsl_pool_t *dp = dmu_tx_pool(tx); 2487 int64_t unused_refres_delta; 2488 2489 ASSERT(clone->ds_reserved == 0); 2490 ASSERT(origin_head->ds_quota == 0 || 2491 clone->ds_phys->ds_unique_bytes <= origin_head->ds_quota); 2492 2493 dmu_buf_will_dirty(clone->ds_dbuf, tx); 2494 dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 2495 2496 if (clone->ds_objset != NULL) { 2497 dmu_objset_evict(clone->ds_objset); 2498 clone->ds_objset = NULL; 2499 } 2500 2501 if (origin_head->ds_objset != NULL) { 2502 dmu_objset_evict(origin_head->ds_objset); 2503 origin_head->ds_objset = NULL; 2504 } 2505 2506 unused_refres_delta = 2507 (int64_t)MIN(origin_head->ds_reserved, 2508 origin_head->ds_phys->ds_unique_bytes) - 2509 (int64_t)MIN(origin_head->ds_reserved, 2510 clone->ds_phys->ds_unique_bytes); 2511 2512 /* 2513 * Reset origin's unique bytes, if it exists. 
2514 */ 2515 if (clone->ds_prev) { 2516 dsl_dataset_t *origin = clone->ds_prev; 2517 uint64_t comp, uncomp; 2518 2519 dmu_buf_will_dirty(origin->ds_dbuf, tx); 2520 dsl_deadlist_space_range(&clone->ds_deadlist, 2521 origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2522 &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); 2523 } 2524 2525 /* swap blkptrs */ 2526 { 2527 blkptr_t tmp; 2528 tmp = origin_head->ds_phys->ds_bp; 2529 origin_head->ds_phys->ds_bp = clone->ds_phys->ds_bp; 2530 clone->ds_phys->ds_bp = tmp; 2531 } 2532 2533 /* set dd_*_bytes */ 2534 { 2535 int64_t dused, dcomp, duncomp; 2536 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2537 uint64_t odl_used, odl_comp, odl_uncomp; 2538 2539 ASSERT3U(clone->ds_dir->dd_phys-> 2540 dd_used_breakdown[DD_USED_SNAP], ==, 0); 2541 2542 dsl_deadlist_space(&clone->ds_deadlist, 2543 &cdl_used, &cdl_comp, &cdl_uncomp); 2544 dsl_deadlist_space(&origin_head->ds_deadlist, 2545 &odl_used, &odl_comp, &odl_uncomp); 2546 2547 dused = clone->ds_phys->ds_referenced_bytes + cdl_used - 2548 (origin_head->ds_phys->ds_referenced_bytes + odl_used); 2549 dcomp = clone->ds_phys->ds_compressed_bytes + cdl_comp - 2550 (origin_head->ds_phys->ds_compressed_bytes + odl_comp); 2551 duncomp = clone->ds_phys->ds_uncompressed_bytes + 2552 cdl_uncomp - 2553 (origin_head->ds_phys->ds_uncompressed_bytes + odl_uncomp); 2554 2555 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD, 2556 dused, dcomp, duncomp, tx); 2557 dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD, 2558 -dused, -dcomp, -duncomp, tx); 2559 2560 /* 2561 * The difference in the space used by snapshots is the 2562 * difference in snapshot space due to the head's 2563 * deadlist (since that's the only thing that's 2564 * changing that affects the snapused). 
2565 */ 2566 dsl_deadlist_space_range(&clone->ds_deadlist, 2567 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2568 &cdl_used, &cdl_comp, &cdl_uncomp); 2569 dsl_deadlist_space_range(&origin_head->ds_deadlist, 2570 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2571 &odl_used, &odl_comp, &odl_uncomp); 2572 dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used, 2573 DD_USED_HEAD, DD_USED_SNAP, tx); 2574 } 2575 2576 /* swap ds_*_bytes */ 2577 SWITCH64(origin_head->ds_phys->ds_referenced_bytes, 2578 clone->ds_phys->ds_referenced_bytes); 2579 SWITCH64(origin_head->ds_phys->ds_compressed_bytes, 2580 clone->ds_phys->ds_compressed_bytes); 2581 SWITCH64(origin_head->ds_phys->ds_uncompressed_bytes, 2582 clone->ds_phys->ds_uncompressed_bytes); 2583 SWITCH64(origin_head->ds_phys->ds_unique_bytes, 2584 clone->ds_phys->ds_unique_bytes); 2585 2586 /* apply any parent delta for change in unconsumed refreservation */ 2587 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV, 2588 unused_refres_delta, 0, 0, tx); 2589 2590 /* 2591 * Swap deadlists. 2592 */ 2593 dsl_deadlist_close(&clone->ds_deadlist); 2594 dsl_deadlist_close(&origin_head->ds_deadlist); 2595 SWITCH64(origin_head->ds_phys->ds_deadlist_obj, 2596 clone->ds_phys->ds_deadlist_obj); 2597 dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, 2598 clone->ds_phys->ds_deadlist_obj); 2599 dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, 2600 origin_head->ds_phys->ds_deadlist_obj); 2601 2602 dsl_scan_ds_clone_swapped(origin_head, clone, tx); 2603 2604 spa_history_log_internal_ds(clone, "clone swap", tx, 2605 "parent=%s", origin_head->ds_dir->dd_myname); 2606} 2607 2608/* 2609 * Given a pool name and a dataset object number in that pool, 2610 * return the name of that dataset. 
2611 */ 2612int 2613dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2614{ 2615 dsl_pool_t *dp; 2616 dsl_dataset_t *ds; 2617 int error; 2618 2619 error = dsl_pool_hold(pname, FTAG, &dp); 2620 if (error != 0) 2621 return (error); 2622 2623 error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); 2624 if (error == 0) { 2625 dsl_dataset_name(ds, buf); 2626 dsl_dataset_rele(ds, FTAG); 2627 } 2628 dsl_pool_rele(dp, FTAG); 2629 2630 return (error); 2631} 2632 2633int 2634dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2635 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 2636{ 2637 int error = 0; 2638 2639 ASSERT3S(asize, >, 0); 2640 2641 /* 2642 * *ref_rsrv is the portion of asize that will come from any 2643 * unconsumed refreservation space. 2644 */ 2645 *ref_rsrv = 0; 2646 2647 mutex_enter(&ds->ds_lock); 2648 /* 2649 * Make a space adjustment for reserved bytes. 2650 */ 2651 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 2652 ASSERT3U(*used, >=, 2653 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2654 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2655 *ref_rsrv = 2656 asize - MIN(asize, parent_delta(ds, asize + inflight)); 2657 } 2658 2659 if (!check_quota || ds->ds_quota == 0) { 2660 mutex_exit(&ds->ds_lock); 2661 return (0); 2662 } 2663 /* 2664 * If they are requesting more space, and our current estimate 2665 * is over quota, they get to try again unless the actual 2666 * on-disk is over quota and there are no pending changes (which 2667 * may free up space for us). 
2668 */ 2669 if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) { 2670 if (inflight > 0 || 2671 ds->ds_phys->ds_referenced_bytes < ds->ds_quota) 2672 error = SET_ERROR(ERESTART); 2673 else 2674 error = SET_ERROR(EDQUOT); 2675 } 2676 mutex_exit(&ds->ds_lock); 2677 2678 return (error); 2679} 2680 2681typedef struct dsl_dataset_set_qr_arg { 2682 const char *ddsqra_name; 2683 zprop_source_t ddsqra_source; 2684 uint64_t ddsqra_value; 2685} dsl_dataset_set_qr_arg_t; 2686 2687 2688/* ARGSUSED */ 2689static int 2690dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx) 2691{ 2692 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2693 dsl_pool_t *dp = dmu_tx_pool(tx); 2694 dsl_dataset_t *ds; 2695 int error; 2696 uint64_t newval; 2697 2698 if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA) 2699 return (SET_ERROR(ENOTSUP)); 2700 2701 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2702 if (error != 0) 2703 return (error); 2704 2705 if (dsl_dataset_is_snapshot(ds)) { 2706 dsl_dataset_rele(ds, FTAG); 2707 return (SET_ERROR(EINVAL)); 2708 } 2709 2710 error = dsl_prop_predict(ds->ds_dir, 2711 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 2712 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 2713 if (error != 0) { 2714 dsl_dataset_rele(ds, FTAG); 2715 return (error); 2716 } 2717 2718 if (newval == 0) { 2719 dsl_dataset_rele(ds, FTAG); 2720 return (0); 2721 } 2722 2723 if (newval < ds->ds_phys->ds_referenced_bytes || 2724 newval < ds->ds_reserved) { 2725 dsl_dataset_rele(ds, FTAG); 2726 return (SET_ERROR(ENOSPC)); 2727 } 2728 2729 dsl_dataset_rele(ds, FTAG); 2730 return (0); 2731} 2732 2733static void 2734dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx) 2735{ 2736 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2737 dsl_pool_t *dp = dmu_tx_pool(tx); 2738 dsl_dataset_t *ds; 2739 uint64_t newval; 2740 2741 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 2742 2743 dsl_prop_set_sync_impl(ds, 2744 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 2745 
ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, 2746 &ddsqra->ddsqra_value, tx); 2747 2748 VERIFY0(dsl_prop_get_int_ds(ds, 2749 zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval)); 2750 2751 if (ds->ds_quota != newval) { 2752 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2753 ds->ds_quota = newval; 2754 } 2755 dsl_dataset_rele(ds, FTAG); 2756} 2757 2758int 2759dsl_dataset_set_refquota(const char *dsname, zprop_source_t source, 2760 uint64_t refquota) 2761{ 2762 dsl_dataset_set_qr_arg_t ddsqra; 2763 2764 ddsqra.ddsqra_name = dsname; 2765 ddsqra.ddsqra_source = source; 2766 ddsqra.ddsqra_value = refquota; 2767 2768 return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check, 2769 dsl_dataset_set_refquota_sync, &ddsqra, 0)); 2770} 2771 2772static int 2773dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx) 2774{ 2775 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2776 dsl_pool_t *dp = dmu_tx_pool(tx); 2777 dsl_dataset_t *ds; 2778 int error; 2779 uint64_t newval, unique; 2780 2781 if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION) 2782 return (SET_ERROR(ENOTSUP)); 2783 2784 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2785 if (error != 0) 2786 return (error); 2787 2788 if (dsl_dataset_is_snapshot(ds)) { 2789 dsl_dataset_rele(ds, FTAG); 2790 return (SET_ERROR(EINVAL)); 2791 } 2792 2793 error = dsl_prop_predict(ds->ds_dir, 2794 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 2795 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 2796 if (error != 0) { 2797 dsl_dataset_rele(ds, FTAG); 2798 return (error); 2799 } 2800 2801 /* 2802 * If we are doing the preliminary check in open context, the 2803 * space estimates may be inaccurate. 
2804 */ 2805 if (!dmu_tx_is_syncing(tx)) { 2806 dsl_dataset_rele(ds, FTAG); 2807 return (0); 2808 } 2809 2810 mutex_enter(&ds->ds_lock); 2811 if (!DS_UNIQUE_IS_ACCURATE(ds)) 2812 dsl_dataset_recalc_head_uniq(ds); 2813 unique = ds->ds_phys->ds_unique_bytes; 2814 mutex_exit(&ds->ds_lock); 2815 2816 if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) { 2817 uint64_t delta = MAX(unique, newval) - 2818 MAX(unique, ds->ds_reserved); 2819 2820 if (delta > 2821 dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) || 2822 (ds->ds_quota > 0 && newval > ds->ds_quota)) { 2823 dsl_dataset_rele(ds, FTAG); 2824 return (SET_ERROR(ENOSPC)); 2825 } 2826 } 2827 2828 dsl_dataset_rele(ds, FTAG); 2829 return (0); 2830} 2831 2832void 2833dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, 2834 zprop_source_t source, uint64_t value, dmu_tx_t *tx) 2835{ 2836 uint64_t newval; 2837 uint64_t unique; 2838 int64_t delta; 2839 2840 dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 2841 source, sizeof (value), 1, &value, tx); 2842 2843 VERIFY0(dsl_prop_get_int_ds(ds, 2844 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval)); 2845 2846 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2847 mutex_enter(&ds->ds_dir->dd_lock); 2848 mutex_enter(&ds->ds_lock); 2849 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 2850 unique = ds->ds_phys->ds_unique_bytes; 2851 delta = MAX(0, (int64_t)(newval - unique)) - 2852 MAX(0, (int64_t)(ds->ds_reserved - unique)); 2853 ds->ds_reserved = newval; 2854 mutex_exit(&ds->ds_lock); 2855 2856 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 2857 mutex_exit(&ds->ds_dir->dd_lock); 2858} 2859 2860static void 2861dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx) 2862{ 2863 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2864 dsl_pool_t *dp = dmu_tx_pool(tx); 2865 dsl_dataset_t *ds; 2866 2867 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 2868 dsl_dataset_set_refreservation_sync_impl(ds, 2869 ddsqra->ddsqra_source, 
ddsqra->ddsqra_value, tx); 2870 dsl_dataset_rele(ds, FTAG); 2871} 2872 2873int 2874dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, 2875 uint64_t refreservation) 2876{ 2877 dsl_dataset_set_qr_arg_t ddsqra; 2878 2879 ddsqra.ddsqra_name = dsname; 2880 ddsqra.ddsqra_source = source; 2881 ddsqra.ddsqra_value = refreservation; 2882 2883 return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check, 2884 dsl_dataset_set_refreservation_sync, &ddsqra, 0)); 2885} 2886 2887/* 2888 * Return (in *usedp) the amount of space written in new that is not 2889 * present in oldsnap. New may be a snapshot or the head. Old must be 2890 * a snapshot before new, in new's filesystem (or its origin). If not then 2891 * fail and return EINVAL. 2892 * 2893 * The written space is calculated by considering two components: First, we 2894 * ignore any freed space, and calculate the written as new's used space 2895 * minus old's used space. Next, we add in the amount of space that was freed 2896 * between the two snapshots, thus reducing new's used space relative to old's. 2897 * Specifically, this is the space that was born before old->ds_creation_txg, 2898 * and freed before new (ie. on new's deadlist or a previous deadlist). 
2899 * 2900 * space freed [---------------------] 2901 * snapshots ---O-------O--------O-------O------ 2902 * oldsnap new 2903 */ 2904int 2905dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, 2906 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 2907{ 2908 int err = 0; 2909 uint64_t snapobj; 2910 dsl_pool_t *dp = new->ds_dir->dd_pool; 2911 2912 ASSERT(dsl_pool_config_held(dp)); 2913 2914 *usedp = 0; 2915 *usedp += new->ds_phys->ds_referenced_bytes; 2916 *usedp -= oldsnap->ds_phys->ds_referenced_bytes; 2917 2918 *compp = 0; 2919 *compp += new->ds_phys->ds_compressed_bytes; 2920 *compp -= oldsnap->ds_phys->ds_compressed_bytes; 2921 2922 *uncompp = 0; 2923 *uncompp += new->ds_phys->ds_uncompressed_bytes; 2924 *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; 2925 2926 snapobj = new->ds_object; 2927 while (snapobj != oldsnap->ds_object) { 2928 dsl_dataset_t *snap; 2929 uint64_t used, comp, uncomp; 2930 2931 if (snapobj == new->ds_object) { 2932 snap = new; 2933 } else { 2934 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); 2935 if (err != 0) 2936 break; 2937 } 2938 2939 if (snap->ds_phys->ds_prev_snap_txg == 2940 oldsnap->ds_phys->ds_creation_txg) { 2941 /* 2942 * The blocks in the deadlist can not be born after 2943 * ds_prev_snap_txg, so get the whole deadlist space, 2944 * which is more efficient (especially for old-format 2945 * deadlists). Unfortunately the deadlist code 2946 * doesn't have enough information to make this 2947 * optimization itself. 2948 */ 2949 dsl_deadlist_space(&snap->ds_deadlist, 2950 &used, &comp, &uncomp); 2951 } else { 2952 dsl_deadlist_space_range(&snap->ds_deadlist, 2953 0, oldsnap->ds_phys->ds_creation_txg, 2954 &used, &comp, &uncomp); 2955 } 2956 *usedp += used; 2957 *compp += comp; 2958 *uncompp += uncomp; 2959 2960 /* 2961 * If we get to the beginning of the chain of snapshots 2962 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap 2963 * was not a snapshot of/before new. 
2964 */ 2965 snapobj = snap->ds_phys->ds_prev_snap_obj; 2966 if (snap != new) 2967 dsl_dataset_rele(snap, FTAG); 2968 if (snapobj == 0) { 2969 err = SET_ERROR(EINVAL); 2970 break; 2971 } 2972 2973 } 2974 return (err); 2975} 2976 2977/* 2978 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, 2979 * lastsnap, and all snapshots in between are deleted. 2980 * 2981 * blocks that would be freed [---------------------------] 2982 * snapshots ---O-------O--------O-------O--------O 2983 * firstsnap lastsnap 2984 * 2985 * This is the set of blocks that were born after the snap before firstsnap, 2986 * (birth > firstsnap->prev_snap_txg) and died before the snap after the 2987 * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). 2988 * We calculate this by iterating over the relevant deadlists (from the snap 2989 * after lastsnap, backward to the snap after firstsnap), summing up the 2990 * space on the deadlist that was born after the snap before firstsnap. 2991 */ 2992int 2993dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, 2994 dsl_dataset_t *lastsnap, 2995 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 2996{ 2997 int err = 0; 2998 uint64_t snapobj; 2999 dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; 3000 3001 ASSERT(dsl_dataset_is_snapshot(firstsnap)); 3002 ASSERT(dsl_dataset_is_snapshot(lastsnap)); 3003 3004 /* 3005 * Check that the snapshots are in the same dsl_dir, and firstsnap 3006 * is before lastsnap. 
3007 */ 3008 if (firstsnap->ds_dir != lastsnap->ds_dir || 3009 firstsnap->ds_phys->ds_creation_txg > 3010 lastsnap->ds_phys->ds_creation_txg) 3011 return (SET_ERROR(EINVAL)); 3012 3013 *usedp = *compp = *uncompp = 0; 3014 3015 snapobj = lastsnap->ds_phys->ds_next_snap_obj; 3016 while (snapobj != firstsnap->ds_object) { 3017 dsl_dataset_t *ds; 3018 uint64_t used, comp, uncomp; 3019 3020 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); 3021 if (err != 0) 3022 break; 3023 3024 dsl_deadlist_space_range(&ds->ds_deadlist, 3025 firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX, 3026 &used, &comp, &uncomp); 3027 *usedp += used; 3028 *compp += comp; 3029 *uncompp += uncomp; 3030 3031 snapobj = ds->ds_phys->ds_prev_snap_obj; 3032 ASSERT3U(snapobj, !=, 0); 3033 dsl_dataset_rele(ds, FTAG); 3034 } 3035 return (err); 3036} 3037 3038/* 3039 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. 3040 * For example, they could both be snapshots of the same filesystem, and 3041 * 'earlier' is before 'later'. Or 'earlier' could be the origin of 3042 * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's 3043 * filesystem. Or 'earlier' could be the origin's origin. 
3044 */ 3045boolean_t 3046dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier) 3047{ 3048 dsl_pool_t *dp = later->ds_dir->dd_pool; 3049 int error; 3050 boolean_t ret; 3051 3052 ASSERT(dsl_pool_config_held(dp)); 3053 3054 if (earlier->ds_phys->ds_creation_txg >= 3055 later->ds_phys->ds_creation_txg) 3056 return (B_FALSE); 3057 3058 if (later->ds_dir == earlier->ds_dir) 3059 return (B_TRUE); 3060 if (!dsl_dir_is_clone(later->ds_dir)) 3061 return (B_FALSE); 3062 3063 if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) 3064 return (B_TRUE); 3065 dsl_dataset_t *origin; 3066 error = dsl_dataset_hold_obj(dp, 3067 later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); 3068 if (error != 0) 3069 return (B_FALSE); 3070 ret = dsl_dataset_is_before(origin, earlier); 3071 dsl_dataset_rele(origin, FTAG); 3072 return (ret); 3073}
|