1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 by Delphix. All rights reserved. 24 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. 25 * All rights reserved.
| 1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 by Delphix. All rights reserved. 24 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. 25 * All rights reserved.
|
| 26 * Portions Copyright 2011 Martin Matuska <mm@FreeBSd.org>
|
26 */ 27 28#include <sys/dmu_objset.h> 29#include <sys/dsl_dataset.h> 30#include <sys/dsl_dir.h> 31#include <sys/dsl_prop.h> 32#include <sys/dsl_synctask.h> 33#include <sys/dmu_traverse.h> 34#include <sys/dmu_tx.h> 35#include <sys/arc.h> 36#include <sys/zio.h> 37#include <sys/zap.h> 38#include <sys/unique.h> 39#include <sys/zfs_context.h> 40#include <sys/zfs_ioctl.h> 41#include <sys/spa.h> 42#include <sys/zfs_znode.h> 43#include <sys/zfs_onexit.h> 44#include <sys/zvol.h> 45#include <sys/dsl_scan.h> 46#include <sys/dsl_deadlist.h> 47 48static char *dsl_reaper = "the grim reaper"; 49 50static dsl_checkfunc_t dsl_dataset_destroy_begin_check; 51static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; 52static dsl_syncfunc_t dsl_dataset_set_reservation_sync; 53 54#define SWITCH64(x, y) \ 55 { \ 56 uint64_t __tmp = (x); \ 57 (x) = (y); \ 58 (y) = __tmp; \ 59 } 60 61#define DS_REF_MAX (1ULL << 62) 62 63#define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 64 65#define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper) 66 67 68/* 69 * Figure out how much of this delta should be propogated to the dsl_dir 70 * layer. If there's a refreservation, that space has already been 71 * partially accounted for in our ancestors. 
72 */ 73static int64_t 74parent_delta(dsl_dataset_t *ds, int64_t delta) 75{ 76 uint64_t old_bytes, new_bytes; 77 78 if (ds->ds_reserved == 0) 79 return (delta); 80 81 old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 82 new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 83 84 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 85 return (new_bytes - old_bytes); 86} 87 88void 89dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) 90{ 91 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 92 int compressed = BP_GET_PSIZE(bp); 93 int uncompressed = BP_GET_UCSIZE(bp); 94 int64_t delta; 95 96 dprintf_bp(bp, "ds=%p", ds); 97 98 ASSERT(dmu_tx_is_syncing(tx)); 99 /* It could have been compressed away to nothing */ 100 if (BP_IS_HOLE(bp)) 101 return; 102 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 103 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); 104 if (ds == NULL) { 105 /* 106 * Account for the meta-objset space in its placeholder 107 * dsl_dir. 
108 */ 109 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ 110 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, 111 used, compressed, uncompressed, tx); 112 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 113 return; 114 } 115 dmu_buf_will_dirty(ds->ds_dbuf, tx); 116 117 mutex_enter(&ds->ds_dir->dd_lock); 118 mutex_enter(&ds->ds_lock); 119 delta = parent_delta(ds, used); 120 ds->ds_phys->ds_used_bytes += used; 121 ds->ds_phys->ds_compressed_bytes += compressed; 122 ds->ds_phys->ds_uncompressed_bytes += uncompressed; 123 ds->ds_phys->ds_unique_bytes += used; 124 mutex_exit(&ds->ds_lock); 125 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, 126 compressed, uncompressed, tx); 127 dsl_dir_transfer_space(ds->ds_dir, used - delta, 128 DD_USED_REFRSRV, DD_USED_HEAD, tx); 129 mutex_exit(&ds->ds_dir->dd_lock); 130} 131 132int 133dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, 134 boolean_t async) 135{ 136 if (BP_IS_HOLE(bp)) 137 return (0); 138 139 ASSERT(dmu_tx_is_syncing(tx)); 140 ASSERT(bp->blk_birth <= tx->tx_txg); 141 142 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 143 int compressed = BP_GET_PSIZE(bp); 144 int uncompressed = BP_GET_UCSIZE(bp); 145 146 ASSERT(used > 0); 147 if (ds == NULL) { 148 /* 149 * Account for the meta-objset space in its placeholder 150 * dataset. 
151 */ 152 dsl_free(tx->tx_pool, tx->tx_txg, bp); 153 154 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, 155 -used, -compressed, -uncompressed, tx); 156 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 157 return (used); 158 } 159 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 160 161 ASSERT(!dsl_dataset_is_snapshot(ds)); 162 dmu_buf_will_dirty(ds->ds_dbuf, tx); 163 164 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 165 int64_t delta; 166 167 dprintf_bp(bp, "freeing ds=%llu", ds->ds_object); 168 dsl_free(tx->tx_pool, tx->tx_txg, bp); 169 170 mutex_enter(&ds->ds_dir->dd_lock); 171 mutex_enter(&ds->ds_lock); 172 ASSERT(ds->ds_phys->ds_unique_bytes >= used || 173 !DS_UNIQUE_IS_ACCURATE(ds)); 174 delta = parent_delta(ds, -used); 175 ds->ds_phys->ds_unique_bytes -= used; 176 mutex_exit(&ds->ds_lock); 177 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 178 delta, -compressed, -uncompressed, tx); 179 dsl_dir_transfer_space(ds->ds_dir, -used - delta, 180 DD_USED_REFRSRV, DD_USED_HEAD, tx); 181 mutex_exit(&ds->ds_dir->dd_lock); 182 } else { 183 dprintf_bp(bp, "putting on dead list: %s", ""); 184 if (async) { 185 /* 186 * We are here as part of zio's write done callback, 187 * which means we're a zio interrupt thread. We can't 188 * call dsl_deadlist_insert() now because it may block 189 * waiting for I/O. Instead, put bp on the deferred 190 * queue and let dsl_pool_sync() finish the job. 
191 */ 192 bplist_append(&ds->ds_pending_deadlist, bp); 193 } else { 194 dsl_deadlist_insert(&ds->ds_deadlist, bp, tx); 195 } 196 ASSERT3U(ds->ds_prev->ds_object, ==, 197 ds->ds_phys->ds_prev_snap_obj); 198 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 199 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 200 if (ds->ds_prev->ds_phys->ds_next_snap_obj == 201 ds->ds_object && bp->blk_birth > 202 ds->ds_prev->ds_phys->ds_prev_snap_txg) { 203 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 204 mutex_enter(&ds->ds_prev->ds_lock); 205 ds->ds_prev->ds_phys->ds_unique_bytes += used; 206 mutex_exit(&ds->ds_prev->ds_lock); 207 } 208 if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { 209 dsl_dir_transfer_space(ds->ds_dir, used, 210 DD_USED_HEAD, DD_USED_SNAP, tx); 211 } 212 } 213 mutex_enter(&ds->ds_lock); 214 ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); 215 ds->ds_phys->ds_used_bytes -= used; 216 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 217 ds->ds_phys->ds_compressed_bytes -= compressed; 218 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 219 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 220 mutex_exit(&ds->ds_lock); 221 222 return (used); 223} 224 225uint64_t 226dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 227{ 228 uint64_t trysnap = 0; 229 230 if (ds == NULL) 231 return (0); 232 /* 233 * The snapshot creation could fail, but that would cause an 234 * incorrect FALSE return, which would only result in an 235 * overestimation of the amount of space that an operation would 236 * consume, which is OK. 237 * 238 * There's also a small window where we could miss a pending 239 * snapshot, because we could set the sync task in the quiescing 240 * phase. So this should only be used as a guess. 
241 */ 242 if (ds->ds_trysnap_txg > 243 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 244 trysnap = ds->ds_trysnap_txg; 245 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 246} 247 248boolean_t 249dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, 250 uint64_t blk_birth) 251{ 252 if (blk_birth <= dsl_dataset_prev_snap_txg(ds)) 253 return (B_FALSE); 254 255 ddt_prefetch(dsl_dataset_get_spa(ds), bp); 256 257 return (B_TRUE); 258} 259 260/* ARGSUSED */ 261static void 262dsl_dataset_evict(dmu_buf_t *db, void *dsv) 263{ 264 dsl_dataset_t *ds = dsv; 265 266 ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds)); 267 268 unique_remove(ds->ds_fsid_guid); 269 270 if (ds->ds_objset != NULL) 271 dmu_objset_evict(ds->ds_objset); 272 273 if (ds->ds_prev) { 274 dsl_dataset_drop_ref(ds->ds_prev, ds); 275 ds->ds_prev = NULL; 276 } 277 278 bplist_destroy(&ds->ds_pending_deadlist); 279 if (db != NULL) { 280 dsl_deadlist_close(&ds->ds_deadlist); 281 } else { 282 ASSERT(ds->ds_deadlist.dl_dbuf == NULL); 283 ASSERT(!ds->ds_deadlist.dl_oldfmt); 284 } 285 if (ds->ds_dir) 286 dsl_dir_close(ds->ds_dir, ds); 287 288 ASSERT(!list_link_active(&ds->ds_synced_link)); 289 290 if (mutex_owned(&ds->ds_lock)) 291 mutex_exit(&ds->ds_lock); 292 mutex_destroy(&ds->ds_lock); 293 mutex_destroy(&ds->ds_recvlock); 294 if (mutex_owned(&ds->ds_opening_lock)) 295 mutex_exit(&ds->ds_opening_lock); 296 mutex_destroy(&ds->ds_opening_lock); 297 rw_destroy(&ds->ds_rwlock); 298 cv_destroy(&ds->ds_exclusive_cv); 299 300 kmem_free(ds, sizeof (dsl_dataset_t)); 301} 302 303static int 304dsl_dataset_get_snapname(dsl_dataset_t *ds) 305{ 306 dsl_dataset_phys_t *headphys; 307 int err; 308 dmu_buf_t *headdbuf; 309 dsl_pool_t *dp = ds->ds_dir->dd_pool; 310 objset_t *mos = dp->dp_meta_objset; 311 312 if (ds->ds_snapname[0]) 313 return (0); 314 if (ds->ds_phys->ds_next_snap_obj == 0) 315 return (0); 316 317 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 318 FTAG, &headdbuf); 
319 if (err) 320 return (err); 321 headphys = headdbuf->db_data; 322 err = zap_value_search(dp->dp_meta_objset, 323 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 324 dmu_buf_rele(headdbuf, FTAG); 325 return (err); 326} 327 328static int 329dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) 330{ 331 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 332 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 333 matchtype_t mt; 334 int err; 335 336 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 337 mt = MT_FIRST; 338 else 339 mt = MT_EXACT; 340 341 err = zap_lookup_norm(mos, snapobj, name, 8, 1, 342 value, mt, NULL, 0, NULL); 343 if (err == ENOTSUP && mt == MT_FIRST) 344 err = zap_lookup(mos, snapobj, name, 8, 1, value); 345 return (err); 346} 347 348static int 349dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx) 350{ 351 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 352 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 353 matchtype_t mt; 354 int err; 355 356 dsl_dir_snap_cmtime_update(ds->ds_dir); 357 358 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 359 mt = MT_FIRST; 360 else 361 mt = MT_EXACT; 362 363 err = zap_remove_norm(mos, snapobj, name, mt, tx); 364 if (err == ENOTSUP && mt == MT_FIRST) 365 err = zap_remove(mos, snapobj, name, tx); 366 return (err); 367} 368 369static int 370dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, 371 dsl_dataset_t **dsp) 372{ 373 objset_t *mos = dp->dp_meta_objset; 374 dmu_buf_t *dbuf; 375 dsl_dataset_t *ds; 376 int err; 377 dmu_object_info_t doi; 378 379 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 380 dsl_pool_sync_context(dp)); 381 382 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 383 if (err) 384 return (err); 385 386 /* Make sure dsobj has the correct object type. 
*/ 387 dmu_object_info_from_db(dbuf, &doi); 388 if (doi.doi_type != DMU_OT_DSL_DATASET) 389 return (EINVAL); 390 391 ds = dmu_buf_get_user(dbuf); 392 if (ds == NULL) { 393 dsl_dataset_t *winner; 394 395 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 396 ds->ds_dbuf = dbuf; 397 ds->ds_object = dsobj; 398 ds->ds_phys = dbuf->db_data; 399 400 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 401 mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL); 402 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 403 rw_init(&ds->ds_rwlock, 0, 0, 0); 404 cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); 405 406 bplist_create(&ds->ds_pending_deadlist); 407 dsl_deadlist_open(&ds->ds_deadlist, 408 mos, ds->ds_phys->ds_deadlist_obj); 409 410 if (err == 0) { 411 err = dsl_dir_open_obj(dp, 412 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 413 } 414 if (err) { 415 mutex_destroy(&ds->ds_lock); 416 mutex_destroy(&ds->ds_recvlock); 417 mutex_destroy(&ds->ds_opening_lock); 418 rw_destroy(&ds->ds_rwlock); 419 cv_destroy(&ds->ds_exclusive_cv); 420 bplist_destroy(&ds->ds_pending_deadlist); 421 dsl_deadlist_close(&ds->ds_deadlist); 422 kmem_free(ds, sizeof (dsl_dataset_t)); 423 dmu_buf_rele(dbuf, tag); 424 return (err); 425 } 426 427 if (!dsl_dataset_is_snapshot(ds)) { 428 ds->ds_snapname[0] = '\0'; 429 if (ds->ds_phys->ds_prev_snap_obj) { 430 err = dsl_dataset_get_ref(dp, 431 ds->ds_phys->ds_prev_snap_obj, 432 ds, &ds->ds_prev); 433 } 434 } else { 435 if (zfs_flags & ZFS_DEBUG_SNAPNAMES) 436 err = dsl_dataset_get_snapname(ds); 437 if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) { 438 err = zap_count( 439 ds->ds_dir->dd_pool->dp_meta_objset, 440 ds->ds_phys->ds_userrefs_obj, 441 &ds->ds_userrefs); 442 } 443 } 444 445 if (err == 0 && !dsl_dataset_is_snapshot(ds)) { 446 /* 447 * In sync context, we're called with either no lock 448 * or with the write lock. If we're not syncing, 449 * we're always called with the read lock held. 
450 */ 451 boolean_t need_lock = 452 !RW_WRITE_HELD(&dp->dp_config_rwlock) && 453 dsl_pool_sync_context(dp); 454 455 if (need_lock) 456 rw_enter(&dp->dp_config_rwlock, RW_READER); 457 458 err = dsl_prop_get_ds(ds, 459 "refreservation", sizeof (uint64_t), 1, 460 &ds->ds_reserved, NULL); 461 if (err == 0) { 462 err = dsl_prop_get_ds(ds, 463 "refquota", sizeof (uint64_t), 1, 464 &ds->ds_quota, NULL); 465 } 466 467 if (need_lock) 468 rw_exit(&dp->dp_config_rwlock); 469 } else { 470 ds->ds_reserved = ds->ds_quota = 0; 471 } 472 473 if (err == 0) { 474 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, 475 dsl_dataset_evict); 476 } 477 if (err || winner) { 478 bplist_destroy(&ds->ds_pending_deadlist); 479 dsl_deadlist_close(&ds->ds_deadlist); 480 if (ds->ds_prev) 481 dsl_dataset_drop_ref(ds->ds_prev, ds); 482 dsl_dir_close(ds->ds_dir, ds); 483 mutex_destroy(&ds->ds_lock); 484 mutex_destroy(&ds->ds_recvlock); 485 mutex_destroy(&ds->ds_opening_lock); 486 rw_destroy(&ds->ds_rwlock); 487 cv_destroy(&ds->ds_exclusive_cv); 488 kmem_free(ds, sizeof (dsl_dataset_t)); 489 if (err) { 490 dmu_buf_rele(dbuf, tag); 491 return (err); 492 } 493 ds = winner; 494 } else { 495 ds->ds_fsid_guid = 496 unique_insert(ds->ds_phys->ds_fsid_guid); 497 } 498 } 499 ASSERT3P(ds->ds_dbuf, ==, dbuf); 500 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 501 ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || 502 spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || 503 dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); 504 mutex_enter(&ds->ds_lock); 505 if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) { 506 mutex_exit(&ds->ds_lock); 507 dmu_buf_rele(ds->ds_dbuf, tag); 508 return (ENOENT); 509 } 510 mutex_exit(&ds->ds_lock); 511 *dsp = ds; 512 return (0); 513} 514 515static int 516dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag) 517{ 518 dsl_pool_t *dp = ds->ds_dir->dd_pool; 519 520 /* 521 * In syncing context we don't want the rwlock lock: there 522 * may be an existing writer waiting for sync 
phase to 523 * finish. We don't need to worry about such writers, since 524 * sync phase is single-threaded, so the writer can't be 525 * doing anything while we are active. 526 */ 527 if (dsl_pool_sync_context(dp)) { 528 ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); 529 return (0); 530 } 531 532 /* 533 * Normal users will hold the ds_rwlock as a READER until they 534 * are finished (i.e., call dsl_dataset_rele()). "Owners" will 535 * drop their READER lock after they set the ds_owner field. 536 * 537 * If the dataset is being destroyed, the destroy thread will 538 * obtain a WRITER lock for exclusive access after it's done its 539 * open-context work and then change the ds_owner to 540 * dsl_reaper once destruction is assured. So threads 541 * may block here temporarily, until the "destructability" of 542 * the dataset is determined. 543 */ 544 ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock)); 545 mutex_enter(&ds->ds_lock); 546 while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) { 547 rw_exit(&dp->dp_config_rwlock); 548 cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock); 549 if (DSL_DATASET_IS_DESTROYED(ds)) { 550 mutex_exit(&ds->ds_lock); 551 dsl_dataset_drop_ref(ds, tag); 552 rw_enter(&dp->dp_config_rwlock, RW_READER); 553 return (ENOENT); 554 } 555 /* 556 * The dp_config_rwlock lives above the ds_lock. And 557 * we need to check DSL_DATASET_IS_DESTROYED() while 558 * holding the ds_lock, so we have to drop and reacquire 559 * the ds_lock here. 
560 */ 561 mutex_exit(&ds->ds_lock); 562 rw_enter(&dp->dp_config_rwlock, RW_READER); 563 mutex_enter(&ds->ds_lock); 564 } 565 mutex_exit(&ds->ds_lock); 566 return (0); 567} 568 569int 570dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, 571 dsl_dataset_t **dsp) 572{ 573 int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp); 574 575 if (err) 576 return (err); 577 return (dsl_dataset_hold_ref(*dsp, tag)); 578} 579 580int 581dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok, 582 void *tag, dsl_dataset_t **dsp) 583{ 584 int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); 585 if (err) 586 return (err); 587 if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { 588 dsl_dataset_rele(*dsp, tag); 589 *dsp = NULL; 590 return (EBUSY); 591 } 592 return (0); 593} 594 595int 596dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp) 597{ 598 dsl_dir_t *dd; 599 dsl_pool_t *dp; 600 const char *snapname; 601 uint64_t obj; 602 int err = 0; 603 604 err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname); 605 if (err) 606 return (err); 607 608 dp = dd->dd_pool; 609 obj = dd->dd_phys->dd_head_dataset_obj; 610 rw_enter(&dp->dp_config_rwlock, RW_READER); 611 if (obj) 612 err = dsl_dataset_get_ref(dp, obj, tag, dsp); 613 else 614 err = ENOENT; 615 if (err) 616 goto out; 617 618 err = dsl_dataset_hold_ref(*dsp, tag); 619 620 /* we may be looking for a snapshot */ 621 if (err == 0 && snapname != NULL) { 622 dsl_dataset_t *ds = NULL; 623 624 if (*snapname++ != '@') { 625 dsl_dataset_rele(*dsp, tag); 626 err = ENOENT; 627 goto out; 628 } 629 630 dprintf("looking for snapshot '%s'\n", snapname); 631 err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); 632 if (err == 0) 633 err = dsl_dataset_get_ref(dp, obj, tag, &ds); 634 dsl_dataset_rele(*dsp, tag); 635 636 ASSERT3U((err == 0), ==, (ds != NULL)); 637 638 if (ds) { 639 mutex_enter(&ds->ds_lock); 640 if (ds->ds_snapname[0] == 0) 641 (void) strlcpy(ds->ds_snapname, snapname, 642 sizeof 
(ds->ds_snapname)); 643 mutex_exit(&ds->ds_lock); 644 err = dsl_dataset_hold_ref(ds, tag); 645 *dsp = err ? NULL : ds; 646 } 647 } 648out: 649 rw_exit(&dp->dp_config_rwlock); 650 dsl_dir_close(dd, FTAG); 651 return (err); 652} 653 654int 655dsl_dataset_own(const char *name, boolean_t inconsistentok, 656 void *tag, dsl_dataset_t **dsp) 657{ 658 int err = dsl_dataset_hold(name, tag, dsp); 659 if (err) 660 return (err); 661 if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { 662 dsl_dataset_rele(*dsp, tag); 663 return (EBUSY); 664 } 665 return (0); 666} 667 668void 669dsl_dataset_name(dsl_dataset_t *ds, char *name) 670{ 671 if (ds == NULL) { 672 (void) strcpy(name, "mos"); 673 } else { 674 dsl_dir_name(ds->ds_dir, name); 675 VERIFY(0 == dsl_dataset_get_snapname(ds)); 676 if (ds->ds_snapname[0]) { 677 (void) strcat(name, "@"); 678 /* 679 * We use a "recursive" mutex so that we 680 * can call dprintf_ds() with ds_lock held. 681 */ 682 if (!MUTEX_HELD(&ds->ds_lock)) { 683 mutex_enter(&ds->ds_lock); 684 (void) strcat(name, ds->ds_snapname); 685 mutex_exit(&ds->ds_lock); 686 } else { 687 (void) strcat(name, ds->ds_snapname); 688 } 689 } 690 } 691} 692 693static int 694dsl_dataset_namelen(dsl_dataset_t *ds) 695{ 696 int result; 697 698 if (ds == NULL) { 699 result = 3; /* "mos" */ 700 } else { 701 result = dsl_dir_namelen(ds->ds_dir); 702 VERIFY(0 == dsl_dataset_get_snapname(ds)); 703 if (ds->ds_snapname[0]) { 704 ++result; /* adding one for the @-sign */ 705 if (!MUTEX_HELD(&ds->ds_lock)) { 706 mutex_enter(&ds->ds_lock); 707 result += strlen(ds->ds_snapname); 708 mutex_exit(&ds->ds_lock); 709 } else { 710 result += strlen(ds->ds_snapname); 711 } 712 } 713 } 714 715 return (result); 716} 717 718void 719dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag) 720{ 721 dmu_buf_rele(ds->ds_dbuf, tag); 722} 723 724void 725dsl_dataset_rele(dsl_dataset_t *ds, void *tag) 726{ 727 if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) { 728 rw_exit(&ds->ds_rwlock); 729 } 730 
dsl_dataset_drop_ref(ds, tag); 731} 732 733void 734dsl_dataset_disown(dsl_dataset_t *ds, void *tag) 735{ 736 ASSERT((ds->ds_owner == tag && ds->ds_dbuf) || 737 (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL)); 738 739 mutex_enter(&ds->ds_lock); 740 ds->ds_owner = NULL; 741 if (RW_WRITE_HELD(&ds->ds_rwlock)) { 742 rw_exit(&ds->ds_rwlock); 743 cv_broadcast(&ds->ds_exclusive_cv); 744 } 745 mutex_exit(&ds->ds_lock); 746 if (ds->ds_dbuf) 747 dsl_dataset_drop_ref(ds, tag); 748 else 749 dsl_dataset_evict(NULL, ds); 750} 751 752boolean_t 753dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag) 754{ 755 boolean_t gotit = FALSE; 756 757 mutex_enter(&ds->ds_lock); 758 if (ds->ds_owner == NULL && 759 (!DS_IS_INCONSISTENT(ds) || inconsistentok)) { 760 ds->ds_owner = tag; 761 if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) 762 rw_exit(&ds->ds_rwlock); 763 gotit = TRUE; 764 } 765 mutex_exit(&ds->ds_lock); 766 return (gotit); 767} 768 769void 770dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner) 771{ 772 ASSERT3P(owner, ==, ds->ds_owner); 773 if (!RW_WRITE_HELD(&ds->ds_rwlock)) 774 rw_enter(&ds->ds_rwlock, RW_WRITER); 775} 776 777uint64_t 778dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, 779 uint64_t flags, dmu_tx_t *tx) 780{ 781 dsl_pool_t *dp = dd->dd_pool; 782 dmu_buf_t *dbuf; 783 dsl_dataset_phys_t *dsphys; 784 uint64_t dsobj; 785 objset_t *mos = dp->dp_meta_objset; 786 787 if (origin == NULL) 788 origin = dp->dp_origin_snap; 789 790 ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 791 ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); 792 ASSERT(dmu_tx_is_syncing(tx)); 793 ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 794 795 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 796 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 797 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 798 dmu_buf_will_dirty(dbuf, tx); 799 dsphys = dbuf->db_data; 800 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 
801 dsphys->ds_dir_obj = dd->dd_object; 802 dsphys->ds_flags = flags; 803 dsphys->ds_fsid_guid = unique_create(); 804 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 805 sizeof (dsphys->ds_guid)); 806 dsphys->ds_snapnames_zapobj = 807 zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, 808 DMU_OT_NONE, 0, tx); 809 dsphys->ds_creation_time = gethrestime_sec(); 810 dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg; 811 812 if (origin == NULL) { 813 dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); 814 } else { 815 dsl_dataset_t *ohds; 816 817 dsphys->ds_prev_snap_obj = origin->ds_object; 818 dsphys->ds_prev_snap_txg = 819 origin->ds_phys->ds_creation_txg; 820 dsphys->ds_used_bytes = 821 origin->ds_phys->ds_used_bytes; 822 dsphys->ds_compressed_bytes = 823 origin->ds_phys->ds_compressed_bytes; 824 dsphys->ds_uncompressed_bytes = 825 origin->ds_phys->ds_uncompressed_bytes; 826 dsphys->ds_bp = origin->ds_phys->ds_bp; 827 dsphys->ds_flags |= origin->ds_phys->ds_flags; 828 829 dmu_buf_will_dirty(origin->ds_dbuf, tx); 830 origin->ds_phys->ds_num_children++; 831 832 VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, 833 origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds)); 834 dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, 835 dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); 836 dsl_dataset_rele(ohds, FTAG); 837 838 if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { 839 if (origin->ds_phys->ds_next_clones_obj == 0) { 840 origin->ds_phys->ds_next_clones_obj = 841 zap_create(mos, 842 DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 843 } 844 VERIFY(0 == zap_add_int(mos, 845 origin->ds_phys->ds_next_clones_obj, 846 dsobj, tx)); 847 } 848 849 dmu_buf_will_dirty(dd->dd_dbuf, tx); 850 dd->dd_phys->dd_origin_obj = origin->ds_object; 851 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 852 if (origin->ds_dir->dd_phys->dd_clones == 0) { 853 dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); 854 
origin->ds_dir->dd_phys->dd_clones = 855 zap_create(mos, 856 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 857 } 858 VERIFY3U(0, ==, zap_add_int(mos, 859 origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); 860 } 861 } 862 863 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 864 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 865 866 dmu_buf_rele(dbuf, FTAG); 867 868 dmu_buf_will_dirty(dd->dd_dbuf, tx); 869 dd->dd_phys->dd_head_dataset_obj = dsobj; 870 871 return (dsobj); 872} 873 874uint64_t 875dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, 876 dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) 877{ 878 dsl_pool_t *dp = pdd->dd_pool; 879 uint64_t dsobj, ddobj; 880 dsl_dir_t *dd; 881 882 ASSERT(lastname[0] != '@'); 883 884 ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); 885 VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); 886 887 dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx); 888 889 dsl_deleg_set_create_perms(dd, tx, cr); 890 891 dsl_dir_close(dd, FTAG); 892 893 /* 894 * If we are creating a clone, make sure we zero out any stale 895 * data from the origin snapshots zil header. 896 */ 897 if (origin != NULL) { 898 dsl_dataset_t *ds; 899 objset_t *os; 900 901 VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); 902 VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os)); 903 bzero(&os->os_zil_header, sizeof (os->os_zil_header)); 904 dsl_dataset_dirty(ds, tx); 905 dsl_dataset_rele(ds, FTAG); 906 } 907 908 return (dsobj); 909} 910
| 27 */ 28 29#include <sys/dmu_objset.h> 30#include <sys/dsl_dataset.h> 31#include <sys/dsl_dir.h> 32#include <sys/dsl_prop.h> 33#include <sys/dsl_synctask.h> 34#include <sys/dmu_traverse.h> 35#include <sys/dmu_tx.h> 36#include <sys/arc.h> 37#include <sys/zio.h> 38#include <sys/zap.h> 39#include <sys/unique.h> 40#include <sys/zfs_context.h> 41#include <sys/zfs_ioctl.h> 42#include <sys/spa.h> 43#include <sys/zfs_znode.h> 44#include <sys/zfs_onexit.h> 45#include <sys/zvol.h> 46#include <sys/dsl_scan.h> 47#include <sys/dsl_deadlist.h> 48 49static char *dsl_reaper = "the grim reaper"; 50 51static dsl_checkfunc_t dsl_dataset_destroy_begin_check; 52static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; 53static dsl_syncfunc_t dsl_dataset_set_reservation_sync; 54 55#define SWITCH64(x, y) \ 56 { \ 57 uint64_t __tmp = (x); \ 58 (x) = (y); \ 59 (y) = __tmp; \ 60 } 61 62#define DS_REF_MAX (1ULL << 62) 63 64#define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 65 66#define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper) 67 68 69/* 70 * Figure out how much of this delta should be propogated to the dsl_dir 71 * layer. If there's a refreservation, that space has already been 72 * partially accounted for in our ancestors. 
73 */ 74static int64_t 75parent_delta(dsl_dataset_t *ds, int64_t delta) 76{ 77 uint64_t old_bytes, new_bytes; 78 79 if (ds->ds_reserved == 0) 80 return (delta); 81 82 old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 83 new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 84 85 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 86 return (new_bytes - old_bytes); 87} 88 89void 90dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) 91{ 92 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 93 int compressed = BP_GET_PSIZE(bp); 94 int uncompressed = BP_GET_UCSIZE(bp); 95 int64_t delta; 96 97 dprintf_bp(bp, "ds=%p", ds); 98 99 ASSERT(dmu_tx_is_syncing(tx)); 100 /* It could have been compressed away to nothing */ 101 if (BP_IS_HOLE(bp)) 102 return; 103 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 104 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); 105 if (ds == NULL) { 106 /* 107 * Account for the meta-objset space in its placeholder 108 * dsl_dir. 
109 */ 110 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ 111 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, 112 used, compressed, uncompressed, tx); 113 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 114 return; 115 } 116 dmu_buf_will_dirty(ds->ds_dbuf, tx); 117 118 mutex_enter(&ds->ds_dir->dd_lock); 119 mutex_enter(&ds->ds_lock); 120 delta = parent_delta(ds, used); 121 ds->ds_phys->ds_used_bytes += used; 122 ds->ds_phys->ds_compressed_bytes += compressed; 123 ds->ds_phys->ds_uncompressed_bytes += uncompressed; 124 ds->ds_phys->ds_unique_bytes += used; 125 mutex_exit(&ds->ds_lock); 126 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, 127 compressed, uncompressed, tx); 128 dsl_dir_transfer_space(ds->ds_dir, used - delta, 129 DD_USED_REFRSRV, DD_USED_HEAD, tx); 130 mutex_exit(&ds->ds_dir->dd_lock); 131} 132 133int 134dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, 135 boolean_t async) 136{ 137 if (BP_IS_HOLE(bp)) 138 return (0); 139 140 ASSERT(dmu_tx_is_syncing(tx)); 141 ASSERT(bp->blk_birth <= tx->tx_txg); 142 143 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 144 int compressed = BP_GET_PSIZE(bp); 145 int uncompressed = BP_GET_UCSIZE(bp); 146 147 ASSERT(used > 0); 148 if (ds == NULL) { 149 /* 150 * Account for the meta-objset space in its placeholder 151 * dataset. 
152 */ 153 dsl_free(tx->tx_pool, tx->tx_txg, bp); 154 155 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, 156 -used, -compressed, -uncompressed, tx); 157 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 158 return (used); 159 } 160 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 161 162 ASSERT(!dsl_dataset_is_snapshot(ds)); 163 dmu_buf_will_dirty(ds->ds_dbuf, tx); 164 165 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 166 int64_t delta; 167 168 dprintf_bp(bp, "freeing ds=%llu", ds->ds_object); 169 dsl_free(tx->tx_pool, tx->tx_txg, bp); 170 171 mutex_enter(&ds->ds_dir->dd_lock); 172 mutex_enter(&ds->ds_lock); 173 ASSERT(ds->ds_phys->ds_unique_bytes >= used || 174 !DS_UNIQUE_IS_ACCURATE(ds)); 175 delta = parent_delta(ds, -used); 176 ds->ds_phys->ds_unique_bytes -= used; 177 mutex_exit(&ds->ds_lock); 178 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 179 delta, -compressed, -uncompressed, tx); 180 dsl_dir_transfer_space(ds->ds_dir, -used - delta, 181 DD_USED_REFRSRV, DD_USED_HEAD, tx); 182 mutex_exit(&ds->ds_dir->dd_lock); 183 } else { 184 dprintf_bp(bp, "putting on dead list: %s", ""); 185 if (async) { 186 /* 187 * We are here as part of zio's write done callback, 188 * which means we're a zio interrupt thread. We can't 189 * call dsl_deadlist_insert() now because it may block 190 * waiting for I/O. Instead, put bp on the deferred 191 * queue and let dsl_pool_sync() finish the job. 
192 */ 193 bplist_append(&ds->ds_pending_deadlist, bp); 194 } else { 195 dsl_deadlist_insert(&ds->ds_deadlist, bp, tx); 196 } 197 ASSERT3U(ds->ds_prev->ds_object, ==, 198 ds->ds_phys->ds_prev_snap_obj); 199 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 200 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 201 if (ds->ds_prev->ds_phys->ds_next_snap_obj == 202 ds->ds_object && bp->blk_birth > 203 ds->ds_prev->ds_phys->ds_prev_snap_txg) { 204 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 205 mutex_enter(&ds->ds_prev->ds_lock); 206 ds->ds_prev->ds_phys->ds_unique_bytes += used; 207 mutex_exit(&ds->ds_prev->ds_lock); 208 } 209 if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { 210 dsl_dir_transfer_space(ds->ds_dir, used, 211 DD_USED_HEAD, DD_USED_SNAP, tx); 212 } 213 } 214 mutex_enter(&ds->ds_lock); 215 ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); 216 ds->ds_phys->ds_used_bytes -= used; 217 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 218 ds->ds_phys->ds_compressed_bytes -= compressed; 219 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 220 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 221 mutex_exit(&ds->ds_lock); 222 223 return (used); 224} 225 226uint64_t 227dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 228{ 229 uint64_t trysnap = 0; 230 231 if (ds == NULL) 232 return (0); 233 /* 234 * The snapshot creation could fail, but that would cause an 235 * incorrect FALSE return, which would only result in an 236 * overestimation of the amount of space that an operation would 237 * consume, which is OK. 238 * 239 * There's also a small window where we could miss a pending 240 * snapshot, because we could set the sync task in the quiescing 241 * phase. So this should only be used as a guess. 
242 */ 243 if (ds->ds_trysnap_txg > 244 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 245 trysnap = ds->ds_trysnap_txg; 246 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 247} 248 249boolean_t 250dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, 251 uint64_t blk_birth) 252{ 253 if (blk_birth <= dsl_dataset_prev_snap_txg(ds)) 254 return (B_FALSE); 255 256 ddt_prefetch(dsl_dataset_get_spa(ds), bp); 257 258 return (B_TRUE); 259} 260 261/* ARGSUSED */ 262static void 263dsl_dataset_evict(dmu_buf_t *db, void *dsv) 264{ 265 dsl_dataset_t *ds = dsv; 266 267 ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds)); 268 269 unique_remove(ds->ds_fsid_guid); 270 271 if (ds->ds_objset != NULL) 272 dmu_objset_evict(ds->ds_objset); 273 274 if (ds->ds_prev) { 275 dsl_dataset_drop_ref(ds->ds_prev, ds); 276 ds->ds_prev = NULL; 277 } 278 279 bplist_destroy(&ds->ds_pending_deadlist); 280 if (db != NULL) { 281 dsl_deadlist_close(&ds->ds_deadlist); 282 } else { 283 ASSERT(ds->ds_deadlist.dl_dbuf == NULL); 284 ASSERT(!ds->ds_deadlist.dl_oldfmt); 285 } 286 if (ds->ds_dir) 287 dsl_dir_close(ds->ds_dir, ds); 288 289 ASSERT(!list_link_active(&ds->ds_synced_link)); 290 291 if (mutex_owned(&ds->ds_lock)) 292 mutex_exit(&ds->ds_lock); 293 mutex_destroy(&ds->ds_lock); 294 mutex_destroy(&ds->ds_recvlock); 295 if (mutex_owned(&ds->ds_opening_lock)) 296 mutex_exit(&ds->ds_opening_lock); 297 mutex_destroy(&ds->ds_opening_lock); 298 rw_destroy(&ds->ds_rwlock); 299 cv_destroy(&ds->ds_exclusive_cv); 300 301 kmem_free(ds, sizeof (dsl_dataset_t)); 302} 303 304static int 305dsl_dataset_get_snapname(dsl_dataset_t *ds) 306{ 307 dsl_dataset_phys_t *headphys; 308 int err; 309 dmu_buf_t *headdbuf; 310 dsl_pool_t *dp = ds->ds_dir->dd_pool; 311 objset_t *mos = dp->dp_meta_objset; 312 313 if (ds->ds_snapname[0]) 314 return (0); 315 if (ds->ds_phys->ds_next_snap_obj == 0) 316 return (0); 317 318 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 319 FTAG, &headdbuf); 
320 if (err) 321 return (err); 322 headphys = headdbuf->db_data; 323 err = zap_value_search(dp->dp_meta_objset, 324 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 325 dmu_buf_rele(headdbuf, FTAG); 326 return (err); 327} 328 329static int 330dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) 331{ 332 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 333 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 334 matchtype_t mt; 335 int err; 336 337 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 338 mt = MT_FIRST; 339 else 340 mt = MT_EXACT; 341 342 err = zap_lookup_norm(mos, snapobj, name, 8, 1, 343 value, mt, NULL, 0, NULL); 344 if (err == ENOTSUP && mt == MT_FIRST) 345 err = zap_lookup(mos, snapobj, name, 8, 1, value); 346 return (err); 347} 348 349static int 350dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx) 351{ 352 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 353 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 354 matchtype_t mt; 355 int err; 356 357 dsl_dir_snap_cmtime_update(ds->ds_dir); 358 359 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 360 mt = MT_FIRST; 361 else 362 mt = MT_EXACT; 363 364 err = zap_remove_norm(mos, snapobj, name, mt, tx); 365 if (err == ENOTSUP && mt == MT_FIRST) 366 err = zap_remove(mos, snapobj, name, tx); 367 return (err); 368} 369 370static int 371dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, 372 dsl_dataset_t **dsp) 373{ 374 objset_t *mos = dp->dp_meta_objset; 375 dmu_buf_t *dbuf; 376 dsl_dataset_t *ds; 377 int err; 378 dmu_object_info_t doi; 379 380 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 381 dsl_pool_sync_context(dp)); 382 383 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 384 if (err) 385 return (err); 386 387 /* Make sure dsobj has the correct object type. 
*/ 388 dmu_object_info_from_db(dbuf, &doi); 389 if (doi.doi_type != DMU_OT_DSL_DATASET) 390 return (EINVAL); 391 392 ds = dmu_buf_get_user(dbuf); 393 if (ds == NULL) { 394 dsl_dataset_t *winner; 395 396 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 397 ds->ds_dbuf = dbuf; 398 ds->ds_object = dsobj; 399 ds->ds_phys = dbuf->db_data; 400 401 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 402 mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL); 403 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 404 rw_init(&ds->ds_rwlock, 0, 0, 0); 405 cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); 406 407 bplist_create(&ds->ds_pending_deadlist); 408 dsl_deadlist_open(&ds->ds_deadlist, 409 mos, ds->ds_phys->ds_deadlist_obj); 410 411 if (err == 0) { 412 err = dsl_dir_open_obj(dp, 413 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 414 } 415 if (err) { 416 mutex_destroy(&ds->ds_lock); 417 mutex_destroy(&ds->ds_recvlock); 418 mutex_destroy(&ds->ds_opening_lock); 419 rw_destroy(&ds->ds_rwlock); 420 cv_destroy(&ds->ds_exclusive_cv); 421 bplist_destroy(&ds->ds_pending_deadlist); 422 dsl_deadlist_close(&ds->ds_deadlist); 423 kmem_free(ds, sizeof (dsl_dataset_t)); 424 dmu_buf_rele(dbuf, tag); 425 return (err); 426 } 427 428 if (!dsl_dataset_is_snapshot(ds)) { 429 ds->ds_snapname[0] = '\0'; 430 if (ds->ds_phys->ds_prev_snap_obj) { 431 err = dsl_dataset_get_ref(dp, 432 ds->ds_phys->ds_prev_snap_obj, 433 ds, &ds->ds_prev); 434 } 435 } else { 436 if (zfs_flags & ZFS_DEBUG_SNAPNAMES) 437 err = dsl_dataset_get_snapname(ds); 438 if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) { 439 err = zap_count( 440 ds->ds_dir->dd_pool->dp_meta_objset, 441 ds->ds_phys->ds_userrefs_obj, 442 &ds->ds_userrefs); 443 } 444 } 445 446 if (err == 0 && !dsl_dataset_is_snapshot(ds)) { 447 /* 448 * In sync context, we're called with either no lock 449 * or with the write lock. If we're not syncing, 450 * we're always called with the read lock held. 
451 */ 452 boolean_t need_lock = 453 !RW_WRITE_HELD(&dp->dp_config_rwlock) && 454 dsl_pool_sync_context(dp); 455 456 if (need_lock) 457 rw_enter(&dp->dp_config_rwlock, RW_READER); 458 459 err = dsl_prop_get_ds(ds, 460 "refreservation", sizeof (uint64_t), 1, 461 &ds->ds_reserved, NULL); 462 if (err == 0) { 463 err = dsl_prop_get_ds(ds, 464 "refquota", sizeof (uint64_t), 1, 465 &ds->ds_quota, NULL); 466 } 467 468 if (need_lock) 469 rw_exit(&dp->dp_config_rwlock); 470 } else { 471 ds->ds_reserved = ds->ds_quota = 0; 472 } 473 474 if (err == 0) { 475 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, 476 dsl_dataset_evict); 477 } 478 if (err || winner) { 479 bplist_destroy(&ds->ds_pending_deadlist); 480 dsl_deadlist_close(&ds->ds_deadlist); 481 if (ds->ds_prev) 482 dsl_dataset_drop_ref(ds->ds_prev, ds); 483 dsl_dir_close(ds->ds_dir, ds); 484 mutex_destroy(&ds->ds_lock); 485 mutex_destroy(&ds->ds_recvlock); 486 mutex_destroy(&ds->ds_opening_lock); 487 rw_destroy(&ds->ds_rwlock); 488 cv_destroy(&ds->ds_exclusive_cv); 489 kmem_free(ds, sizeof (dsl_dataset_t)); 490 if (err) { 491 dmu_buf_rele(dbuf, tag); 492 return (err); 493 } 494 ds = winner; 495 } else { 496 ds->ds_fsid_guid = 497 unique_insert(ds->ds_phys->ds_fsid_guid); 498 } 499 } 500 ASSERT3P(ds->ds_dbuf, ==, dbuf); 501 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 502 ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || 503 spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || 504 dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); 505 mutex_enter(&ds->ds_lock); 506 if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) { 507 mutex_exit(&ds->ds_lock); 508 dmu_buf_rele(ds->ds_dbuf, tag); 509 return (ENOENT); 510 } 511 mutex_exit(&ds->ds_lock); 512 *dsp = ds; 513 return (0); 514} 515 516static int 517dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag) 518{ 519 dsl_pool_t *dp = ds->ds_dir->dd_pool; 520 521 /* 522 * In syncing context we don't want the rwlock lock: there 523 * may be an existing writer waiting for sync 
phase to 524 * finish. We don't need to worry about such writers, since 525 * sync phase is single-threaded, so the writer can't be 526 * doing anything while we are active. 527 */ 528 if (dsl_pool_sync_context(dp)) { 529 ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); 530 return (0); 531 } 532 533 /* 534 * Normal users will hold the ds_rwlock as a READER until they 535 * are finished (i.e., call dsl_dataset_rele()). "Owners" will 536 * drop their READER lock after they set the ds_owner field. 537 * 538 * If the dataset is being destroyed, the destroy thread will 539 * obtain a WRITER lock for exclusive access after it's done its 540 * open-context work and then change the ds_owner to 541 * dsl_reaper once destruction is assured. So threads 542 * may block here temporarily, until the "destructability" of 543 * the dataset is determined. 544 */ 545 ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock)); 546 mutex_enter(&ds->ds_lock); 547 while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) { 548 rw_exit(&dp->dp_config_rwlock); 549 cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock); 550 if (DSL_DATASET_IS_DESTROYED(ds)) { 551 mutex_exit(&ds->ds_lock); 552 dsl_dataset_drop_ref(ds, tag); 553 rw_enter(&dp->dp_config_rwlock, RW_READER); 554 return (ENOENT); 555 } 556 /* 557 * The dp_config_rwlock lives above the ds_lock. And 558 * we need to check DSL_DATASET_IS_DESTROYED() while 559 * holding the ds_lock, so we have to drop and reacquire 560 * the ds_lock here. 
561 */ 562 mutex_exit(&ds->ds_lock); 563 rw_enter(&dp->dp_config_rwlock, RW_READER); 564 mutex_enter(&ds->ds_lock); 565 } 566 mutex_exit(&ds->ds_lock); 567 return (0); 568} 569 570int 571dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, 572 dsl_dataset_t **dsp) 573{ 574 int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp); 575 576 if (err) 577 return (err); 578 return (dsl_dataset_hold_ref(*dsp, tag)); 579} 580 581int 582dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok, 583 void *tag, dsl_dataset_t **dsp) 584{ 585 int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); 586 if (err) 587 return (err); 588 if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { 589 dsl_dataset_rele(*dsp, tag); 590 *dsp = NULL; 591 return (EBUSY); 592 } 593 return (0); 594} 595 596int 597dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp) 598{ 599 dsl_dir_t *dd; 600 dsl_pool_t *dp; 601 const char *snapname; 602 uint64_t obj; 603 int err = 0; 604 605 err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname); 606 if (err) 607 return (err); 608 609 dp = dd->dd_pool; 610 obj = dd->dd_phys->dd_head_dataset_obj; 611 rw_enter(&dp->dp_config_rwlock, RW_READER); 612 if (obj) 613 err = dsl_dataset_get_ref(dp, obj, tag, dsp); 614 else 615 err = ENOENT; 616 if (err) 617 goto out; 618 619 err = dsl_dataset_hold_ref(*dsp, tag); 620 621 /* we may be looking for a snapshot */ 622 if (err == 0 && snapname != NULL) { 623 dsl_dataset_t *ds = NULL; 624 625 if (*snapname++ != '@') { 626 dsl_dataset_rele(*dsp, tag); 627 err = ENOENT; 628 goto out; 629 } 630 631 dprintf("looking for snapshot '%s'\n", snapname); 632 err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); 633 if (err == 0) 634 err = dsl_dataset_get_ref(dp, obj, tag, &ds); 635 dsl_dataset_rele(*dsp, tag); 636 637 ASSERT3U((err == 0), ==, (ds != NULL)); 638 639 if (ds) { 640 mutex_enter(&ds->ds_lock); 641 if (ds->ds_snapname[0] == 0) 642 (void) strlcpy(ds->ds_snapname, snapname, 643 sizeof 
(ds->ds_snapname)); 644 mutex_exit(&ds->ds_lock); 645 err = dsl_dataset_hold_ref(ds, tag); 646 *dsp = err ? NULL : ds; 647 } 648 } 649out: 650 rw_exit(&dp->dp_config_rwlock); 651 dsl_dir_close(dd, FTAG); 652 return (err); 653} 654 655int 656dsl_dataset_own(const char *name, boolean_t inconsistentok, 657 void *tag, dsl_dataset_t **dsp) 658{ 659 int err = dsl_dataset_hold(name, tag, dsp); 660 if (err) 661 return (err); 662 if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { 663 dsl_dataset_rele(*dsp, tag); 664 return (EBUSY); 665 } 666 return (0); 667} 668 669void 670dsl_dataset_name(dsl_dataset_t *ds, char *name) 671{ 672 if (ds == NULL) { 673 (void) strcpy(name, "mos"); 674 } else { 675 dsl_dir_name(ds->ds_dir, name); 676 VERIFY(0 == dsl_dataset_get_snapname(ds)); 677 if (ds->ds_snapname[0]) { 678 (void) strcat(name, "@"); 679 /* 680 * We use a "recursive" mutex so that we 681 * can call dprintf_ds() with ds_lock held. 682 */ 683 if (!MUTEX_HELD(&ds->ds_lock)) { 684 mutex_enter(&ds->ds_lock); 685 (void) strcat(name, ds->ds_snapname); 686 mutex_exit(&ds->ds_lock); 687 } else { 688 (void) strcat(name, ds->ds_snapname); 689 } 690 } 691 } 692} 693 694static int 695dsl_dataset_namelen(dsl_dataset_t *ds) 696{ 697 int result; 698 699 if (ds == NULL) { 700 result = 3; /* "mos" */ 701 } else { 702 result = dsl_dir_namelen(ds->ds_dir); 703 VERIFY(0 == dsl_dataset_get_snapname(ds)); 704 if (ds->ds_snapname[0]) { 705 ++result; /* adding one for the @-sign */ 706 if (!MUTEX_HELD(&ds->ds_lock)) { 707 mutex_enter(&ds->ds_lock); 708 result += strlen(ds->ds_snapname); 709 mutex_exit(&ds->ds_lock); 710 } else { 711 result += strlen(ds->ds_snapname); 712 } 713 } 714 } 715 716 return (result); 717} 718 719void 720dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag) 721{ 722 dmu_buf_rele(ds->ds_dbuf, tag); 723} 724 725void 726dsl_dataset_rele(dsl_dataset_t *ds, void *tag) 727{ 728 if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) { 729 rw_exit(&ds->ds_rwlock); 730 } 731 
dsl_dataset_drop_ref(ds, tag); 732} 733 734void 735dsl_dataset_disown(dsl_dataset_t *ds, void *tag) 736{ 737 ASSERT((ds->ds_owner == tag && ds->ds_dbuf) || 738 (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL)); 739 740 mutex_enter(&ds->ds_lock); 741 ds->ds_owner = NULL; 742 if (RW_WRITE_HELD(&ds->ds_rwlock)) { 743 rw_exit(&ds->ds_rwlock); 744 cv_broadcast(&ds->ds_exclusive_cv); 745 } 746 mutex_exit(&ds->ds_lock); 747 if (ds->ds_dbuf) 748 dsl_dataset_drop_ref(ds, tag); 749 else 750 dsl_dataset_evict(NULL, ds); 751} 752 753boolean_t 754dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag) 755{ 756 boolean_t gotit = FALSE; 757 758 mutex_enter(&ds->ds_lock); 759 if (ds->ds_owner == NULL && 760 (!DS_IS_INCONSISTENT(ds) || inconsistentok)) { 761 ds->ds_owner = tag; 762 if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) 763 rw_exit(&ds->ds_rwlock); 764 gotit = TRUE; 765 } 766 mutex_exit(&ds->ds_lock); 767 return (gotit); 768} 769 770void 771dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner) 772{ 773 ASSERT3P(owner, ==, ds->ds_owner); 774 if (!RW_WRITE_HELD(&ds->ds_rwlock)) 775 rw_enter(&ds->ds_rwlock, RW_WRITER); 776} 777 778uint64_t 779dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, 780 uint64_t flags, dmu_tx_t *tx) 781{ 782 dsl_pool_t *dp = dd->dd_pool; 783 dmu_buf_t *dbuf; 784 dsl_dataset_phys_t *dsphys; 785 uint64_t dsobj; 786 objset_t *mos = dp->dp_meta_objset; 787 788 if (origin == NULL) 789 origin = dp->dp_origin_snap; 790 791 ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 792 ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); 793 ASSERT(dmu_tx_is_syncing(tx)); 794 ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 795 796 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 797 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 798 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 799 dmu_buf_will_dirty(dbuf, tx); 800 dsphys = dbuf->db_data; 801 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 
802 dsphys->ds_dir_obj = dd->dd_object; 803 dsphys->ds_flags = flags; 804 dsphys->ds_fsid_guid = unique_create(); 805 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 806 sizeof (dsphys->ds_guid)); 807 dsphys->ds_snapnames_zapobj = 808 zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, 809 DMU_OT_NONE, 0, tx); 810 dsphys->ds_creation_time = gethrestime_sec(); 811 dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg; 812 813 if (origin == NULL) { 814 dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); 815 } else { 816 dsl_dataset_t *ohds; 817 818 dsphys->ds_prev_snap_obj = origin->ds_object; 819 dsphys->ds_prev_snap_txg = 820 origin->ds_phys->ds_creation_txg; 821 dsphys->ds_used_bytes = 822 origin->ds_phys->ds_used_bytes; 823 dsphys->ds_compressed_bytes = 824 origin->ds_phys->ds_compressed_bytes; 825 dsphys->ds_uncompressed_bytes = 826 origin->ds_phys->ds_uncompressed_bytes; 827 dsphys->ds_bp = origin->ds_phys->ds_bp; 828 dsphys->ds_flags |= origin->ds_phys->ds_flags; 829 830 dmu_buf_will_dirty(origin->ds_dbuf, tx); 831 origin->ds_phys->ds_num_children++; 832 833 VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, 834 origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds)); 835 dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, 836 dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); 837 dsl_dataset_rele(ohds, FTAG); 838 839 if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { 840 if (origin->ds_phys->ds_next_clones_obj == 0) { 841 origin->ds_phys->ds_next_clones_obj = 842 zap_create(mos, 843 DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 844 } 845 VERIFY(0 == zap_add_int(mos, 846 origin->ds_phys->ds_next_clones_obj, 847 dsobj, tx)); 848 } 849 850 dmu_buf_will_dirty(dd->dd_dbuf, tx); 851 dd->dd_phys->dd_origin_obj = origin->ds_object; 852 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 853 if (origin->ds_dir->dd_phys->dd_clones == 0) { 854 dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); 855 
origin->ds_dir->dd_phys->dd_clones = 856 zap_create(mos, 857 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 858 } 859 VERIFY3U(0, ==, zap_add_int(mos, 860 origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); 861 } 862 } 863 864 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 865 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 866 867 dmu_buf_rele(dbuf, FTAG); 868 869 dmu_buf_will_dirty(dd->dd_dbuf, tx); 870 dd->dd_phys->dd_head_dataset_obj = dsobj; 871 872 return (dsobj); 873} 874 875uint64_t 876dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, 877 dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) 878{ 879 dsl_pool_t *dp = pdd->dd_pool; 880 uint64_t dsobj, ddobj; 881 dsl_dir_t *dd; 882 883 ASSERT(lastname[0] != '@'); 884 885 ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); 886 VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); 887 888 dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx); 889 890 dsl_deleg_set_create_perms(dd, tx, cr); 891 892 dsl_dir_close(dd, FTAG); 893 894 /* 895 * If we are creating a clone, make sure we zero out any stale 896 * data from the origin snapshots zil header. 897 */ 898 if (origin != NULL) { 899 dsl_dataset_t *ds; 900 objset_t *os; 901 902 VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); 903 VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os)); 904 bzero(&os->os_zil_header, sizeof (os->os_zil_header)); 905 dsl_dataset_dirty(ds, tx); 906 dsl_dataset_rele(ds, FTAG); 907 } 908 909 return (dsobj); 910} 911
|
| 912#ifdef __FreeBSD__ 913/* FreeBSD ioctl compat begin */
|
911struct destroyarg {
| 914struct destroyarg {
|
912 dsl_sync_task_group_t *dstg; 913 char *snapname; 914 char *failed; 915 boolean_t defer;
| 915 nvlist_t *nvl; 916 const char *snapname;
|
916}; 917 918static int
| 917}; 918 919static int
|
919dsl_snapshot_destroy_one(const char *name, void *arg)
| 920dsl_check_snap_cb(const char *name, void *arg)
|
920{ 921 struct destroyarg *da = arg; 922 dsl_dataset_t *ds;
| 921{ 922 struct destroyarg *da = arg; 923 dsl_dataset_t *ds;
|
923 int err;
| |
924 char *dsname; 925 926 dsname = kmem_asprintf("%s@%s", name, da->snapname);
| 924 char *dsname; 925 926 dsname = kmem_asprintf("%s@%s", name, da->snapname);
|
927 err = dsl_dataset_own(dsname, B_TRUE, da->dstg, &ds); 928 strfree(dsname); 929 if (err == 0) { 930 struct dsl_ds_destroyarg *dsda;
| 927 VERIFY(nvlist_add_boolean(da->nvl, dsname) == 0);
|
931
| 928
|
932 dsl_dataset_make_exclusive(ds, da->dstg); 933 dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP); 934 dsda->ds = ds; 935 dsda->defer = da->defer; 936 dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, 937 dsl_dataset_destroy_sync, dsda, da->dstg, 0); 938 } else if (err == ENOENT) { 939 err = 0; 940 } else { 941 (void) strcpy(da->failed, name); 942 }
| 929 return (0); 930} 931 932int 933dmu_get_recursive_snaps_nvl(const char *fsname, const char *snapname, 934 nvlist_t *snaps) 935{ 936 struct destroyarg *da; 937 int err; 938 939 da = kmem_zalloc(sizeof (struct destroyarg), KM_SLEEP); 940 da->nvl = snaps; 941 da->snapname = snapname; 942 err = dmu_objset_find(fsname, dsl_check_snap_cb, da, 943 DS_FIND_CHILDREN); 944 kmem_free(da, sizeof (struct destroyarg)); 945
|
943 return (err); 944}
| 946 return (err); 947}
|
| 948/* FreeBSD ioctl compat end */ 949#endif /* __FreeBSD__ */
|
945 946/*
| 950 951/*
|
947 * Destroy 'snapname' in all descendants of 'fsname'.
| 952 * The snapshots must all be in the same pool.
|
948 */
| 953 */
|
949#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
| |
950int
| 954int
|
951dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer)
| 955dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed)
|
952{ 953 int err;
| 956{ 957 int err;
|
954 struct destroyarg da;
| |
955 dsl_sync_task_t *dst; 956 spa_t *spa;
| 958 dsl_sync_task_t *dst; 959 spa_t *spa;
|
| 960 nvpair_t *pair; 961 dsl_sync_task_group_t *dstg;
|
957
| 962
|
958 err = spa_open(fsname, &spa, FTAG);
| 963 pair = nvlist_next_nvpair(snaps, NULL); 964 if (pair == NULL) 965 return (0); 966 967 err = spa_open(nvpair_name(pair), &spa, FTAG);
|
959 if (err) 960 return (err);
| 968 if (err) 969 return (err);
|
961 da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 962 da.snapname = snapname; 963 da.failed = fsname; 964 da.defer = defer;
| 970 dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
|
965
| 971
|
966 err = dmu_objset_find(fsname, 967 dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
| 972 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; 973 pair = nvlist_next_nvpair(snaps, pair)) { 974 dsl_dataset_t *ds; 975 int err;
|
968
| 976
|
| 977 err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds); 978 if (err == 0) { 979 struct dsl_ds_destroyarg *dsda; 980 981 dsl_dataset_make_exclusive(ds, dstg); 982 dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), 983 KM_SLEEP); 984 dsda->ds = ds; 985 dsda->defer = defer; 986 dsl_sync_task_create(dstg, dsl_dataset_destroy_check, 987 dsl_dataset_destroy_sync, dsda, dstg, 0); 988 } else if (err == ENOENT) { 989 err = 0; 990 } else { 991 (void) strcpy(failed, nvpair_name(pair)); 992 break; 993 } 994 } 995
|
969 if (err == 0)
| 996 if (err == 0)
|
970 err = dsl_sync_task_group_wait(da.dstg);
| 997 err = dsl_sync_task_group_wait(dstg);
|
971
| 998
|
972 for (dst = list_head(&da.dstg->dstg_tasks); dst; 973 dst = list_next(&da.dstg->dstg_tasks, dst)) {
| 999 for (dst = list_head(&dstg->dstg_tasks); dst; 1000 dst = list_next(&dstg->dstg_tasks, dst)) {
|
974 struct dsl_ds_destroyarg *dsda = dst->dst_arg1; 975 dsl_dataset_t *ds = dsda->ds; 976 977 /* 978 * Return the file system name that triggered the error 979 */ 980 if (dst->dst_err) {
| 1001 struct dsl_ds_destroyarg *dsda = dst->dst_arg1; 1002 dsl_dataset_t *ds = dsda->ds; 1003 1004 /* 1005 * Return the file system name that triggered the error 1006 */ 1007 if (dst->dst_err) {
|
981 dsl_dataset_name(ds, fsname); 982 *strchr(fsname, '@') = '\0';
| 1008 dsl_dataset_name(ds, failed);
|
983 } 984 ASSERT3P(dsda->rm_origin, ==, NULL);
| 1009 } 1010 ASSERT3P(dsda->rm_origin, ==, NULL);
|
985 dsl_dataset_disown(ds, da.dstg);
| 1011 dsl_dataset_disown(ds, dstg);
|
986 kmem_free(dsda, sizeof (struct dsl_ds_destroyarg)); 987 } 988
| 1012 kmem_free(dsda, sizeof (struct dsl_ds_destroyarg)); 1013 } 1014
|
989 dsl_sync_task_group_destroy(da.dstg);
| 1015 dsl_sync_task_group_destroy(dstg);
|
990 spa_close(spa, FTAG); 991 return (err);
| 1016 spa_close(spa, FTAG); 1017 return (err);
|
| 1018
|
992} 993 994static boolean_t 995dsl_dataset_might_destroy_origin(dsl_dataset_t *ds) 996{ 997 boolean_t might_destroy = B_FALSE; 998 999 mutex_enter(&ds->ds_lock); 1000 if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 && 1001 DS_IS_DEFER_DESTROY(ds)) 1002 might_destroy = B_TRUE; 1003 mutex_exit(&ds->ds_lock); 1004 1005 return (might_destroy); 1006} 1007 1008/* 1009 * If we're removing a clone, and these three conditions are true: 1010 * 1) the clone's origin has no other children 1011 * 2) the clone's origin has no user references 1012 * 3) the clone's origin has been marked for deferred destruction 1013 * Then, prepare to remove the origin as part of this sync task group. 1014 */ 1015static int 1016dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag) 1017{ 1018 dsl_dataset_t *ds = dsda->ds; 1019 dsl_dataset_t *origin = ds->ds_prev; 1020 1021 if (dsl_dataset_might_destroy_origin(origin)) { 1022 char *name; 1023 int namelen; 1024 int error; 1025 1026 namelen = dsl_dataset_namelen(origin) + 1; 1027 name = kmem_alloc(namelen, KM_SLEEP); 1028 dsl_dataset_name(origin, name); 1029#ifdef _KERNEL 1030 error = zfs_unmount_snap(name, NULL); 1031 if (error) { 1032 kmem_free(name, namelen); 1033 return (error); 1034 } 1035#endif 1036 error = dsl_dataset_own(name, B_TRUE, tag, &origin); 1037 kmem_free(name, namelen); 1038 if (error) 1039 return (error); 1040 dsda->rm_origin = origin; 1041 dsl_dataset_make_exclusive(origin, tag); 1042 } 1043 1044 return (0); 1045} 1046 1047/* 1048 * ds must be opened as OWNER. On return (whether successful or not), 1049 * ds will be closed and caller can no longer dereference it. 
1050 */ 1051int 1052dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) 1053{ 1054 int err; 1055 dsl_sync_task_group_t *dstg; 1056 objset_t *os; 1057 dsl_dir_t *dd; 1058 uint64_t obj; 1059 struct dsl_ds_destroyarg dsda = { 0 }; 1060 dsl_dataset_t dummy_ds = { 0 }; 1061 1062 dsda.ds = ds; 1063 1064 if (dsl_dataset_is_snapshot(ds)) { 1065 /* Destroying a snapshot is simpler */ 1066 dsl_dataset_make_exclusive(ds, tag); 1067 1068 dsda.defer = defer; 1069 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 1070 dsl_dataset_destroy_check, dsl_dataset_destroy_sync, 1071 &dsda, tag, 0); 1072 ASSERT3P(dsda.rm_origin, ==, NULL); 1073 goto out; 1074 } else if (defer) { 1075 err = EINVAL; 1076 goto out; 1077 } 1078 1079 dd = ds->ds_dir; 1080 dummy_ds.ds_dir = dd; 1081 dummy_ds.ds_object = ds->ds_object; 1082 1083 /* 1084 * Check for errors and mark this ds as inconsistent, in 1085 * case we crash while freeing the objects. 1086 */ 1087 err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, 1088 dsl_dataset_destroy_begin_sync, ds, NULL, 0); 1089 if (err) 1090 goto out; 1091 1092 err = dmu_objset_from_ds(ds, &os); 1093 if (err) 1094 goto out; 1095 1096 /* 1097 * remove the objects in open context, so that we won't 1098 * have too much to do in syncing context. 1099 */ 1100 for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 1101 ds->ds_phys->ds_prev_snap_txg)) { 1102 /* 1103 * Ignore errors, if there is not enough disk space 1104 * we will deal with it in dsl_dataset_destroy_sync(). 1105 */ 1106 (void) dmu_free_object(os, obj); 1107 } 1108 if (err != ESRCH) 1109 goto out; 1110 1111 /* 1112 * Only the ZIL knows how to free log blocks. 1113 */ 1114 zil_destroy(dmu_objset_zil(os), B_FALSE); 1115 1116 /* 1117 * Sync out all in-flight IO. 1118 */ 1119 txg_wait_synced(dd->dd_pool, 0); 1120 1121 /* 1122 * If we managed to free all the objects in open 1123 * context, the user space accounting should be zero. 
1124 */ 1125 if (ds->ds_phys->ds_bp.blk_fill == 0 && 1126 dmu_objset_userused_enabled(os)) { 1127 uint64_t count; 1128 1129 ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 || 1130 count == 0); 1131 ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 || 1132 count == 0); 1133 } 1134 1135 rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); 1136 err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); 1137 rw_exit(&dd->dd_pool->dp_config_rwlock); 1138 1139 if (err) 1140 goto out; 1141 1142 /* 1143 * Blow away the dsl_dir + head dataset. 1144 */ 1145 dsl_dataset_make_exclusive(ds, tag); 1146 /* 1147 * If we're removing a clone, we might also need to remove its 1148 * origin. 1149 */ 1150 do { 1151 dsda.need_prep = B_FALSE; 1152 if (dsl_dir_is_clone(dd)) { 1153 err = dsl_dataset_origin_rm_prep(&dsda, tag); 1154 if (err) { 1155 dsl_dir_close(dd, FTAG); 1156 goto out; 1157 } 1158 } 1159 1160 dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); 1161 dsl_sync_task_create(dstg, dsl_dataset_destroy_check, 1162 dsl_dataset_destroy_sync, &dsda, tag, 0); 1163 dsl_sync_task_create(dstg, dsl_dir_destroy_check, 1164 dsl_dir_destroy_sync, &dummy_ds, FTAG, 0); 1165 err = dsl_sync_task_group_wait(dstg); 1166 dsl_sync_task_group_destroy(dstg); 1167 1168 /* 1169 * We could be racing against 'zfs release' or 'zfs destroy -d' 1170 * on the origin snap, in which case we can get EBUSY if we 1171 * needed to destroy the origin snap but were not ready to 1172 * do so. 
1173 */ 1174 if (dsda.need_prep) { 1175 ASSERT(err == EBUSY); 1176 ASSERT(dsl_dir_is_clone(dd)); 1177 ASSERT(dsda.rm_origin == NULL); 1178 } 1179 } while (dsda.need_prep); 1180 1181 if (dsda.rm_origin != NULL) 1182 dsl_dataset_disown(dsda.rm_origin, tag); 1183 1184 /* if it is successful, dsl_dir_destroy_sync will close the dd */ 1185 if (err) 1186 dsl_dir_close(dd, FTAG); 1187out: 1188 dsl_dataset_disown(ds, tag); 1189 return (err); 1190} 1191 1192blkptr_t * 1193dsl_dataset_get_blkptr(dsl_dataset_t *ds) 1194{ 1195 return (&ds->ds_phys->ds_bp); 1196} 1197 1198void 1199dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 1200{ 1201 ASSERT(dmu_tx_is_syncing(tx)); 1202 /* If it's the meta-objset, set dp_meta_rootbp */ 1203 if (ds == NULL) { 1204 tx->tx_pool->dp_meta_rootbp = *bp; 1205 } else { 1206 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1207 ds->ds_phys->ds_bp = *bp; 1208 } 1209} 1210 1211spa_t * 1212dsl_dataset_get_spa(dsl_dataset_t *ds) 1213{ 1214 return (ds->ds_dir->dd_pool->dp_spa); 1215} 1216 1217void 1218dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 1219{ 1220 dsl_pool_t *dp; 1221 1222 if (ds == NULL) /* this is the meta-objset */ 1223 return; 1224 1225 ASSERT(ds->ds_objset != NULL); 1226 1227 if (ds->ds_phys->ds_next_snap_obj != 0) 1228 panic("dirtying snapshot!"); 1229 1230 dp = ds->ds_dir->dd_pool; 1231 1232 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { 1233 /* up the hold count until we can be written out */ 1234 dmu_buf_add_ref(ds->ds_dbuf, ds); 1235 } 1236} 1237 1238/* 1239 * The unique space in the head dataset can be calculated by subtracting 1240 * the space used in the most recent snapshot, that is still being used 1241 * in this file system, from the space currently in use. To figure out 1242 * the space in the most recent snapshot still in use, we need to take 1243 * the total space used in the snapshot and subtract out the space that 1244 * has been freed up since the snapshot was taken. 
1245 */ 1246static void 1247dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) 1248{ 1249 uint64_t mrs_used; 1250 uint64_t dlused, dlcomp, dluncomp; 1251 1252 ASSERT(!dsl_dataset_is_snapshot(ds)); 1253 1254 if (ds->ds_phys->ds_prev_snap_obj != 0) 1255 mrs_used = ds->ds_prev->ds_phys->ds_used_bytes; 1256 else 1257 mrs_used = 0; 1258 1259 dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); 1260 1261 ASSERT3U(dlused, <=, mrs_used); 1262 ds->ds_phys->ds_unique_bytes = 1263 ds->ds_phys->ds_used_bytes - (mrs_used - dlused); 1264 1265 if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= 1266 SPA_VERSION_UNIQUE_ACCURATE) 1267 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1268} 1269 1270struct killarg { 1271 dsl_dataset_t *ds; 1272 dmu_tx_t *tx; 1273}; 1274 1275/* ARGSUSED */ 1276static int 1277kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, 1278 const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 1279{ 1280 struct killarg *ka = arg; 1281 dmu_tx_t *tx = ka->tx; 1282 1283 if (bp == NULL) 1284 return (0); 1285 1286 if (zb->zb_level == ZB_ZIL_LEVEL) { 1287 ASSERT(zilog != NULL); 1288 /* 1289 * It's a block in the intent log. It has no 1290 * accounting, so just free it. 1291 */ 1292 dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); 1293 } else { 1294 ASSERT(zilog == NULL); 1295 ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg); 1296 (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); 1297 } 1298 1299 return (0); 1300} 1301 1302/* ARGSUSED */ 1303static int 1304dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) 1305{ 1306 dsl_dataset_t *ds = arg1; 1307 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1308 uint64_t count; 1309 int err; 1310 1311 /* 1312 * Can't delete a head dataset if there are snapshots of it. 1313 * (Except if the only snapshots are from the branch we cloned 1314 * from.) 
1315 */ 1316 if (ds->ds_prev != NULL && 1317 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1318 return (EBUSY); 1319 1320 /* 1321 * This is really a dsl_dir thing, but check it here so that 1322 * we'll be less likely to leave this dataset inconsistent & 1323 * nearly destroyed. 1324 */ 1325 err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); 1326 if (err) 1327 return (err); 1328 if (count != 0) 1329 return (EEXIST); 1330 1331 return (0); 1332} 1333 1334/* ARGSUSED */ 1335static void 1336dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) 1337{ 1338 dsl_dataset_t *ds = arg1; 1339 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1340 1341 /* Mark it as inconsistent on-disk, in case we crash */ 1342 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1343 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 1344 1345 spa_history_log_internal(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, 1346 "dataset = %llu", ds->ds_object); 1347} 1348 1349static int 1350dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag, 1351 dmu_tx_t *tx) 1352{ 1353 dsl_dataset_t *ds = dsda->ds; 1354 dsl_dataset_t *ds_prev = ds->ds_prev; 1355 1356 if (dsl_dataset_might_destroy_origin(ds_prev)) { 1357 struct dsl_ds_destroyarg ndsda = {0}; 1358 1359 /* 1360 * If we're not prepared to remove the origin, don't remove 1361 * the clone either. 1362 */ 1363 if (dsda->rm_origin == NULL) { 1364 dsda->need_prep = B_TRUE; 1365 return (EBUSY); 1366 } 1367 1368 ndsda.ds = ds_prev; 1369 ndsda.is_origin_rm = B_TRUE; 1370 return (dsl_dataset_destroy_check(&ndsda, tag, tx)); 1371 } 1372 1373 /* 1374 * If we're not going to remove the origin after all, 1375 * undo the open context setup. 
1376 */ 1377 if (dsda->rm_origin != NULL) { 1378 dsl_dataset_disown(dsda->rm_origin, tag); 1379 dsda->rm_origin = NULL; 1380 } 1381 1382 return (0); 1383} 1384 1385/* 1386 * If you add new checks here, you may need to add 1387 * additional checks to the "temporary" case in 1388 * snapshot_check() in dmu_objset.c. 1389 */ 1390/* ARGSUSED */ 1391int 1392dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) 1393{ 1394 struct dsl_ds_destroyarg *dsda = arg1; 1395 dsl_dataset_t *ds = dsda->ds; 1396 1397 /* we have an owner hold, so noone else can destroy us */ 1398 ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); 1399 1400 /* 1401 * Only allow deferred destroy on pools that support it. 1402 * NOTE: deferred destroy is only supported on snapshots. 1403 */ 1404 if (dsda->defer) { 1405 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 1406 SPA_VERSION_USERREFS) 1407 return (ENOTSUP); 1408 ASSERT(dsl_dataset_is_snapshot(ds)); 1409 return (0); 1410 } 1411 1412 /* 1413 * Can't delete a head dataset if there are snapshots of it. 1414 * (Except if the only snapshots are from the branch we cloned 1415 * from.) 1416 */ 1417 if (ds->ds_prev != NULL && 1418 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1419 return (EBUSY); 1420 1421 /* 1422 * If we made changes this txg, traverse_dsl_dataset won't find 1423 * them. Try again. 1424 */ 1425 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1426 return (EAGAIN); 1427 1428 if (dsl_dataset_is_snapshot(ds)) { 1429 /* 1430 * If this snapshot has an elevated user reference count, 1431 * we can't destroy it yet. 1432 */ 1433 if (ds->ds_userrefs > 0 && !dsda->releasing) 1434 return (EBUSY); 1435 1436 mutex_enter(&ds->ds_lock); 1437 /* 1438 * Can't delete a branch point. However, if we're destroying 1439 * a clone and removing its origin due to it having a user 1440 * hold count of 0 and having been marked for deferred destroy, 1441 * it's OK for the origin to have a single clone. 
1442 */ 1443 if (ds->ds_phys->ds_num_children > 1444 (dsda->is_origin_rm ? 2 : 1)) { 1445 mutex_exit(&ds->ds_lock); 1446 return (EEXIST); 1447 } 1448 mutex_exit(&ds->ds_lock); 1449 } else if (dsl_dir_is_clone(ds->ds_dir)) { 1450 return (dsl_dataset_origin_check(dsda, arg2, tx)); 1451 } 1452 1453 /* XXX we should do some i/o error checking... */ 1454 return (0); 1455} 1456 1457struct refsarg { 1458 kmutex_t lock; 1459 boolean_t gone; 1460 kcondvar_t cv; 1461}; 1462 1463/* ARGSUSED */ 1464static void 1465dsl_dataset_refs_gone(dmu_buf_t *db, void *argv) 1466{ 1467 struct refsarg *arg = argv; 1468 1469 mutex_enter(&arg->lock); 1470 arg->gone = TRUE; 1471 cv_signal(&arg->cv); 1472 mutex_exit(&arg->lock); 1473} 1474 1475static void 1476dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag) 1477{ 1478 struct refsarg arg; 1479 1480 bzero(&arg, sizeof(arg)); 1481 mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL); 1482 cv_init(&arg.cv, NULL, CV_DEFAULT, NULL); 1483 arg.gone = FALSE; 1484 (void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys, 1485 dsl_dataset_refs_gone); 1486 dmu_buf_rele(ds->ds_dbuf, tag); 1487 mutex_enter(&arg.lock); 1488 while (!arg.gone) 1489 cv_wait(&arg.cv, &arg.lock); 1490 ASSERT(arg.gone); 1491 mutex_exit(&arg.lock); 1492 ds->ds_dbuf = NULL; 1493 ds->ds_phys = NULL; 1494 mutex_destroy(&arg.lock); 1495 cv_destroy(&arg.cv); 1496} 1497 1498static void 1499remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx) 1500{ 1501 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1502 uint64_t count; 1503 int err; 1504 1505 ASSERT(ds->ds_phys->ds_num_children >= 2); 1506 err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx); 1507 /* 1508 * The err should not be ENOENT, but a bug in a previous version 1509 * of the code could cause upgrade_clones_cb() to not set 1510 * ds_next_snap_obj when it should, leading to a missing entry. 
1511 * If we knew that the pool was created after 1512 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't 1513 * ENOENT. However, at least we can check that we don't have 1514 * too many entries in the next_clones_obj even after failing to 1515 * remove this one. 1516 */ 1517 if (err != ENOENT) { 1518 VERIFY3U(err, ==, 0); 1519 } 1520 ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, 1521 &count)); 1522 ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); 1523} 1524 1525static void 1526dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx) 1527{ 1528 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1529 zap_cursor_t zc; 1530 zap_attribute_t za; 1531 1532 /* 1533 * If it is the old version, dd_clones doesn't exist so we can't 1534 * find the clones, but deadlist_remove_key() is a no-op so it 1535 * doesn't matter. 1536 */ 1537 if (ds->ds_dir->dd_phys->dd_clones == 0) 1538 return; 1539 1540 for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones); 1541 zap_cursor_retrieve(&zc, &za) == 0; 1542 zap_cursor_advance(&zc)) { 1543 dsl_dataset_t *clone; 1544 1545 VERIFY3U(0, ==, dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 1546 za.za_first_integer, FTAG, &clone)); 1547 if (clone->ds_dir->dd_origin_txg > mintxg) { 1548 dsl_deadlist_remove_key(&clone->ds_deadlist, 1549 mintxg, tx); 1550 dsl_dataset_remove_clones_key(clone, mintxg, tx); 1551 } 1552 dsl_dataset_rele(clone, FTAG); 1553 } 1554 zap_cursor_fini(&zc); 1555} 1556 1557struct process_old_arg { 1558 dsl_dataset_t *ds; 1559 dsl_dataset_t *ds_prev; 1560 boolean_t after_branch_point; 1561 zio_t *pio; 1562 uint64_t used, comp, uncomp; 1563}; 1564 1565static int 1566process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 1567{ 1568 struct process_old_arg *poa = arg; 1569 dsl_pool_t *dp = poa->ds->ds_dir->dd_pool; 1570 1571 if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) { 1572 dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx); 1573 if (poa->ds_prev && 
!poa->after_branch_point && 1574 bp->blk_birth > 1575 poa->ds_prev->ds_phys->ds_prev_snap_txg) { 1576 poa->ds_prev->ds_phys->ds_unique_bytes += 1577 bp_get_dsize_sync(dp->dp_spa, bp); 1578 } 1579 } else { 1580 poa->used += bp_get_dsize_sync(dp->dp_spa, bp); 1581 poa->comp += BP_GET_PSIZE(bp); 1582 poa->uncomp += BP_GET_UCSIZE(bp); 1583 dsl_free_sync(poa->pio, dp, tx->tx_txg, bp); 1584 } 1585 return (0); 1586} 1587 1588static void 1589process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, 1590 dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx) 1591{ 1592 struct process_old_arg poa = { 0 }; 1593 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1594 objset_t *mos = dp->dp_meta_objset; 1595 1596 ASSERT(ds->ds_deadlist.dl_oldfmt); 1597 ASSERT(ds_next->ds_deadlist.dl_oldfmt); 1598 1599 poa.ds = ds; 1600 poa.ds_prev = ds_prev; 1601 poa.after_branch_point = after_branch_point; 1602 poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 1603 VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, 1604 process_old_cb, &poa, tx)); 1605 VERIFY3U(zio_wait(poa.pio), ==, 0); 1606 ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes); 1607 1608 /* change snapused */ 1609 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, 1610 -poa.used, -poa.comp, -poa.uncomp, tx); 1611 1612 /* swap next's deadlist to our deadlist */ 1613 dsl_deadlist_close(&ds->ds_deadlist); 1614 dsl_deadlist_close(&ds_next->ds_deadlist); 1615 SWITCH64(ds_next->ds_phys->ds_deadlist_obj, 1616 ds->ds_phys->ds_deadlist_obj); 1617 dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); 1618 dsl_deadlist_open(&ds_next->ds_deadlist, mos, 1619 ds_next->ds_phys->ds_deadlist_obj); 1620} 1621 1622void 1623dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) 1624{ 1625 struct dsl_ds_destroyarg *dsda = arg1; 1626 dsl_dataset_t *ds = dsda->ds; 1627 int err; 1628 int after_branch_point = FALSE; 1629 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1630 objset_t *mos = 
dp->dp_meta_objset; 1631 dsl_dataset_t *ds_prev = NULL; 1632 boolean_t wont_destroy; 1633 uint64_t obj; 1634 1635 wont_destroy = (dsda->defer && 1636 (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)); 1637 1638 ASSERT(ds->ds_owner || wont_destroy); 1639 ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1); 1640 ASSERT(ds->ds_prev == NULL || 1641 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); 1642 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); 1643 1644 if (wont_destroy) { 1645 ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); 1646 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1647 ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; 1648 return; 1649 } 1650 1651 /* signal any waiters that this dataset is going away */ 1652 mutex_enter(&ds->ds_lock); 1653 ds->ds_owner = dsl_reaper; 1654 cv_broadcast(&ds->ds_exclusive_cv); 1655 mutex_exit(&ds->ds_lock); 1656 1657 /* Remove our reservation */ 1658 if (ds->ds_reserved != 0) { 1659 dsl_prop_setarg_t psa; 1660 uint64_t value = 0; 1661 1662 dsl_prop_setarg_init_uint64(&psa, "refreservation", 1663 (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), 1664 &value); 1665 psa.psa_effective_value = 0; /* predict default value */ 1666 1667 dsl_dataset_set_reservation_sync(ds, &psa, tx); 1668 ASSERT3U(ds->ds_reserved, ==, 0); 1669 } 1670 1671 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1672 1673 dsl_scan_ds_destroyed(ds, tx); 1674 1675 obj = ds->ds_object; 1676 1677 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1678 if (ds->ds_prev) { 1679 ds_prev = ds->ds_prev; 1680 } else { 1681 VERIFY(0 == dsl_dataset_hold_obj(dp, 1682 ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev)); 1683 } 1684 after_branch_point = 1685 (ds_prev->ds_phys->ds_next_snap_obj != obj); 1686 1687 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 1688 if (after_branch_point && 1689 ds_prev->ds_phys->ds_next_clones_obj != 0) { 1690 remove_from_next_clones(ds_prev, obj, tx); 1691 if (ds->ds_phys->ds_next_snap_obj != 0) { 1692 VERIFY(0 == 
zap_add_int(mos, 1693 ds_prev->ds_phys->ds_next_clones_obj, 1694 ds->ds_phys->ds_next_snap_obj, tx)); 1695 } 1696 } 1697 if (after_branch_point && 1698 ds->ds_phys->ds_next_snap_obj == 0) { 1699 /* This clone is toast. */ 1700 ASSERT(ds_prev->ds_phys->ds_num_children > 1); 1701 ds_prev->ds_phys->ds_num_children--; 1702 1703 /* 1704 * If the clone's origin has no other clones, no 1705 * user holds, and has been marked for deferred 1706 * deletion, then we should have done the necessary 1707 * destroy setup for it. 1708 */ 1709 if (ds_prev->ds_phys->ds_num_children == 1 && 1710 ds_prev->ds_userrefs == 0 && 1711 DS_IS_DEFER_DESTROY(ds_prev)) { 1712 ASSERT3P(dsda->rm_origin, !=, NULL); 1713 } else { 1714 ASSERT3P(dsda->rm_origin, ==, NULL); 1715 } 1716 } else if (!after_branch_point) { 1717 ds_prev->ds_phys->ds_next_snap_obj = 1718 ds->ds_phys->ds_next_snap_obj; 1719 } 1720 } 1721 1722 if (dsl_dataset_is_snapshot(ds)) { 1723 dsl_dataset_t *ds_next; 1724 uint64_t old_unique; 1725 uint64_t used = 0, comp = 0, uncomp = 0; 1726 1727 VERIFY(0 == dsl_dataset_hold_obj(dp, 1728 ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next)); 1729 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); 1730 1731 old_unique = ds_next->ds_phys->ds_unique_bytes; 1732 1733 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 1734 ds_next->ds_phys->ds_prev_snap_obj = 1735 ds->ds_phys->ds_prev_snap_obj; 1736 ds_next->ds_phys->ds_prev_snap_txg = 1737 ds->ds_phys->ds_prev_snap_txg; 1738 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1739 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); 1740 1741 1742 if (ds_next->ds_deadlist.dl_oldfmt) { 1743 process_old_deadlist(ds, ds_prev, ds_next, 1744 after_branch_point, tx); 1745 } else { 1746 /* Adjust prev's unique space. 
*/ 1747 if (ds_prev && !after_branch_point) { 1748 dsl_deadlist_space_range(&ds_next->ds_deadlist, 1749 ds_prev->ds_phys->ds_prev_snap_txg, 1750 ds->ds_phys->ds_prev_snap_txg, 1751 &used, &comp, &uncomp); 1752 ds_prev->ds_phys->ds_unique_bytes += used; 1753 } 1754 1755 /* Adjust snapused. */ 1756 dsl_deadlist_space_range(&ds_next->ds_deadlist, 1757 ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, 1758 &used, &comp, &uncomp); 1759 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, 1760 -used, -comp, -uncomp, tx); 1761 1762 /* Move blocks to be freed to pool's free list. */ 1763 dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, 1764 &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg, 1765 tx); 1766 dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, 1767 DD_USED_HEAD, used, comp, uncomp, tx); 1768 dsl_dir_dirty(tx->tx_pool->dp_free_dir, tx); 1769 1770 /* Merge our deadlist into next's and free it. */ 1771 dsl_deadlist_merge(&ds_next->ds_deadlist, 1772 ds->ds_phys->ds_deadlist_obj, tx); 1773 } 1774 dsl_deadlist_close(&ds->ds_deadlist); 1775 dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); 1776 1777 /* Collapse range in clone heads */ 1778 dsl_dataset_remove_clones_key(ds, 1779 ds->ds_phys->ds_creation_txg, tx); 1780 1781 if (dsl_dataset_is_snapshot(ds_next)) { 1782 dsl_dataset_t *ds_nextnext; 1783 1784 /* 1785 * Update next's unique to include blocks which 1786 * were previously shared by only this snapshot 1787 * and it. Those blocks will be born after the 1788 * prev snap and before this snap, and will have 1789 * died after the next snap and before the one 1790 * after that (ie. be on the snap after next's 1791 * deadlist). 
1792 */ 1793 VERIFY(0 == dsl_dataset_hold_obj(dp, 1794 ds_next->ds_phys->ds_next_snap_obj, 1795 FTAG, &ds_nextnext)); 1796 dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, 1797 ds->ds_phys->ds_prev_snap_txg, 1798 ds->ds_phys->ds_creation_txg, 1799 &used, &comp, &uncomp); 1800 ds_next->ds_phys->ds_unique_bytes += used; 1801 dsl_dataset_rele(ds_nextnext, FTAG); 1802 ASSERT3P(ds_next->ds_prev, ==, NULL); 1803 1804 /* Collapse range in this head. */ 1805 dsl_dataset_t *hds; 1806 VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, 1807 ds->ds_dir->dd_phys->dd_head_dataset_obj, 1808 FTAG, &hds)); 1809 dsl_deadlist_remove_key(&hds->ds_deadlist, 1810 ds->ds_phys->ds_creation_txg, tx); 1811 dsl_dataset_rele(hds, FTAG); 1812 1813 } else { 1814 ASSERT3P(ds_next->ds_prev, ==, ds); 1815 dsl_dataset_drop_ref(ds_next->ds_prev, ds_next); 1816 ds_next->ds_prev = NULL; 1817 if (ds_prev) { 1818 VERIFY(0 == dsl_dataset_get_ref(dp, 1819 ds->ds_phys->ds_prev_snap_obj, 1820 ds_next, &ds_next->ds_prev)); 1821 } 1822 1823 dsl_dataset_recalc_head_uniq(ds_next); 1824 1825 /* 1826 * Reduce the amount of our unconsmed refreservation 1827 * being charged to our parent by the amount of 1828 * new unique data we have gained. 1829 */ 1830 if (old_unique < ds_next->ds_reserved) { 1831 int64_t mrsdelta; 1832 uint64_t new_unique = 1833 ds_next->ds_phys->ds_unique_bytes; 1834 1835 ASSERT(old_unique <= new_unique); 1836 mrsdelta = MIN(new_unique - old_unique, 1837 ds_next->ds_reserved - old_unique); 1838 dsl_dir_diduse_space(ds->ds_dir, 1839 DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); 1840 } 1841 } 1842 dsl_dataset_rele(ds_next, FTAG); 1843 } else { 1844 /* 1845 * There's no next snapshot, so this is a head dataset. 1846 * Destroy the deadlist. Unless it's a clone, the 1847 * deadlist should be empty. (If it's a clone, it's 1848 * safe to ignore the deadlist contents.) 
1849 */ 1850 struct killarg ka; 1851 1852 dsl_deadlist_close(&ds->ds_deadlist); 1853 dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); 1854 ds->ds_phys->ds_deadlist_obj = 0; 1855 1856 /* 1857 * Free everything that we point to (that's born after 1858 * the previous snapshot, if we are a clone) 1859 * 1860 * NB: this should be very quick, because we already 1861 * freed all the objects in open context. 1862 */ 1863 ka.ds = ds; 1864 ka.tx = tx; 1865 err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1866 TRAVERSE_POST, kill_blkptr, &ka); 1867 ASSERT3U(err, ==, 0); 1868 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || 1869 ds->ds_phys->ds_unique_bytes == 0); 1870 1871 if (ds->ds_prev != NULL) { 1872 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 1873 VERIFY3U(0, ==, zap_remove_int(mos, 1874 ds->ds_prev->ds_dir->dd_phys->dd_clones, 1875 ds->ds_object, tx)); 1876 } 1877 dsl_dataset_rele(ds->ds_prev, ds); 1878 ds->ds_prev = ds_prev = NULL; 1879 } 1880 } 1881 1882 /* 1883 * This must be done after the dsl_traverse(), because it will 1884 * re-open the objset. 
1885 */ 1886 if (ds->ds_objset) { 1887 dmu_objset_evict(ds->ds_objset); 1888 ds->ds_objset = NULL; 1889 } 1890 1891 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { 1892 /* Erase the link in the dir */ 1893 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 1894 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; 1895 ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); 1896 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); 1897 ASSERT(err == 0); 1898 } else { 1899 /* remove from snapshot namespace */ 1900 dsl_dataset_t *ds_head; 1901 ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0); 1902 VERIFY(0 == dsl_dataset_hold_obj(dp, 1903 ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head)); 1904 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1905#ifdef ZFS_DEBUG 1906 { 1907 uint64_t val; 1908 1909 err = dsl_dataset_snap_lookup(ds_head, 1910 ds->ds_snapname, &val); 1911 ASSERT3U(err, ==, 0); 1912 ASSERT3U(val, ==, obj); 1913 } 1914#endif 1915 err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx); 1916 ASSERT(err == 0); 1917 dsl_dataset_rele(ds_head, FTAG); 1918 } 1919 1920 if (ds_prev && ds->ds_prev != ds_prev) 1921 dsl_dataset_rele(ds_prev, FTAG); 1922 1923 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 1924 spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx, 1925 "dataset = %llu", ds->ds_object); 1926 1927 if (ds->ds_phys->ds_next_clones_obj != 0) { 1928 uint64_t count; 1929 ASSERT(0 == zap_count(mos, 1930 ds->ds_phys->ds_next_clones_obj, &count) && count == 0); 1931 VERIFY(0 == dmu_object_free(mos, 1932 ds->ds_phys->ds_next_clones_obj, tx)); 1933 } 1934 if (ds->ds_phys->ds_props_obj != 0) 1935 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx)); 1936 if (ds->ds_phys->ds_userrefs_obj != 0) 1937 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx)); 1938 dsl_dir_close(ds->ds_dir, ds); 1939 ds->ds_dir = NULL; 1940 dsl_dataset_drain_refs(ds, tag); 1941 VERIFY(0 == dmu_object_free(mos, obj, tx)); 1942 1943 if (dsda->rm_origin) { 
1944 /* 1945 * Remove the origin of the clone we just destroyed. 1946 */ 1947 struct dsl_ds_destroyarg ndsda = {0}; 1948 1949 ndsda.ds = dsda->rm_origin; 1950 dsl_dataset_destroy_sync(&ndsda, tag, tx); 1951 } 1952} 1953 1954static int 1955dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) 1956{ 1957 uint64_t asize; 1958 1959 if (!dmu_tx_is_syncing(tx)) 1960 return (0); 1961 1962 /* 1963 * If there's an fs-only reservation, any blocks that might become 1964 * owned by the snapshot dataset must be accommodated by space 1965 * outside of the reservation. 1966 */ 1967 ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); 1968 asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 1969 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 1970 return (ENOSPC); 1971 1972 /* 1973 * Propogate any reserved space for this snapshot to other 1974 * snapshot checks in this sync group. 1975 */ 1976 if (asize > 0) 1977 dsl_dir_willuse_space(ds->ds_dir, asize, tx); 1978 1979 return (0); 1980} 1981 1982int 1983dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) 1984{ 1985 dsl_dataset_t *ds = arg1; 1986 const char *snapname = arg2; 1987 int err; 1988 uint64_t value; 1989 1990 /* 1991 * We don't allow multiple snapshots of the same txg. If there 1992 * is already one, try again. 1993 */ 1994 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) 1995 return (EAGAIN); 1996 1997 /* 1998 * Check for conflicting name snapshot name. 1999 */ 2000 err = dsl_dataset_snap_lookup(ds, snapname, &value); 2001 if (err == 0) 2002 return (EEXIST); 2003 if (err != ENOENT) 2004 return (err); 2005 2006 /* 2007 * Check that the dataset's name is not too long. 
Name consists 2008 * of the dataset's length + 1 for the @-sign + snapshot name's length 2009 */ 2010 if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) 2011 return (ENAMETOOLONG); 2012 2013 err = dsl_dataset_snapshot_reserve_space(ds, tx); 2014 if (err) 2015 return (err); 2016 2017 ds->ds_trysnap_txg = tx->tx_txg; 2018 return (0); 2019} 2020 2021void 2022dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) 2023{ 2024 dsl_dataset_t *ds = arg1; 2025 const char *snapname = arg2; 2026 dsl_pool_t *dp = ds->ds_dir->dd_pool; 2027 dmu_buf_t *dbuf; 2028 dsl_dataset_phys_t *dsphys; 2029 uint64_t dsobj, crtxg; 2030 objset_t *mos = dp->dp_meta_objset; 2031 int err; 2032 2033 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 2034 2035 /* 2036 * The origin's ds_creation_txg has to be < TXG_INITIAL 2037 */ 2038 if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) 2039 crtxg = 1; 2040 else 2041 crtxg = tx->tx_txg; 2042 2043 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 2044 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 2045 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 2046 dmu_buf_will_dirty(dbuf, tx); 2047 dsphys = dbuf->db_data; 2048 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 2049 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 2050 dsphys->ds_fsid_guid = unique_create(); 2051 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 2052 sizeof (dsphys->ds_guid)); 2053 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 2054 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 2055 dsphys->ds_next_snap_obj = ds->ds_object; 2056 dsphys->ds_num_children = 1; 2057 dsphys->ds_creation_time = gethrestime_sec(); 2058 dsphys->ds_creation_txg = crtxg; 2059 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 2060 dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; 2061 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 2062 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 2063 dsphys->ds_flags = 
ds->ds_phys->ds_flags; 2064 dsphys->ds_bp = ds->ds_phys->ds_bp; 2065 dmu_buf_rele(dbuf, FTAG); 2066 2067 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 2068 if (ds->ds_prev) { 2069 uint64_t next_clones_obj = 2070 ds->ds_prev->ds_phys->ds_next_clones_obj; 2071 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 2072 ds->ds_object || 2073 ds->ds_prev->ds_phys->ds_num_children > 1); 2074 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 2075 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 2076 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 2077 ds->ds_prev->ds_phys->ds_creation_txg); 2078 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 2079 } else if (next_clones_obj != 0) { 2080 remove_from_next_clones(ds->ds_prev, 2081 dsphys->ds_next_snap_obj, tx); 2082 VERIFY3U(0, ==, zap_add_int(mos, 2083 next_clones_obj, dsobj, tx)); 2084 } 2085 } 2086 2087 /* 2088 * If we have a reference-reservation on this dataset, we will 2089 * need to increase the amount of refreservation being charged 2090 * since our unique space is going to zero. 
2091 */ 2092 if (ds->ds_reserved) { 2093 int64_t delta; 2094 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 2095 delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 2096 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, 2097 delta, 0, 0, tx); 2098 } 2099 2100 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2101 zfs_dbgmsg("taking snapshot %s@%s/%llu; newkey=%llu", 2102 ds->ds_dir->dd_myname, snapname, dsobj, 2103 ds->ds_phys->ds_prev_snap_txg); 2104 ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, 2105 UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx); 2106 dsl_deadlist_close(&ds->ds_deadlist); 2107 dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); 2108 dsl_deadlist_add_key(&ds->ds_deadlist, 2109 ds->ds_phys->ds_prev_snap_txg, tx); 2110 2111 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); 2112 ds->ds_phys->ds_prev_snap_obj = dsobj; 2113 ds->ds_phys->ds_prev_snap_txg = crtxg; 2114 ds->ds_phys->ds_unique_bytes = 0; 2115 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 2116 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 2117 2118 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 2119 snapname, 8, 1, &dsobj, tx); 2120 ASSERT(err == 0); 2121 2122 if (ds->ds_prev) 2123 dsl_dataset_drop_ref(ds->ds_prev, ds); 2124 VERIFY(0 == dsl_dataset_get_ref(dp, 2125 ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); 2126 2127 dsl_scan_ds_snapshotted(ds, tx); 2128 2129 dsl_dir_snap_cmtime_update(ds->ds_dir); 2130 2131 spa_history_log_internal(LOG_DS_SNAPSHOT, dp->dp_spa, tx, 2132 "dataset = %llu", dsobj); 2133} 2134 2135void 2136dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 2137{ 2138 ASSERT(dmu_tx_is_syncing(tx)); 2139 ASSERT(ds->ds_objset != NULL); 2140 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 2141 2142 /* 2143 * in case we had to change ds_fsid_guid when we opened it, 2144 * sync it out now. 
2145 */ 2146 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2147 ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 2148 2149 dsl_dir_dirty(ds->ds_dir, tx); 2150 dmu_objset_sync(ds->ds_objset, zio, tx); 2151} 2152
| 1019} 1020 1021static boolean_t 1022dsl_dataset_might_destroy_origin(dsl_dataset_t *ds) 1023{ 1024 boolean_t might_destroy = B_FALSE; 1025 1026 mutex_enter(&ds->ds_lock); 1027 if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 && 1028 DS_IS_DEFER_DESTROY(ds)) 1029 might_destroy = B_TRUE; 1030 mutex_exit(&ds->ds_lock); 1031 1032 return (might_destroy); 1033} 1034 1035/* 1036 * If we're removing a clone, and these three conditions are true: 1037 * 1) the clone's origin has no other children 1038 * 2) the clone's origin has no user references 1039 * 3) the clone's origin has been marked for deferred destruction 1040 * Then, prepare to remove the origin as part of this sync task group. 1041 */ 1042static int 1043dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag) 1044{ 1045 dsl_dataset_t *ds = dsda->ds; 1046 dsl_dataset_t *origin = ds->ds_prev; 1047 1048 if (dsl_dataset_might_destroy_origin(origin)) { 1049 char *name; 1050 int namelen; 1051 int error; 1052 1053 namelen = dsl_dataset_namelen(origin) + 1; 1054 name = kmem_alloc(namelen, KM_SLEEP); 1055 dsl_dataset_name(origin, name); 1056#ifdef _KERNEL 1057 error = zfs_unmount_snap(name, NULL); 1058 if (error) { 1059 kmem_free(name, namelen); 1060 return (error); 1061 } 1062#endif 1063 error = dsl_dataset_own(name, B_TRUE, tag, &origin); 1064 kmem_free(name, namelen); 1065 if (error) 1066 return (error); 1067 dsda->rm_origin = origin; 1068 dsl_dataset_make_exclusive(origin, tag); 1069 } 1070 1071 return (0); 1072} 1073 1074/* 1075 * ds must be opened as OWNER. On return (whether successful or not), 1076 * ds will be closed and caller can no longer dereference it. 
/*
 * Destroy a dataset.
 *
 * ds must be opened as OWNER.  On return (whether successful or not),
 * ds will be closed and caller can no longer dereference it: every exit
 * path goes through the "out" label, which disowns ds.
 *
 * Snapshots are destroyed with a single sync task; "defer" is honoured
 * only for them.  Requesting a deferred destroy of a non-snapshot fails
 * with EINVAL.
 *
 * For a head dataset the work is split:
 *   1. a "begin" sync task validates the request and marks the dataset
 *      inconsistent on disk;
 *   2. its objects are freed in open context and the ZIL is destroyed;
 *   3. after waiting for the txg to sync, a sync task group destroys the
 *      dataset and its dsl_dir.  When the dataset is a clone whose
 *      origin should go away too, the group is retried (need_prep) after
 *      preparing the origin for removal.
 *
 * Returns 0 on success or an errno value from whichever step failed.
 */
int
dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
{
	int err;
	dsl_sync_task_group_t *dstg;
	objset_t *os;
	dsl_dir_t *dd;
	uint64_t obj;
	struct dsl_ds_destroyarg dsda = { 0 };
	/* Stand-in dataset passed as the argument of the dir-destroy task. */
	dsl_dataset_t dummy_ds = { 0 };

	dsda.ds = ds;

	if (dsl_dataset_is_snapshot(ds)) {
		/* Destroying a snapshot is simpler */
		dsl_dataset_make_exclusive(ds, tag);

		dsda.defer = defer;
		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
		    &dsda, tag, 0);
		/* Origin removal is never set up on the snapshot path. */
		ASSERT3P(dsda.rm_origin, ==, NULL);
		goto out;
	} else if (defer) {
		/* Deferred destroy is only valid for snapshots. */
		err = EINVAL;
		goto out;
	}

	dd = ds->ds_dir;
	dummy_ds.ds_dir = dd;
	dummy_ds.ds_object = ds->ds_object;

	/*
	 * Check for errors and mark this ds as inconsistent, in
	 * case we crash while freeing the objects.
	 */
	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
	if (err)
		goto out;

	err = dmu_objset_from_ds(ds, &os);
	if (err)
		goto out;

	/*
	 * remove the objects in open context, so that we won't
	 * have too much to do in syncing context.
	 */
	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
	    ds->ds_phys->ds_prev_snap_txg)) {
		/*
		 * Ignore errors, if there is not enough disk space
		 * we will deal with it in dsl_dataset_destroy_sync().
		 */
		(void) dmu_free_object(os, obj);
	}
	/* The loop is expected to end with ESRCH; anything else is fatal. */
	if (err != ESRCH)
		goto out;

	/*
	 * Only the ZIL knows how to free log blocks.
	 */
	zil_destroy(dmu_objset_zil(os), B_FALSE);

	/*
	 * Sync out all in-flight IO.
	 */
	txg_wait_synced(dd->dd_pool, 0);

	/*
	 * If we managed to free all the objects in open
	 * context, the user space accounting should be zero.
	 */
	if (ds->ds_phys->ds_bp.blk_fill == 0 &&
	    dmu_objset_userused_enabled(os)) {
		uint64_t count;

		ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
		    count == 0);
		ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 ||
		    count == 0);
	}

	/*
	 * Take our own FTAG hold on the dsl_dir (dd is overwritten with
	 * the result); it is closed below on failure.
	 */
	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
	rw_exit(&dd->dd_pool->dp_config_rwlock);

	if (err)
		goto out;

	/*
	 * Blow away the dsl_dir + head dataset.
	 */
	dsl_dataset_make_exclusive(ds, tag);
	/*
	 * If we're removing a clone, we might also need to remove its
	 * origin.
	 */
	do {
		dsda.need_prep = B_FALSE;
		if (dsl_dir_is_clone(dd)) {
			err = dsl_dataset_origin_rm_prep(&dsda, tag);
			if (err) {
				dsl_dir_close(dd, FTAG);
				goto out;
			}
		}

		/* One group: destroy the dataset, then its directory. */
		dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
		dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
		    dsl_dataset_destroy_sync, &dsda, tag, 0);
		dsl_sync_task_create(dstg, dsl_dir_destroy_check,
		    dsl_dir_destroy_sync, &dummy_ds, FTAG, 0);
		err = dsl_sync_task_group_wait(dstg);
		dsl_sync_task_group_destroy(dstg);

		/*
		 * We could be racing against 'zfs release' or 'zfs destroy -d'
		 * on the origin snap, in which case we can get EBUSY if we
		 * needed to destroy the origin snap but were not ready to
		 * do so.
		 */
		if (dsda.need_prep) {
			ASSERT(err == EBUSY);
			ASSERT(dsl_dir_is_clone(dd));
			ASSERT(dsda.rm_origin == NULL);
		}
	} while (dsda.need_prep);

	/* Drop the ownership taken by dsl_dataset_origin_rm_prep(). */
	if (dsda.rm_origin != NULL)
		dsl_dataset_disown(dsda.rm_origin, tag);

	/* if it is successful, dsl_dir_destroy_sync will close the dd */
	if (err)
		dsl_dir_close(dd, FTAG);
out:
	dsl_dataset_disown(ds, tag);
	return (err);
}

/*
 * Return a pointer to the root block pointer stored in the dataset's
 * phys structure.
 */
blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
	return (&ds->ds_phys->ds_bp);
}

/*
 * Store *bp as the new root block pointer; tx must be syncing.  A NULL
 * ds denotes the meta-objset, whose root bp is kept on the pool.
 */
void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}

/*
 * Return the spa of the pool the dataset lives in.
 */
spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}

/*
 * Add the dataset to its pool's dirty-dataset list for tx's txg.  A NULL
 * ds (the meta-objset) is ignored; dirtying a snapshot panics.
 */
void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_objset != NULL);

	/* A non-zero next snapshot means ds is itself a snapshot. */
	if (ds->ds_phys->ds_next_snap_obj != 0)
		panic("dirtying snapshot!");

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}
1272 */ 1273static void 1274dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) 1275{ 1276 uint64_t mrs_used; 1277 uint64_t dlused, dlcomp, dluncomp; 1278 1279 ASSERT(!dsl_dataset_is_snapshot(ds)); 1280 1281 if (ds->ds_phys->ds_prev_snap_obj != 0) 1282 mrs_used = ds->ds_prev->ds_phys->ds_used_bytes; 1283 else 1284 mrs_used = 0; 1285 1286 dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); 1287 1288 ASSERT3U(dlused, <=, mrs_used); 1289 ds->ds_phys->ds_unique_bytes = 1290 ds->ds_phys->ds_used_bytes - (mrs_used - dlused); 1291 1292 if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= 1293 SPA_VERSION_UNIQUE_ACCURATE) 1294 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1295} 1296 1297struct killarg { 1298 dsl_dataset_t *ds; 1299 dmu_tx_t *tx; 1300}; 1301 1302/* ARGSUSED */ 1303static int 1304kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, 1305 const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 1306{ 1307 struct killarg *ka = arg; 1308 dmu_tx_t *tx = ka->tx; 1309 1310 if (bp == NULL) 1311 return (0); 1312 1313 if (zb->zb_level == ZB_ZIL_LEVEL) { 1314 ASSERT(zilog != NULL); 1315 /* 1316 * It's a block in the intent log. It has no 1317 * accounting, so just free it. 1318 */ 1319 dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); 1320 } else { 1321 ASSERT(zilog == NULL); 1322 ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg); 1323 (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); 1324 } 1325 1326 return (0); 1327} 1328 1329/* ARGSUSED */ 1330static int 1331dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) 1332{ 1333 dsl_dataset_t *ds = arg1; 1334 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1335 uint64_t count; 1336 int err; 1337 1338 /* 1339 * Can't delete a head dataset if there are snapshots of it. 1340 * (Except if the only snapshots are from the branch we cloned 1341 * from.) 
1342 */ 1343 if (ds->ds_prev != NULL && 1344 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1345 return (EBUSY); 1346 1347 /* 1348 * This is really a dsl_dir thing, but check it here so that 1349 * we'll be less likely to leave this dataset inconsistent & 1350 * nearly destroyed. 1351 */ 1352 err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); 1353 if (err) 1354 return (err); 1355 if (count != 0) 1356 return (EEXIST); 1357 1358 return (0); 1359} 1360 1361/* ARGSUSED */ 1362static void 1363dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) 1364{ 1365 dsl_dataset_t *ds = arg1; 1366 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1367 1368 /* Mark it as inconsistent on-disk, in case we crash */ 1369 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1370 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 1371 1372 spa_history_log_internal(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, 1373 "dataset = %llu", ds->ds_object); 1374} 1375 1376static int 1377dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag, 1378 dmu_tx_t *tx) 1379{ 1380 dsl_dataset_t *ds = dsda->ds; 1381 dsl_dataset_t *ds_prev = ds->ds_prev; 1382 1383 if (dsl_dataset_might_destroy_origin(ds_prev)) { 1384 struct dsl_ds_destroyarg ndsda = {0}; 1385 1386 /* 1387 * If we're not prepared to remove the origin, don't remove 1388 * the clone either. 1389 */ 1390 if (dsda->rm_origin == NULL) { 1391 dsda->need_prep = B_TRUE; 1392 return (EBUSY); 1393 } 1394 1395 ndsda.ds = ds_prev; 1396 ndsda.is_origin_rm = B_TRUE; 1397 return (dsl_dataset_destroy_check(&ndsda, tag, tx)); 1398 } 1399 1400 /* 1401 * If we're not going to remove the origin after all, 1402 * undo the open context setup. 
1403 */ 1404 if (dsda->rm_origin != NULL) { 1405 dsl_dataset_disown(dsda->rm_origin, tag); 1406 dsda->rm_origin = NULL; 1407 } 1408 1409 return (0); 1410} 1411 1412/* 1413 * If you add new checks here, you may need to add 1414 * additional checks to the "temporary" case in 1415 * snapshot_check() in dmu_objset.c. 1416 */ 1417/* ARGSUSED */ 1418int 1419dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) 1420{ 1421 struct dsl_ds_destroyarg *dsda = arg1; 1422 dsl_dataset_t *ds = dsda->ds; 1423 1424 /* we have an owner hold, so noone else can destroy us */ 1425 ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); 1426 1427 /* 1428 * Only allow deferred destroy on pools that support it. 1429 * NOTE: deferred destroy is only supported on snapshots. 1430 */ 1431 if (dsda->defer) { 1432 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 1433 SPA_VERSION_USERREFS) 1434 return (ENOTSUP); 1435 ASSERT(dsl_dataset_is_snapshot(ds)); 1436 return (0); 1437 } 1438 1439 /* 1440 * Can't delete a head dataset if there are snapshots of it. 1441 * (Except if the only snapshots are from the branch we cloned 1442 * from.) 1443 */ 1444 if (ds->ds_prev != NULL && 1445 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1446 return (EBUSY); 1447 1448 /* 1449 * If we made changes this txg, traverse_dsl_dataset won't find 1450 * them. Try again. 1451 */ 1452 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1453 return (EAGAIN); 1454 1455 if (dsl_dataset_is_snapshot(ds)) { 1456 /* 1457 * If this snapshot has an elevated user reference count, 1458 * we can't destroy it yet. 1459 */ 1460 if (ds->ds_userrefs > 0 && !dsda->releasing) 1461 return (EBUSY); 1462 1463 mutex_enter(&ds->ds_lock); 1464 /* 1465 * Can't delete a branch point. However, if we're destroying 1466 * a clone and removing its origin due to it having a user 1467 * hold count of 0 and having been marked for deferred destroy, 1468 * it's OK for the origin to have a single clone. 
1469 */ 1470 if (ds->ds_phys->ds_num_children > 1471 (dsda->is_origin_rm ? 2 : 1)) { 1472 mutex_exit(&ds->ds_lock); 1473 return (EEXIST); 1474 } 1475 mutex_exit(&ds->ds_lock); 1476 } else if (dsl_dir_is_clone(ds->ds_dir)) { 1477 return (dsl_dataset_origin_check(dsda, arg2, tx)); 1478 } 1479 1480 /* XXX we should do some i/o error checking... */ 1481 return (0); 1482} 1483 1484struct refsarg { 1485 kmutex_t lock; 1486 boolean_t gone; 1487 kcondvar_t cv; 1488}; 1489 1490/* ARGSUSED */ 1491static void 1492dsl_dataset_refs_gone(dmu_buf_t *db, void *argv) 1493{ 1494 struct refsarg *arg = argv; 1495 1496 mutex_enter(&arg->lock); 1497 arg->gone = TRUE; 1498 cv_signal(&arg->cv); 1499 mutex_exit(&arg->lock); 1500} 1501 1502static void 1503dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag) 1504{ 1505 struct refsarg arg; 1506 1507 bzero(&arg, sizeof(arg)); 1508 mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL); 1509 cv_init(&arg.cv, NULL, CV_DEFAULT, NULL); 1510 arg.gone = FALSE; 1511 (void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys, 1512 dsl_dataset_refs_gone); 1513 dmu_buf_rele(ds->ds_dbuf, tag); 1514 mutex_enter(&arg.lock); 1515 while (!arg.gone) 1516 cv_wait(&arg.cv, &arg.lock); 1517 ASSERT(arg.gone); 1518 mutex_exit(&arg.lock); 1519 ds->ds_dbuf = NULL; 1520 ds->ds_phys = NULL; 1521 mutex_destroy(&arg.lock); 1522 cv_destroy(&arg.cv); 1523} 1524 1525static void 1526remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx) 1527{ 1528 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1529 uint64_t count; 1530 int err; 1531 1532 ASSERT(ds->ds_phys->ds_num_children >= 2); 1533 err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx); 1534 /* 1535 * The err should not be ENOENT, but a bug in a previous version 1536 * of the code could cause upgrade_clones_cb() to not set 1537 * ds_next_snap_obj when it should, leading to a missing entry. 
1538 * If we knew that the pool was created after 1539 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't 1540 * ENOENT. However, at least we can check that we don't have 1541 * too many entries in the next_clones_obj even after failing to 1542 * remove this one. 1543 */ 1544 if (err != ENOENT) { 1545 VERIFY3U(err, ==, 0); 1546 } 1547 ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, 1548 &count)); 1549 ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); 1550} 1551 1552static void 1553dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx) 1554{ 1555 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1556 zap_cursor_t zc; 1557 zap_attribute_t za; 1558 1559 /* 1560 * If it is the old version, dd_clones doesn't exist so we can't 1561 * find the clones, but deadlist_remove_key() is a no-op so it 1562 * doesn't matter. 1563 */ 1564 if (ds->ds_dir->dd_phys->dd_clones == 0) 1565 return; 1566 1567 for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones); 1568 zap_cursor_retrieve(&zc, &za) == 0; 1569 zap_cursor_advance(&zc)) { 1570 dsl_dataset_t *clone; 1571 1572 VERIFY3U(0, ==, dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 1573 za.za_first_integer, FTAG, &clone)); 1574 if (clone->ds_dir->dd_origin_txg > mintxg) { 1575 dsl_deadlist_remove_key(&clone->ds_deadlist, 1576 mintxg, tx); 1577 dsl_dataset_remove_clones_key(clone, mintxg, tx); 1578 } 1579 dsl_dataset_rele(clone, FTAG); 1580 } 1581 zap_cursor_fini(&zc); 1582} 1583 1584struct process_old_arg { 1585 dsl_dataset_t *ds; 1586 dsl_dataset_t *ds_prev; 1587 boolean_t after_branch_point; 1588 zio_t *pio; 1589 uint64_t used, comp, uncomp; 1590}; 1591 1592static int 1593process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 1594{ 1595 struct process_old_arg *poa = arg; 1596 dsl_pool_t *dp = poa->ds->ds_dir->dd_pool; 1597 1598 if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) { 1599 dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx); 1600 if (poa->ds_prev && 
!poa->after_branch_point && 1601 bp->blk_birth > 1602 poa->ds_prev->ds_phys->ds_prev_snap_txg) { 1603 poa->ds_prev->ds_phys->ds_unique_bytes += 1604 bp_get_dsize_sync(dp->dp_spa, bp); 1605 } 1606 } else { 1607 poa->used += bp_get_dsize_sync(dp->dp_spa, bp); 1608 poa->comp += BP_GET_PSIZE(bp); 1609 poa->uncomp += BP_GET_UCSIZE(bp); 1610 dsl_free_sync(poa->pio, dp, tx->tx_txg, bp); 1611 } 1612 return (0); 1613} 1614 1615static void 1616process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, 1617 dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx) 1618{ 1619 struct process_old_arg poa = { 0 }; 1620 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1621 objset_t *mos = dp->dp_meta_objset; 1622 1623 ASSERT(ds->ds_deadlist.dl_oldfmt); 1624 ASSERT(ds_next->ds_deadlist.dl_oldfmt); 1625 1626 poa.ds = ds; 1627 poa.ds_prev = ds_prev; 1628 poa.after_branch_point = after_branch_point; 1629 poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 1630 VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, 1631 process_old_cb, &poa, tx)); 1632 VERIFY3U(zio_wait(poa.pio), ==, 0); 1633 ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes); 1634 1635 /* change snapused */ 1636 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, 1637 -poa.used, -poa.comp, -poa.uncomp, tx); 1638 1639 /* swap next's deadlist to our deadlist */ 1640 dsl_deadlist_close(&ds->ds_deadlist); 1641 dsl_deadlist_close(&ds_next->ds_deadlist); 1642 SWITCH64(ds_next->ds_phys->ds_deadlist_obj, 1643 ds->ds_phys->ds_deadlist_obj); 1644 dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); 1645 dsl_deadlist_open(&ds_next->ds_deadlist, mos, 1646 ds_next->ds_phys->ds_deadlist_obj); 1647} 1648 1649void 1650dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) 1651{ 1652 struct dsl_ds_destroyarg *dsda = arg1; 1653 dsl_dataset_t *ds = dsda->ds; 1654 int err; 1655 int after_branch_point = FALSE; 1656 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1657 objset_t *mos = 
dp->dp_meta_objset; 1658 dsl_dataset_t *ds_prev = NULL; 1659 boolean_t wont_destroy; 1660 uint64_t obj; 1661 1662 wont_destroy = (dsda->defer && 1663 (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)); 1664 1665 ASSERT(ds->ds_owner || wont_destroy); 1666 ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1); 1667 ASSERT(ds->ds_prev == NULL || 1668 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); 1669 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); 1670 1671 if (wont_destroy) { 1672 ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); 1673 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1674 ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; 1675 return; 1676 } 1677 1678 /* signal any waiters that this dataset is going away */ 1679 mutex_enter(&ds->ds_lock); 1680 ds->ds_owner = dsl_reaper; 1681 cv_broadcast(&ds->ds_exclusive_cv); 1682 mutex_exit(&ds->ds_lock); 1683 1684 /* Remove our reservation */ 1685 if (ds->ds_reserved != 0) { 1686 dsl_prop_setarg_t psa; 1687 uint64_t value = 0; 1688 1689 dsl_prop_setarg_init_uint64(&psa, "refreservation", 1690 (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), 1691 &value); 1692 psa.psa_effective_value = 0; /* predict default value */ 1693 1694 dsl_dataset_set_reservation_sync(ds, &psa, tx); 1695 ASSERT3U(ds->ds_reserved, ==, 0); 1696 } 1697 1698 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1699 1700 dsl_scan_ds_destroyed(ds, tx); 1701 1702 obj = ds->ds_object; 1703 1704 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1705 if (ds->ds_prev) { 1706 ds_prev = ds->ds_prev; 1707 } else { 1708 VERIFY(0 == dsl_dataset_hold_obj(dp, 1709 ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev)); 1710 } 1711 after_branch_point = 1712 (ds_prev->ds_phys->ds_next_snap_obj != obj); 1713 1714 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 1715 if (after_branch_point && 1716 ds_prev->ds_phys->ds_next_clones_obj != 0) { 1717 remove_from_next_clones(ds_prev, obj, tx); 1718 if (ds->ds_phys->ds_next_snap_obj != 0) { 1719 VERIFY(0 == 
zap_add_int(mos, 1720 ds_prev->ds_phys->ds_next_clones_obj, 1721 ds->ds_phys->ds_next_snap_obj, tx)); 1722 } 1723 } 1724 if (after_branch_point && 1725 ds->ds_phys->ds_next_snap_obj == 0) { 1726 /* This clone is toast. */ 1727 ASSERT(ds_prev->ds_phys->ds_num_children > 1); 1728 ds_prev->ds_phys->ds_num_children--; 1729 1730 /* 1731 * If the clone's origin has no other clones, no 1732 * user holds, and has been marked for deferred 1733 * deletion, then we should have done the necessary 1734 * destroy setup for it. 1735 */ 1736 if (ds_prev->ds_phys->ds_num_children == 1 && 1737 ds_prev->ds_userrefs == 0 && 1738 DS_IS_DEFER_DESTROY(ds_prev)) { 1739 ASSERT3P(dsda->rm_origin, !=, NULL); 1740 } else { 1741 ASSERT3P(dsda->rm_origin, ==, NULL); 1742 } 1743 } else if (!after_branch_point) { 1744 ds_prev->ds_phys->ds_next_snap_obj = 1745 ds->ds_phys->ds_next_snap_obj; 1746 } 1747 } 1748 1749 if (dsl_dataset_is_snapshot(ds)) { 1750 dsl_dataset_t *ds_next; 1751 uint64_t old_unique; 1752 uint64_t used = 0, comp = 0, uncomp = 0; 1753 1754 VERIFY(0 == dsl_dataset_hold_obj(dp, 1755 ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next)); 1756 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); 1757 1758 old_unique = ds_next->ds_phys->ds_unique_bytes; 1759 1760 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 1761 ds_next->ds_phys->ds_prev_snap_obj = 1762 ds->ds_phys->ds_prev_snap_obj; 1763 ds_next->ds_phys->ds_prev_snap_txg = 1764 ds->ds_phys->ds_prev_snap_txg; 1765 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1766 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); 1767 1768 1769 if (ds_next->ds_deadlist.dl_oldfmt) { 1770 process_old_deadlist(ds, ds_prev, ds_next, 1771 after_branch_point, tx); 1772 } else { 1773 /* Adjust prev's unique space. 
*/ 1774 if (ds_prev && !after_branch_point) { 1775 dsl_deadlist_space_range(&ds_next->ds_deadlist, 1776 ds_prev->ds_phys->ds_prev_snap_txg, 1777 ds->ds_phys->ds_prev_snap_txg, 1778 &used, &comp, &uncomp); 1779 ds_prev->ds_phys->ds_unique_bytes += used; 1780 } 1781 1782 /* Adjust snapused. */ 1783 dsl_deadlist_space_range(&ds_next->ds_deadlist, 1784 ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, 1785 &used, &comp, &uncomp); 1786 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, 1787 -used, -comp, -uncomp, tx); 1788 1789 /* Move blocks to be freed to pool's free list. */ 1790 dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, 1791 &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg, 1792 tx); 1793 dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, 1794 DD_USED_HEAD, used, comp, uncomp, tx); 1795 dsl_dir_dirty(tx->tx_pool->dp_free_dir, tx); 1796 1797 /* Merge our deadlist into next's and free it. */ 1798 dsl_deadlist_merge(&ds_next->ds_deadlist, 1799 ds->ds_phys->ds_deadlist_obj, tx); 1800 } 1801 dsl_deadlist_close(&ds->ds_deadlist); 1802 dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); 1803 1804 /* Collapse range in clone heads */ 1805 dsl_dataset_remove_clones_key(ds, 1806 ds->ds_phys->ds_creation_txg, tx); 1807 1808 if (dsl_dataset_is_snapshot(ds_next)) { 1809 dsl_dataset_t *ds_nextnext; 1810 1811 /* 1812 * Update next's unique to include blocks which 1813 * were previously shared by only this snapshot 1814 * and it. Those blocks will be born after the 1815 * prev snap and before this snap, and will have 1816 * died after the next snap and before the one 1817 * after that (ie. be on the snap after next's 1818 * deadlist). 
1819 */ 1820 VERIFY(0 == dsl_dataset_hold_obj(dp, 1821 ds_next->ds_phys->ds_next_snap_obj, 1822 FTAG, &ds_nextnext)); 1823 dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, 1824 ds->ds_phys->ds_prev_snap_txg, 1825 ds->ds_phys->ds_creation_txg, 1826 &used, &comp, &uncomp); 1827 ds_next->ds_phys->ds_unique_bytes += used; 1828 dsl_dataset_rele(ds_nextnext, FTAG); 1829 ASSERT3P(ds_next->ds_prev, ==, NULL); 1830 1831 /* Collapse range in this head. */ 1832 dsl_dataset_t *hds; 1833 VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, 1834 ds->ds_dir->dd_phys->dd_head_dataset_obj, 1835 FTAG, &hds)); 1836 dsl_deadlist_remove_key(&hds->ds_deadlist, 1837 ds->ds_phys->ds_creation_txg, tx); 1838 dsl_dataset_rele(hds, FTAG); 1839 1840 } else { 1841 ASSERT3P(ds_next->ds_prev, ==, ds); 1842 dsl_dataset_drop_ref(ds_next->ds_prev, ds_next); 1843 ds_next->ds_prev = NULL; 1844 if (ds_prev) { 1845 VERIFY(0 == dsl_dataset_get_ref(dp, 1846 ds->ds_phys->ds_prev_snap_obj, 1847 ds_next, &ds_next->ds_prev)); 1848 } 1849 1850 dsl_dataset_recalc_head_uniq(ds_next); 1851 1852 /* 1853 * Reduce the amount of our unconsmed refreservation 1854 * being charged to our parent by the amount of 1855 * new unique data we have gained. 1856 */ 1857 if (old_unique < ds_next->ds_reserved) { 1858 int64_t mrsdelta; 1859 uint64_t new_unique = 1860 ds_next->ds_phys->ds_unique_bytes; 1861 1862 ASSERT(old_unique <= new_unique); 1863 mrsdelta = MIN(new_unique - old_unique, 1864 ds_next->ds_reserved - old_unique); 1865 dsl_dir_diduse_space(ds->ds_dir, 1866 DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); 1867 } 1868 } 1869 dsl_dataset_rele(ds_next, FTAG); 1870 } else { 1871 /* 1872 * There's no next snapshot, so this is a head dataset. 1873 * Destroy the deadlist. Unless it's a clone, the 1874 * deadlist should be empty. (If it's a clone, it's 1875 * safe to ignore the deadlist contents.) 
1876 */ 1877 struct killarg ka; 1878 1879 dsl_deadlist_close(&ds->ds_deadlist); 1880 dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); 1881 ds->ds_phys->ds_deadlist_obj = 0; 1882 1883 /* 1884 * Free everything that we point to (that's born after 1885 * the previous snapshot, if we are a clone) 1886 * 1887 * NB: this should be very quick, because we already 1888 * freed all the objects in open context. 1889 */ 1890 ka.ds = ds; 1891 ka.tx = tx; 1892 err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1893 TRAVERSE_POST, kill_blkptr, &ka); 1894 ASSERT3U(err, ==, 0); 1895 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || 1896 ds->ds_phys->ds_unique_bytes == 0); 1897 1898 if (ds->ds_prev != NULL) { 1899 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 1900 VERIFY3U(0, ==, zap_remove_int(mos, 1901 ds->ds_prev->ds_dir->dd_phys->dd_clones, 1902 ds->ds_object, tx)); 1903 } 1904 dsl_dataset_rele(ds->ds_prev, ds); 1905 ds->ds_prev = ds_prev = NULL; 1906 } 1907 } 1908 1909 /* 1910 * This must be done after the dsl_traverse(), because it will 1911 * re-open the objset. 
1912 */ 1913 if (ds->ds_objset) { 1914 dmu_objset_evict(ds->ds_objset); 1915 ds->ds_objset = NULL; 1916 } 1917 1918 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { 1919 /* Erase the link in the dir */ 1920 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 1921 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; 1922 ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); 1923 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); 1924 ASSERT(err == 0); 1925 } else { 1926 /* remove from snapshot namespace */ 1927 dsl_dataset_t *ds_head; 1928 ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0); 1929 VERIFY(0 == dsl_dataset_hold_obj(dp, 1930 ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head)); 1931 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1932#ifdef ZFS_DEBUG 1933 { 1934 uint64_t val; 1935 1936 err = dsl_dataset_snap_lookup(ds_head, 1937 ds->ds_snapname, &val); 1938 ASSERT3U(err, ==, 0); 1939 ASSERT3U(val, ==, obj); 1940 } 1941#endif 1942 err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx); 1943 ASSERT(err == 0); 1944 dsl_dataset_rele(ds_head, FTAG); 1945 } 1946 1947 if (ds_prev && ds->ds_prev != ds_prev) 1948 dsl_dataset_rele(ds_prev, FTAG); 1949 1950 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 1951 spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx, 1952 "dataset = %llu", ds->ds_object); 1953 1954 if (ds->ds_phys->ds_next_clones_obj != 0) { 1955 uint64_t count; 1956 ASSERT(0 == zap_count(mos, 1957 ds->ds_phys->ds_next_clones_obj, &count) && count == 0); 1958 VERIFY(0 == dmu_object_free(mos, 1959 ds->ds_phys->ds_next_clones_obj, tx)); 1960 } 1961 if (ds->ds_phys->ds_props_obj != 0) 1962 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx)); 1963 if (ds->ds_phys->ds_userrefs_obj != 0) 1964 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx)); 1965 dsl_dir_close(ds->ds_dir, ds); 1966 ds->ds_dir = NULL; 1967 dsl_dataset_drain_refs(ds, tag); 1968 VERIFY(0 == dmu_object_free(mos, obj, tx)); 1969 1970 if (dsda->rm_origin) { 
1971 /* 1972 * Remove the origin of the clone we just destroyed. 1973 */ 1974 struct dsl_ds_destroyarg ndsda = {0}; 1975 1976 ndsda.ds = dsda->rm_origin; 1977 dsl_dataset_destroy_sync(&ndsda, tag, tx); 1978 } 1979} 1980 1981static int 1982dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) 1983{ 1984 uint64_t asize; 1985 1986 if (!dmu_tx_is_syncing(tx)) 1987 return (0); 1988 1989 /* 1990 * If there's an fs-only reservation, any blocks that might become 1991 * owned by the snapshot dataset must be accommodated by space 1992 * outside of the reservation. 1993 */ 1994 ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); 1995 asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 1996 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 1997 return (ENOSPC); 1998 1999 /* 2000 * Propogate any reserved space for this snapshot to other 2001 * snapshot checks in this sync group. 2002 */ 2003 if (asize > 0) 2004 dsl_dir_willuse_space(ds->ds_dir, asize, tx); 2005 2006 return (0); 2007} 2008 2009int 2010dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) 2011{ 2012 dsl_dataset_t *ds = arg1; 2013 const char *snapname = arg2; 2014 int err; 2015 uint64_t value; 2016 2017 /* 2018 * We don't allow multiple snapshots of the same txg. If there 2019 * is already one, try again. 2020 */ 2021 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) 2022 return (EAGAIN); 2023 2024 /* 2025 * Check for conflicting name snapshot name. 2026 */ 2027 err = dsl_dataset_snap_lookup(ds, snapname, &value); 2028 if (err == 0) 2029 return (EEXIST); 2030 if (err != ENOENT) 2031 return (err); 2032 2033 /* 2034 * Check that the dataset's name is not too long. 
Name consists 2035 * of the dataset's length + 1 for the @-sign + snapshot name's length 2036 */ 2037 if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) 2038 return (ENAMETOOLONG); 2039 2040 err = dsl_dataset_snapshot_reserve_space(ds, tx); 2041 if (err) 2042 return (err); 2043 2044 ds->ds_trysnap_txg = tx->tx_txg; 2045 return (0); 2046} 2047 2048void 2049dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) 2050{ 2051 dsl_dataset_t *ds = arg1; 2052 const char *snapname = arg2; 2053 dsl_pool_t *dp = ds->ds_dir->dd_pool; 2054 dmu_buf_t *dbuf; 2055 dsl_dataset_phys_t *dsphys; 2056 uint64_t dsobj, crtxg; 2057 objset_t *mos = dp->dp_meta_objset; 2058 int err; 2059 2060 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 2061 2062 /* 2063 * The origin's ds_creation_txg has to be < TXG_INITIAL 2064 */ 2065 if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) 2066 crtxg = 1; 2067 else 2068 crtxg = tx->tx_txg; 2069 2070 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 2071 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 2072 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 2073 dmu_buf_will_dirty(dbuf, tx); 2074 dsphys = dbuf->db_data; 2075 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 2076 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 2077 dsphys->ds_fsid_guid = unique_create(); 2078 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 2079 sizeof (dsphys->ds_guid)); 2080 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 2081 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 2082 dsphys->ds_next_snap_obj = ds->ds_object; 2083 dsphys->ds_num_children = 1; 2084 dsphys->ds_creation_time = gethrestime_sec(); 2085 dsphys->ds_creation_txg = crtxg; 2086 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 2087 dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; 2088 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 2089 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 2090 dsphys->ds_flags = 
ds->ds_phys->ds_flags; 2091 dsphys->ds_bp = ds->ds_phys->ds_bp; 2092 dmu_buf_rele(dbuf, FTAG); 2093 2094 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 2095 if (ds->ds_prev) { 2096 uint64_t next_clones_obj = 2097 ds->ds_prev->ds_phys->ds_next_clones_obj; 2098 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 2099 ds->ds_object || 2100 ds->ds_prev->ds_phys->ds_num_children > 1); 2101 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 2102 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 2103 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 2104 ds->ds_prev->ds_phys->ds_creation_txg); 2105 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 2106 } else if (next_clones_obj != 0) { 2107 remove_from_next_clones(ds->ds_prev, 2108 dsphys->ds_next_snap_obj, tx); 2109 VERIFY3U(0, ==, zap_add_int(mos, 2110 next_clones_obj, dsobj, tx)); 2111 } 2112 } 2113 2114 /* 2115 * If we have a reference-reservation on this dataset, we will 2116 * need to increase the amount of refreservation being charged 2117 * since our unique space is going to zero. 
2118 */ 2119 if (ds->ds_reserved) { 2120 int64_t delta; 2121 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 2122 delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 2123 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, 2124 delta, 0, 0, tx); 2125 } 2126 2127 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2128 zfs_dbgmsg("taking snapshot %s@%s/%llu; newkey=%llu", 2129 ds->ds_dir->dd_myname, snapname, dsobj, 2130 ds->ds_phys->ds_prev_snap_txg); 2131 ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, 2132 UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx); 2133 dsl_deadlist_close(&ds->ds_deadlist); 2134 dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); 2135 dsl_deadlist_add_key(&ds->ds_deadlist, 2136 ds->ds_phys->ds_prev_snap_txg, tx); 2137 2138 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); 2139 ds->ds_phys->ds_prev_snap_obj = dsobj; 2140 ds->ds_phys->ds_prev_snap_txg = crtxg; 2141 ds->ds_phys->ds_unique_bytes = 0; 2142 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 2143 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 2144 2145 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 2146 snapname, 8, 1, &dsobj, tx); 2147 ASSERT(err == 0); 2148 2149 if (ds->ds_prev) 2150 dsl_dataset_drop_ref(ds->ds_prev, ds); 2151 VERIFY(0 == dsl_dataset_get_ref(dp, 2152 ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); 2153 2154 dsl_scan_ds_snapshotted(ds, tx); 2155 2156 dsl_dir_snap_cmtime_update(ds->ds_dir); 2157 2158 spa_history_log_internal(LOG_DS_SNAPSHOT, dp->dp_spa, tx, 2159 "dataset = %llu", dsobj); 2160} 2161 2162void 2163dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 2164{ 2165 ASSERT(dmu_tx_is_syncing(tx)); 2166 ASSERT(ds->ds_objset != NULL); 2167 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 2168 2169 /* 2170 * in case we had to change ds_fsid_guid when we opened it, 2171 * sync it out now. 
2172 */ 2173 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2174 ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 2175 2176 dsl_dir_dirty(ds->ds_dir, tx); 2177 dmu_objset_sync(ds->ds_objset, zio, tx); 2178} 2179
|
| 2180static void 2181get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) 2182{ 2183 uint64_t count = 0; 2184 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 2185 zap_cursor_t zc; 2186 zap_attribute_t za; 2187 nvlist_t *propval; 2188 nvlist_t *val; 2189 2190 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 2191 VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2192 VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2193 2194 /* 2195 * There may me missing entries in ds_next_clones_obj 2196 * due to a bug in a previous version of the code. 2197 * Only trust it if it has the right number of entries. 2198 */ 2199 if (ds->ds_phys->ds_next_clones_obj != 0) { 2200 ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, 2201 &count)); 2202 } 2203 if (count != ds->ds_phys->ds_num_children - 1) { 2204 goto fail; 2205 } 2206 for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj); 2207 zap_cursor_retrieve(&zc, &za) == 0; 2208 zap_cursor_advance(&zc)) { 2209 dsl_dataset_t *clone; 2210 char buf[ZFS_MAXNAMELEN]; 2211 if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 2212 za.za_first_integer, FTAG, &clone) != 0) { 2213 goto fail; 2214 } 2215 dsl_dir_name(clone->ds_dir, buf); 2216 VERIFY(nvlist_add_boolean(val, buf) == 0); 2217 dsl_dataset_rele(clone, FTAG); 2218 } 2219 zap_cursor_fini(&zc); 2220 VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0); 2221 VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), 2222 propval) == 0); 2223fail: 2224 nvlist_free(val); 2225 nvlist_free(propval); 2226 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 2227} 2228
|
2153void 2154dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 2155{ 2156 uint64_t refd, avail, uobjs, aobjs, ratio; 2157 2158 dsl_dir_stats(ds->ds_dir, nv); 2159 2160 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 2161 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 2162 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 2163 2164 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 2165 ds->ds_phys->ds_creation_time); 2166 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 2167 ds->ds_phys->ds_creation_txg); 2168 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 2169 ds->ds_quota); 2170 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 2171 ds->ds_reserved); 2172 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 2173 ds->ds_phys->ds_guid); 2174 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, 2175 ds->ds_phys->ds_unique_bytes); 2176 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, 2177 ds->ds_object); 2178 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, 2179 ds->ds_userrefs); 2180 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, 2181 DS_IS_DEFER_DESTROY(ds) ? 1 : 0); 2182
| 2229void 2230dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 2231{ 2232 uint64_t refd, avail, uobjs, aobjs, ratio; 2233 2234 dsl_dir_stats(ds->ds_dir, nv); 2235 2236 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 2237 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 2238 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 2239 2240 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 2241 ds->ds_phys->ds_creation_time); 2242 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 2243 ds->ds_phys->ds_creation_txg); 2244 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 2245 ds->ds_quota); 2246 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 2247 ds->ds_reserved); 2248 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 2249 ds->ds_phys->ds_guid); 2250 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, 2251 ds->ds_phys->ds_unique_bytes); 2252 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, 2253 ds->ds_object); 2254 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, 2255 ds->ds_userrefs); 2256 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, 2257 DS_IS_DEFER_DESTROY(ds) ? 1 : 0); 2258
|
| 2259 if (ds->ds_phys->ds_prev_snap_obj != 0) { 2260 uint64_t written, comp, uncomp; 2261 dsl_pool_t *dp = ds->ds_dir->dd_pool; 2262 dsl_dataset_t *prev; 2263 2264 rw_enter(&dp->dp_config_rwlock, RW_READER); 2265 int err = dsl_dataset_hold_obj(dp, 2266 ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); 2267 rw_exit(&dp->dp_config_rwlock); 2268 if (err == 0) { 2269 err = dsl_dataset_space_written(prev, ds, &written, 2270 &comp, &uncomp); 2271 dsl_dataset_rele(prev, FTAG); 2272 if (err == 0) { 2273 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, 2274 written); 2275 } 2276 } 2277 } 2278
|
2183 ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 2184 (ds->ds_phys->ds_uncompressed_bytes * 100 / 2185 ds->ds_phys->ds_compressed_bytes); 2186 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); 2187 2188 if (ds->ds_phys->ds_next_snap_obj) { 2189 /* 2190 * This is a snapshot; override the dd's space used with 2191 * our unique space and compression ratio. 2192 */ 2193 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 2194 ds->ds_phys->ds_unique_bytes); 2195 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
| 2279 ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 2280 (ds->ds_phys->ds_uncompressed_bytes * 100 / 2281 ds->ds_phys->ds_compressed_bytes); 2282 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); 2283 2284 if (ds->ds_phys->ds_next_snap_obj) { 2285 /* 2286 * This is a snapshot; override the dd's space used with 2287 * our unique space and compression ratio. 2288 */ 2289 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 2290 ds->ds_phys->ds_unique_bytes); 2291 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
|
| 2292 2293 get_clones_stat(ds, nv);
|
2196 } 2197} 2198 2199void 2200dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 2201{ 2202 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 2203 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 2204 stat->dds_guid = ds->ds_phys->ds_guid; 2205 if (ds->ds_phys->ds_next_snap_obj) { 2206 stat->dds_is_snapshot = B_TRUE; 2207 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 2208 } else { 2209 stat->dds_is_snapshot = B_FALSE; 2210 stat->dds_num_clones = 0; 2211 } 2212 2213 /* clone origin is really a dsl_dir thing... */ 2214 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 2215 if (dsl_dir_is_clone(ds->ds_dir)) { 2216 dsl_dataset_t *ods; 2217 2218 VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, 2219 ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); 2220 dsl_dataset_name(ods, stat->dds_origin); 2221 dsl_dataset_drop_ref(ods, FTAG); 2222 } else { 2223 stat->dds_origin[0] = '\0'; 2224 } 2225 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 2226} 2227 2228uint64_t 2229dsl_dataset_fsid_guid(dsl_dataset_t *ds) 2230{ 2231 return (ds->ds_fsid_guid); 2232} 2233 2234void 2235dsl_dataset_space(dsl_dataset_t *ds, 2236 uint64_t *refdbytesp, uint64_t *availbytesp, 2237 uint64_t *usedobjsp, uint64_t *availobjsp) 2238{ 2239 *refdbytesp = ds->ds_phys->ds_used_bytes; 2240 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 2241 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) 2242 *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; 2243 if (ds->ds_quota != 0) { 2244 /* 2245 * Adjust available bytes according to refquota 2246 */ 2247 if (*refdbytesp < ds->ds_quota) 2248 *availbytesp = MIN(*availbytesp, 2249 ds->ds_quota - *refdbytesp); 2250 else 2251 *availbytesp = 0; 2252 } 2253 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 2254 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 2255} 2256 2257boolean_t 2258dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) 2259{ 2260 dsl_pool_t *dp = 
ds->ds_dir->dd_pool; 2261 2262 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 2263 dsl_pool_sync_context(dp)); 2264 if (ds->ds_prev == NULL) 2265 return (B_FALSE); 2266 if (ds->ds_phys->ds_bp.blk_birth > 2267 ds->ds_prev->ds_phys->ds_creation_txg) { 2268 objset_t *os, *os_prev; 2269 /* 2270 * It may be that only the ZIL differs, because it was 2271 * reset in the head. Don't count that as being 2272 * modified. 2273 */ 2274 if (dmu_objset_from_ds(ds, &os) != 0) 2275 return (B_TRUE); 2276 if (dmu_objset_from_ds(ds->ds_prev, &os_prev) != 0) 2277 return (B_TRUE); 2278 return (bcmp(&os->os_phys->os_meta_dnode, 2279 &os_prev->os_phys->os_meta_dnode, 2280 sizeof (os->os_phys->os_meta_dnode)) != 0); 2281 } 2282 return (B_FALSE); 2283} 2284 2285/* ARGSUSED */ 2286static int 2287dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 2288{ 2289 dsl_dataset_t *ds = arg1; 2290 char *newsnapname = arg2; 2291 dsl_dir_t *dd = ds->ds_dir; 2292 dsl_dataset_t *hds; 2293 uint64_t val; 2294 int err; 2295 2296 err = dsl_dataset_hold_obj(dd->dd_pool, 2297 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds); 2298 if (err) 2299 return (err); 2300 2301 /* new name better not be in use */ 2302 err = dsl_dataset_snap_lookup(hds, newsnapname, &val); 2303 dsl_dataset_rele(hds, FTAG); 2304 2305 if (err == 0) 2306 err = EEXIST; 2307 else if (err == ENOENT) 2308 err = 0; 2309 2310 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 2311 if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 2312 err = ENAMETOOLONG; 2313 2314 return (err); 2315} 2316 2317static void 2318dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) 2319{ 2320 char oldname[MAXPATHLEN], newname[MAXPATHLEN]; 2321 dsl_dataset_t *ds = arg1; 2322 const char *newsnapname = arg2; 2323 dsl_dir_t *dd = ds->ds_dir; 2324 objset_t *mos = dd->dd_pool->dp_meta_objset; 2325 dsl_dataset_t *hds; 2326 int err; 2327 2328 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 2329 2330 VERIFY(0 
== dsl_dataset_hold_obj(dd->dd_pool, 2331 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds)); 2332 2333 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2334 err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx); 2335 ASSERT3U(err, ==, 0); 2336 dsl_dataset_name(ds, oldname); 2337 mutex_enter(&ds->ds_lock); 2338 (void) strcpy(ds->ds_snapname, newsnapname); 2339 mutex_exit(&ds->ds_lock); 2340 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 2341 ds->ds_snapname, 8, 1, &ds->ds_object, tx); 2342 ASSERT3U(err, ==, 0); 2343 dsl_dataset_name(ds, newname); 2344#ifdef _KERNEL 2345 zvol_rename_minors(oldname, newname); 2346#endif 2347 2348 spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, 2349 "dataset = %llu", ds->ds_object); 2350 dsl_dataset_rele(hds, FTAG); 2351} 2352 2353struct renamesnaparg { 2354 dsl_sync_task_group_t *dstg; 2355 char failed[MAXPATHLEN]; 2356 char *oldsnap; 2357 char *newsnap; 2358}; 2359 2360static int 2361dsl_snapshot_rename_one(const char *name, void *arg) 2362{ 2363 struct renamesnaparg *ra = arg; 2364 dsl_dataset_t *ds = NULL; 2365 char *snapname; 2366 int err; 2367 2368 snapname = kmem_asprintf("%s@%s", name, ra->oldsnap); 2369 (void) strlcpy(ra->failed, snapname, sizeof (ra->failed)); 2370 2371 /* 2372 * For recursive snapshot renames the parent won't be changing 2373 * so we just pass name for both the to/from argument. 2374 */ 2375 err = zfs_secpolicy_rename_perms(snapname, snapname, CRED()); 2376 if (err != 0) { 2377 strfree(snapname); 2378 return (err == ENOENT ? 0 : err); 2379 } 2380 2381#ifdef _KERNEL 2382 /* 2383 * For all filesystems undergoing rename, we'll need to unmount it. 2384 */ 2385 (void) zfs_unmount_snap(snapname, NULL); 2386#endif 2387 err = dsl_dataset_hold(snapname, ra->dstg, &ds); 2388 strfree(snapname); 2389 if (err != 0) 2390 return (err == ENOENT ? 
0 : err); 2391 2392 dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, 2393 dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); 2394 2395 return (0); 2396} 2397 2398static int 2399dsl_recursive_rename(char *oldname, const char *newname) 2400{ 2401 int err; 2402 struct renamesnaparg *ra; 2403 dsl_sync_task_t *dst; 2404 spa_t *spa; 2405 char *cp, *fsname = spa_strdup(oldname); 2406 int len = strlen(oldname) + 1; 2407 2408 /* truncate the snapshot name to get the fsname */ 2409 cp = strchr(fsname, '@'); 2410 *cp = '\0'; 2411 2412 err = spa_open(fsname, &spa, FTAG); 2413 if (err) { 2414 kmem_free(fsname, len); 2415 return (err); 2416 } 2417 ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); 2418 ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 2419 2420 ra->oldsnap = strchr(oldname, '@') + 1; 2421 ra->newsnap = strchr(newname, '@') + 1; 2422 *ra->failed = '\0'; 2423 2424 err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, 2425 DS_FIND_CHILDREN); 2426 kmem_free(fsname, len); 2427 2428 if (err == 0) { 2429 err = dsl_sync_task_group_wait(ra->dstg); 2430 } 2431 2432 for (dst = list_head(&ra->dstg->dstg_tasks); dst; 2433 dst = list_next(&ra->dstg->dstg_tasks, dst)) { 2434 dsl_dataset_t *ds = dst->dst_arg1; 2435 if (dst->dst_err) { 2436 dsl_dir_name(ds->ds_dir, ra->failed); 2437 (void) strlcat(ra->failed, "@", sizeof (ra->failed)); 2438 (void) strlcat(ra->failed, ra->newsnap, 2439 sizeof (ra->failed)); 2440 } 2441 dsl_dataset_rele(ds, ra->dstg); 2442 } 2443 2444 if (err) 2445 (void) strlcpy(oldname, ra->failed, sizeof (ra->failed)); 2446 2447 dsl_sync_task_group_destroy(ra->dstg); 2448 kmem_free(ra, sizeof (struct renamesnaparg)); 2449 spa_close(spa, FTAG); 2450 return (err); 2451} 2452 2453static int 2454dsl_valid_rename(const char *oldname, void *arg) 2455{ 2456 int delta = *(int *)arg; 2457 2458 if (strlen(oldname) + delta >= MAXNAMELEN) 2459 return (ENAMETOOLONG); 2460 2461 return (0); 2462} 2463 2464#pragma weak dmu_objset_rename 
= dsl_dataset_rename 2465int 2466dsl_dataset_rename(char *oldname, const char *newname, int flags) 2467{ 2468 dsl_dir_t *dd; 2469 dsl_dataset_t *ds; 2470 const char *tail; 2471 int err; 2472 2473 err = dsl_dir_open(oldname, FTAG, &dd, &tail); 2474 if (err) 2475 return (err); 2476 2477 if (tail == NULL) { 2478 int delta = strlen(newname) - strlen(oldname); 2479 2480 /* if we're growing, validate child name lengths */ 2481 if (delta > 0) 2482 err = dmu_objset_find(oldname, dsl_valid_rename, 2483 &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 2484 2485 if (err == 0) 2486 err = dsl_dir_rename(dd, newname, flags); 2487 dsl_dir_close(dd, FTAG); 2488 return (err); 2489 } 2490 2491 if (tail[0] != '@') { 2492 /* the name ended in a nonexistent component */ 2493 dsl_dir_close(dd, FTAG); 2494 return (ENOENT); 2495 } 2496 2497 dsl_dir_close(dd, FTAG); 2498 2499 /* new name must be snapshot in same filesystem */ 2500 tail = strchr(newname, '@'); 2501 if (tail == NULL) 2502 return (EINVAL); 2503 tail++; 2504 if (strncmp(oldname, newname, tail - newname) != 0) 2505 return (EXDEV); 2506 2507 if (flags & ZFS_RENAME_RECURSIVE) { 2508 err = dsl_recursive_rename(oldname, newname); 2509 } else { 2510 err = dsl_dataset_hold(oldname, FTAG, &ds); 2511 if (err) 2512 return (err); 2513 2514 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2515 dsl_dataset_snapshot_rename_check, 2516 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 2517 2518 dsl_dataset_rele(ds, FTAG); 2519 } 2520 2521 return (err); 2522} 2523 2524struct promotenode { 2525 list_node_t link; 2526 dsl_dataset_t *ds; 2527}; 2528 2529struct promotearg { 2530 list_t shared_snaps, origin_snaps, clone_snaps; 2531 dsl_dataset_t *origin_origin; 2532 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 2533 char *err_ds; 2534}; 2535 2536static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 2537static boolean_t snaplist_unstable(list_t *l); 2538 2539static int 2540dsl_dataset_promote_check(void 
*arg1, void *arg2, dmu_tx_t *tx) 2541{ 2542 dsl_dataset_t *hds = arg1; 2543 struct promotearg *pa = arg2; 2544 struct promotenode *snap = list_head(&pa->shared_snaps); 2545 dsl_dataset_t *origin_ds = snap->ds; 2546 int err; 2547 uint64_t unused; 2548 2549 /* Check that it is a real clone */ 2550 if (!dsl_dir_is_clone(hds->ds_dir)) 2551 return (EINVAL); 2552 2553 /* Since this is so expensive, don't do the preliminary check */ 2554 if (!dmu_tx_is_syncing(tx)) 2555 return (0); 2556 2557 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) 2558 return (EXDEV); 2559 2560 /* compute origin's new unique space */ 2561 snap = list_tail(&pa->clone_snaps); 2562 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2563 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2564 origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2565 &pa->unique, &unused, &unused); 2566 2567 /* 2568 * Walk the snapshots that we are moving 2569 * 2570 * Compute space to transfer. Consider the incremental changes 2571 * to used for each snapshot: 2572 * (my used) = (prev's used) + (blocks born) - (blocks killed) 2573 * So each snapshot gave birth to: 2574 * (blocks born) = (my used) - (prev's used) + (blocks killed) 2575 * So a sequence would look like: 2576 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 2577 * Which simplifies to: 2578 * uN + kN + kN-1 + ... + k1 + k0 2579 * Note however, if we stop before we reach the ORIGIN we get: 2580 * uN + kN + kN-1 + ... 
+ kM - uM-1 2581 */ 2582 pa->used = origin_ds->ds_phys->ds_used_bytes; 2583 pa->comp = origin_ds->ds_phys->ds_compressed_bytes; 2584 pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 2585 for (snap = list_head(&pa->shared_snaps); snap; 2586 snap = list_next(&pa->shared_snaps, snap)) { 2587 uint64_t val, dlused, dlcomp, dluncomp; 2588 dsl_dataset_t *ds = snap->ds; 2589 2590 /* Check that the snapshot name does not conflict */ 2591 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2592 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2593 if (err == 0) { 2594 err = EEXIST; 2595 goto out; 2596 } 2597 if (err != ENOENT) 2598 goto out; 2599 2600 /* The very first snapshot does not have a deadlist */ 2601 if (ds->ds_phys->ds_prev_snap_obj == 0) 2602 continue; 2603 2604 dsl_deadlist_space(&ds->ds_deadlist, 2605 &dlused, &dlcomp, &dluncomp); 2606 pa->used += dlused; 2607 pa->comp += dlcomp; 2608 pa->uncomp += dluncomp; 2609 } 2610 2611 /* 2612 * If we are a clone of a clone then we never reached ORIGIN, 2613 * so we need to subtract out the clone origin's used space. 2614 */ 2615 if (pa->origin_origin) { 2616 pa->used -= pa->origin_origin->ds_phys->ds_used_bytes; 2617 pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; 2618 pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; 2619 } 2620 2621 /* Check that there is enough space here */ 2622 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2623 pa->used); 2624 if (err) 2625 return (err); 2626 2627 /* 2628 * Compute the amounts of space that will be used by snapshots 2629 * after the promotion (for both origin and clone). For each, 2630 * it is the amount of space that will be on all of their 2631 * deadlists (that was not born before their new origin). 
2632 */ 2633 if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2634 uint64_t space; 2635 2636 /* 2637 * Note, typically this will not be a clone of a clone, 2638 * so dd_origin_txg will be < TXG_INITIAL, so 2639 * these snaplist_space() -> dsl_deadlist_space_range() 2640 * calls will be fast because they do not have to 2641 * iterate over all bps. 2642 */ 2643 snap = list_head(&pa->origin_snaps); 2644 err = snaplist_space(&pa->shared_snaps, 2645 snap->ds->ds_dir->dd_origin_txg, &pa->cloneusedsnap); 2646 if (err) 2647 return (err); 2648 2649 err = snaplist_space(&pa->clone_snaps, 2650 snap->ds->ds_dir->dd_origin_txg, &space); 2651 if (err) 2652 return (err); 2653 pa->cloneusedsnap += space; 2654 } 2655 if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2656 err = snaplist_space(&pa->origin_snaps, 2657 origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap); 2658 if (err) 2659 return (err); 2660 } 2661 2662 return (0); 2663out: 2664 pa->err_ds = snap->ds->ds_snapname; 2665 return (err); 2666} 2667 2668static void 2669dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) 2670{ 2671 dsl_dataset_t *hds = arg1; 2672 struct promotearg *pa = arg2; 2673 struct promotenode *snap = list_head(&pa->shared_snaps); 2674 dsl_dataset_t *origin_ds = snap->ds; 2675 dsl_dataset_t *origin_head; 2676 dsl_dir_t *dd = hds->ds_dir; 2677 dsl_pool_t *dp = hds->ds_dir->dd_pool; 2678 dsl_dir_t *odd = NULL; 2679 uint64_t oldnext_obj; 2680 int64_t delta; 2681 2682 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 2683 2684 snap = list_head(&pa->origin_snaps); 2685 origin_head = snap->ds; 2686 2687 /* 2688 * We need to explicitly open odd, since origin_ds's dd will be 2689 * changing. 
2690 */ 2691 VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, 2692 NULL, FTAG, &odd)); 2693 2694 /* change origin's next snap */ 2695 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2696 oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; 2697 snap = list_tail(&pa->clone_snaps); 2698 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2699 origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; 2700 2701 /* change the origin's next clone */ 2702 if (origin_ds->ds_phys->ds_next_clones_obj) { 2703 remove_from_next_clones(origin_ds, snap->ds->ds_object, tx); 2704 VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, 2705 origin_ds->ds_phys->ds_next_clones_obj, 2706 oldnext_obj, tx)); 2707 } 2708 2709 /* change origin */ 2710 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2711 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2712 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2713 dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; 2714 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2715 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2716 origin_head->ds_dir->dd_origin_txg = 2717 origin_ds->ds_phys->ds_creation_txg; 2718 2719 /* change dd_clone entries */ 2720 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2721 VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, 2722 odd->dd_phys->dd_clones, hds->ds_object, tx)); 2723 VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, 2724 pa->origin_origin->ds_dir->dd_phys->dd_clones, 2725 hds->ds_object, tx)); 2726 2727 VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, 2728 pa->origin_origin->ds_dir->dd_phys->dd_clones, 2729 origin_head->ds_object, tx)); 2730 if (dd->dd_phys->dd_clones == 0) { 2731 dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, 2732 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 2733 } 2734 VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, 2735 dd->dd_phys->dd_clones, origin_head->ds_object, tx)); 2736 2737 } 2738 2739 /* move snapshots to this dir */ 2740 for (snap 
= list_head(&pa->shared_snaps); snap; 2741 snap = list_next(&pa->shared_snaps, snap)) { 2742 dsl_dataset_t *ds = snap->ds; 2743 2744 /* unregister props as dsl_dir is changing */ 2745 if (ds->ds_objset) { 2746 dmu_objset_evict(ds->ds_objset); 2747 ds->ds_objset = NULL; 2748 } 2749 /* move snap name entry */ 2750 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2751 VERIFY(0 == dsl_dataset_snap_remove(origin_head, 2752 ds->ds_snapname, tx)); 2753 VERIFY(0 == zap_add(dp->dp_meta_objset, 2754 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2755 8, 1, &ds->ds_object, tx)); 2756 2757 /* change containing dsl_dir */ 2758 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2759 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2760 ds->ds_phys->ds_dir_obj = dd->dd_object; 2761 ASSERT3P(ds->ds_dir, ==, odd); 2762 dsl_dir_close(ds->ds_dir, ds); 2763 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 2764 NULL, ds, &ds->ds_dir)); 2765 2766 /* move any clone references */ 2767 if (ds->ds_phys->ds_next_clones_obj && 2768 spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2769 zap_cursor_t zc; 2770 zap_attribute_t za; 2771 2772 for (zap_cursor_init(&zc, dp->dp_meta_objset, 2773 ds->ds_phys->ds_next_clones_obj); 2774 zap_cursor_retrieve(&zc, &za) == 0; 2775 zap_cursor_advance(&zc)) { 2776 dsl_dataset_t *cnds; 2777 uint64_t o; 2778 2779 if (za.za_first_integer == oldnext_obj) { 2780 /* 2781 * We've already moved the 2782 * origin's reference. 2783 */ 2784 continue; 2785 } 2786 2787 VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, 2788 za.za_first_integer, FTAG, &cnds)); 2789 o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; 2790 2791 VERIFY3U(zap_remove_int(dp->dp_meta_objset, 2792 odd->dd_phys->dd_clones, o, tx), ==, 0); 2793 VERIFY3U(zap_add_int(dp->dp_meta_objset, 2794 dd->dd_phys->dd_clones, o, tx), ==, 0); 2795 dsl_dataset_rele(cnds, FTAG); 2796 } 2797 zap_cursor_fini(&zc); 2798 } 2799 2800 ASSERT3U(dsl_prop_numcb(ds), ==, 0); 2801 } 2802 2803 /* 2804 * Change space accounting. 
2805 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 2806 * both be valid, or both be 0 (resulting in delta == 0). This 2807 * is true for each of {clone,origin} independently. 2808 */ 2809 2810 delta = pa->cloneusedsnap - 2811 dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2812 ASSERT3S(delta, >=, 0); 2813 ASSERT3U(pa->used, >=, delta); 2814 dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2815 dsl_dir_diduse_space(dd, DD_USED_HEAD, 2816 pa->used - delta, pa->comp, pa->uncomp, tx); 2817 2818 delta = pa->originusedsnap - 2819 odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2820 ASSERT3S(delta, <=, 0); 2821 ASSERT3U(pa->used, >=, -delta); 2822 dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2823 dsl_dir_diduse_space(odd, DD_USED_HEAD, 2824 -pa->used - delta, -pa->comp, -pa->uncomp, tx); 2825 2826 origin_ds->ds_phys->ds_unique_bytes = pa->unique; 2827 2828 /* log history record */ 2829 spa_history_log_internal(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, 2830 "dataset = %llu", hds->ds_object); 2831 2832 dsl_dir_close(odd, FTAG); 2833} 2834 2835static char *snaplist_tag = "snaplist"; 2836/* 2837 * Make a list of dsl_dataset_t's for the snapshots between first_obj 2838 * (exclusive) and last_obj (inclusive). The list will be in reverse 2839 * order (last_obj will be the list_head()). If first_obj == 0, do all 2840 * snapshots back to this dataset's origin. 
2841 */ 2842static int 2843snaplist_make(dsl_pool_t *dp, boolean_t own, 2844 uint64_t first_obj, uint64_t last_obj, list_t *l) 2845{ 2846 uint64_t obj = last_obj; 2847 2848 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); 2849 2850 list_create(l, sizeof (struct promotenode), 2851 offsetof(struct promotenode, link)); 2852 2853 while (obj != first_obj) { 2854 dsl_dataset_t *ds; 2855 struct promotenode *snap; 2856 int err; 2857 2858 if (own) { 2859 err = dsl_dataset_own_obj(dp, obj, 2860 0, snaplist_tag, &ds); 2861 if (err == 0) 2862 dsl_dataset_make_exclusive(ds, snaplist_tag); 2863 } else { 2864 err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); 2865 } 2866 if (err == ENOENT) { 2867 /* lost race with snapshot destroy */ 2868 struct promotenode *last = list_tail(l); 2869 ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); 2870 obj = last->ds->ds_phys->ds_prev_snap_obj; 2871 continue; 2872 } else if (err) { 2873 return (err); 2874 } 2875 2876 if (first_obj == 0) 2877 first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2878 2879 snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP); 2880 snap->ds = ds; 2881 list_insert_tail(l, snap); 2882 obj = ds->ds_phys->ds_prev_snap_obj; 2883 } 2884 2885 return (0); 2886} 2887 2888static int 2889snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2890{ 2891 struct promotenode *snap; 2892 2893 *spacep = 0; 2894 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2895 uint64_t used, comp, uncomp; 2896 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2897 mintxg, UINT64_MAX, &used, &comp, &uncomp); 2898 *spacep += used; 2899 } 2900 return (0); 2901} 2902 2903static void 2904snaplist_destroy(list_t *l, boolean_t own) 2905{ 2906 struct promotenode *snap; 2907 2908 if (!l || !list_link_active(&l->list_head)) 2909 return; 2910 2911 while ((snap = list_tail(l)) != NULL) { 2912 list_remove(l, snap); 2913 if (own) 2914 dsl_dataset_disown(snap->ds, snaplist_tag); 2915 else 2916 dsl_dataset_rele(snap->ds, snaplist_tag); 2917 
kmem_free(snap, sizeof (struct promotenode)); 2918 } 2919 list_destroy(l); 2920} 2921 2922/* 2923 * Promote a clone. Nomenclature note: 2924 * "clone" or "cds": the original clone which is being promoted 2925 * "origin" or "ods": the snapshot which is originally clone's origin 2926 * "origin head" or "ohds": the dataset which is the head 2927 * (filesystem/volume) for the origin 2928 * "origin origin": the origin of the origin's filesystem (typically 2929 * NULL, indicating that the clone is not a clone of a clone). 2930 */ 2931int 2932dsl_dataset_promote(const char *name, char *conflsnap) 2933{ 2934 dsl_dataset_t *ds; 2935 dsl_dir_t *dd; 2936 dsl_pool_t *dp; 2937 dmu_object_info_t doi; 2938 struct promotearg pa = { 0 }; 2939 struct promotenode *snap; 2940 int err; 2941 2942 err = dsl_dataset_hold(name, FTAG, &ds); 2943 if (err) 2944 return (err); 2945 dd = ds->ds_dir; 2946 dp = dd->dd_pool; 2947 2948 err = dmu_object_info(dp->dp_meta_objset, 2949 ds->ds_phys->ds_snapnames_zapobj, &doi); 2950 if (err) { 2951 dsl_dataset_rele(ds, FTAG); 2952 return (err); 2953 } 2954 2955 if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { 2956 dsl_dataset_rele(ds, FTAG); 2957 return (EINVAL); 2958 } 2959 2960 /* 2961 * We are going to inherit all the snapshots taken before our 2962 * origin (i.e., our new origin will be our parent's origin). 2963 * Take ownership of them so that we can rename them into our 2964 * namespace. 
2965 */ 2966 rw_enter(&dp->dp_config_rwlock, RW_READER); 2967 2968 err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, 2969 &pa.shared_snaps); 2970 if (err != 0) 2971 goto out; 2972 2973 err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); 2974 if (err != 0) 2975 goto out; 2976 2977 snap = list_head(&pa.shared_snaps); 2978 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 2979 err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, 2980 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); 2981 if (err != 0) 2982 goto out; 2983 2984 if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { 2985 err = dsl_dataset_hold_obj(dp, 2986 snap->ds->ds_dir->dd_phys->dd_origin_obj, 2987 FTAG, &pa.origin_origin); 2988 if (err != 0) 2989 goto out; 2990 } 2991 2992out: 2993 rw_exit(&dp->dp_config_rwlock); 2994 2995 /* 2996 * Add in 128x the snapnames zapobj size, since we will be moving 2997 * a bunch of snapnames to the promoted ds, and dirtying their 2998 * bonus buffers. 
*/
	if (err == 0) {
		err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
		    dsl_dataset_promote_sync, ds, &pa,
		    2 + 2 * doi.doi_physical_blocks_512);
		if (err && pa.err_ds && conflsnap)
			(void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN);
	}

	snaplist_destroy(&pa.shared_snaps, B_TRUE);
	snaplist_destroy(&pa.clone_snaps, B_FALSE);
	snaplist_destroy(&pa.origin_snaps, B_FALSE);
	if (pa.origin_origin)
		dsl_dataset_rele(pa.origin_origin, FTAG);
	dsl_dataset_rele(ds, FTAG);
	return (err);
}

/*
 * Argument bundle shared by dsl_dataset_clone_swap_check() and
 * dsl_dataset_clone_swap_sync().  The first three fields are filled in by
 * dsl_dataset_clone_swap(); unused_refres_delta is computed by the check
 * function and consumed by the sync function.
 */
struct cloneswaparg {
	dsl_dataset_t *cds; /* clone dataset */
	dsl_dataset_t *ohds; /* origin's head dataset */
	boolean_t force;
	int64_t unused_refres_delta; /* change in unconsumed refreservation */
};

/*
 * Check half of the clone-swap sync task.
 *
 * Returns EINVAL if either dataset is a snapshot, if they do not share the
 * same ds_prev, if cds is not the clone side, or if cds's dir is not a
 * child of ohds's dir; ETXTBSY if ohds was modified since its last snapshot
 * and 'force' is not set; ENOSPC if the swap would increase the unconsumed
 * refreservation beyond the space available; EDQUOT if the clone's unique
 * bytes exceed ohds's quota.  Also records csa->unused_refres_delta.
 */
/* ARGSUSED */
static int
dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	struct cloneswaparg *csa = arg1;

	/* they should both be heads */
	if (dsl_dataset_is_snapshot(csa->cds) ||
	    dsl_dataset_is_snapshot(csa->ohds))
		return (EINVAL);

	/* the branch point should be just before them */
	if (csa->cds->ds_prev != csa->ohds->ds_prev)
		return (EINVAL);

	/* cds should be the clone (unless they are unrelated) */
	if (csa->cds->ds_prev != NULL &&
	    csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap &&
	    csa->ohds->ds_object !=
	    csa->cds->ds_prev->ds_phys->ds_next_snap_obj)
		return (EINVAL);

	/* the clone should be a child of the origin */
	if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir)
		return (EINVAL);

	/* ohds shouldn't be modified unless 'force' */
	if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds))
		return (ETXTBSY);

	/*
	 * adjust amount of any unconsumed refreservation
	 *
	 * Both terms are clamped to ohds's reservation: the first uses
	 * ohds's current unique bytes, the second the clone's unique bytes
	 * (which become ohds's once the sync function swaps them).
	 */
	csa->unused_refres_delta =
	    (int64_t)MIN(csa->ohds->ds_reserved,
	    csa->ohds->ds_phys->ds_unique_bytes) -
	    (int64_t)MIN(csa->ohds->ds_reserved,
	    csa->cds->ds_phys->ds_unique_bytes);

	if (csa->unused_refres_delta > 0 &&
	    csa->unused_refres_delta >
	    dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE))
		return (ENOSPC);

	if (csa->ohds->ds_quota != 0 &&
	    csa->cds->ds_phys->ds_unique_bytes > csa->ohds->ds_quota)
		return (EDQUOT);

	return (0);
}

/*
 * Sync half of the clone-swap sync task: exchanges the block pointers,
 * space counters and deadlists of csa->cds and csa->ohds, and adjusts the
 * owning dsl_dirs' space accounting to match.  The statement order matters:
 * the dd_*_bytes deltas are computed from the pre-swap ds_*_bytes and
 * deadlists, before those are exchanged further down.
 */
/* ARGSUSED */
static void
dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	struct cloneswaparg *csa = arg1;
	dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;

	ASSERT(csa->cds->ds_reserved == 0);
	ASSERT(csa->ohds->ds_quota == 0 ||
	    csa->cds->ds_phys->ds_unique_bytes <= csa->ohds->ds_quota);

	dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
	dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);

	/* drop any cached objsets; the block pointers are about to change */
	if (csa->cds->ds_objset != NULL) {
		dmu_objset_evict(csa->cds->ds_objset);
		csa->cds->ds_objset = NULL;
	}

	if (csa->ohds->ds_objset != NULL) {
		dmu_objset_evict(csa->ohds->ds_objset);
		csa->ohds->ds_objset = NULL;
	}

	/*
	 * Reset origin's unique bytes, if it exists.
	 */
	if (csa->cds->ds_prev) {
		dsl_dataset_t *origin = csa->cds->ds_prev;
		uint64_t comp, uncomp;

		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		/* comp/uncomp are required outputs but are not used here */
		dsl_deadlist_space_range(&csa->cds->ds_deadlist,
		    origin->ds_phys->ds_prev_snap_txg, UINT64_MAX,
		    &origin->ds_phys->ds_unique_bytes, &comp, &uncomp);
	}

	/* swap blkptrs */
	{
		blkptr_t tmp;
		tmp = csa->ohds->ds_phys->ds_bp;
		csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
		csa->cds->ds_phys->ds_bp = tmp;
	}

	/* set dd_*_bytes */
	{
		int64_t dused, dcomp, duncomp;
		uint64_t cdl_used, cdl_comp, cdl_uncomp;
		uint64_t odl_used, odl_comp, odl_uncomp;

		ASSERT3U(csa->cds->ds_dir->dd_phys->
		    dd_used_breakdown[DD_USED_SNAP], ==, 0);

		dsl_deadlist_space(&csa->cds->ds_deadlist,
		    &cdl_used, &cdl_comp, &cdl_uncomp);
		dsl_deadlist_space(&csa->ohds->ds_deadlist,
		    &odl_used, &odl_comp, &odl_uncomp);

		/* delta = (clone bytes + deadlist) - (origin head's) */
		dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
		    (csa->ohds->ds_phys->ds_used_bytes + odl_used);
		dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
		    (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
		duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
		    cdl_uncomp -
		    (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);

		/* the two dirs see equal and opposite changes */
		dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD,
		    dused, dcomp, duncomp, tx);
		dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD,
		    -dused, -dcomp, -duncomp, tx);

		/*
		 * The difference in the space used by snapshots is the
		 * difference in snapshot space due to the head's
		 * deadlist (since that's the only thing that's
		 * changing that affects the snapused).
		 */
		dsl_deadlist_space_range(&csa->cds->ds_deadlist,
		    csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
		    &cdl_used, &cdl_comp, &cdl_uncomp);
		dsl_deadlist_space_range(&csa->ohds->ds_deadlist,
		    csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
		    &odl_used, &odl_comp, &odl_uncomp);
		dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
		    DD_USED_HEAD, DD_USED_SNAP, tx);
	}

	/* swap ds_*_bytes */
	SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
	    csa->cds->ds_phys->ds_used_bytes);
	SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
	    csa->cds->ds_phys->ds_compressed_bytes);
	SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
	    csa->cds->ds_phys->ds_uncompressed_bytes);
	SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
	    csa->cds->ds_phys->ds_unique_bytes);

	/* apply any parent delta for change in unconsumed refreservation */
	dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV,
	    csa->unused_refres_delta, 0, 0, tx);

	/*
	 * Swap deadlists.
	 *
	 * The in-core deadlists are closed, the on-disk object numbers are
	 * exchanged, and each deadlist is reopened on its new object.
	 */
	dsl_deadlist_close(&csa->cds->ds_deadlist);
	dsl_deadlist_close(&csa->ohds->ds_deadlist);
	SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
	    csa->cds->ds_phys->ds_deadlist_obj);
	dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
	    csa->cds->ds_phys->ds_deadlist_obj);
	dsl_deadlist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
	    csa->ohds->ds_phys->ds_deadlist_obj);

	dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx);
}

/*
 * Swap 'clone' with its origin head datasets.  Used at the end of "zfs
 * recv" into an existing fs to swizzle the file system to the new
 * version, and by "zfs rollback".  Can also be used to swap two
 * independent head datasets if neither has any snapshots.
*/
int
dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
    boolean_t force)
{
	struct cloneswaparg csa;
	int error;

	/* both datasets must already be owned by the caller */
	ASSERT(clone->ds_owner);
	ASSERT(origin_head->ds_owner);
retry:
	/*
	 * Need exclusive access for the swap. If we're swapping these
	 * datasets back after an error, we already hold the locks.
	 *
	 * The clone's lock is taken first.  If the origin head's lock
	 * cannot be obtained without blocking, the clone's lock is dropped,
	 * the origin head's lock is waited for, and the clone's lock is
	 * then only *tried*; on failure everything is released and the
	 * sequence restarts.  This way the thread never blocks on one of
	 * the two locks while holding the other.
	 */
	if (!RW_WRITE_HELD(&clone->ds_rwlock))
		rw_enter(&clone->ds_rwlock, RW_WRITER);
	if (!RW_WRITE_HELD(&origin_head->ds_rwlock) &&
	    !rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) {
		rw_exit(&clone->ds_rwlock);
		rw_enter(&origin_head->ds_rwlock, RW_WRITER);
		if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) {
			rw_exit(&origin_head->ds_rwlock);
			goto retry;
		}
	}
	csa.cds = clone;
	csa.ohds = origin_head;
	csa.force = force;
	/*
	 * Neither ds_rwlock is released in this function; both remain
	 * write-held when it returns, whatever the task's result.
	 */
	error = dsl_sync_task_do(clone->ds_dir->dd_pool,
	    dsl_dataset_clone_swap_check,
	    dsl_dataset_clone_swap_sync, &csa, NULL, 9);
	return (error);
}

/*
 * Given a pool name and a dataset object number in that pool,
 * return the name of that dataset.
3233 */ 3234int 3235dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 3236{ 3237 spa_t *spa; 3238 dsl_pool_t *dp; 3239 dsl_dataset_t *ds; 3240 int error; 3241 3242 if ((error = spa_open(pname, &spa, FTAG)) != 0) 3243 return (error); 3244 dp = spa_get_dsl(spa); 3245 rw_enter(&dp->dp_config_rwlock, RW_READER); 3246 if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) { 3247 dsl_dataset_name(ds, buf); 3248 dsl_dataset_rele(ds, FTAG); 3249 } 3250 rw_exit(&dp->dp_config_rwlock); 3251 spa_close(spa, FTAG); 3252 3253 return (error); 3254} 3255 3256int 3257dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 3258 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 3259{ 3260 int error = 0; 3261 3262 ASSERT3S(asize, >, 0); 3263 3264 /* 3265 * *ref_rsrv is the portion of asize that will come from any 3266 * unconsumed refreservation space. 3267 */ 3268 *ref_rsrv = 0; 3269 3270 mutex_enter(&ds->ds_lock); 3271 /* 3272 * Make a space adjustment for reserved bytes. 3273 */ 3274 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 3275 ASSERT3U(*used, >=, 3276 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 3277 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 3278 *ref_rsrv = 3279 asize - MIN(asize, parent_delta(ds, asize + inflight)); 3280 } 3281 3282 if (!check_quota || ds->ds_quota == 0) { 3283 mutex_exit(&ds->ds_lock); 3284 return (0); 3285 } 3286 /* 3287 * If they are requesting more space, and our current estimate 3288 * is over quota, they get to try again unless the actual 3289 * on-disk is over quota and there are no pending changes (which 3290 * may free up space for us). 
3291 */ 3292 if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { 3293 if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) 3294 error = ERESTART; 3295 else 3296 error = EDQUOT; 3297 } 3298 mutex_exit(&ds->ds_lock); 3299 3300 return (error); 3301} 3302 3303/* ARGSUSED */ 3304static int 3305dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) 3306{ 3307 dsl_dataset_t *ds = arg1; 3308 dsl_prop_setarg_t *psa = arg2; 3309 int err; 3310 3311 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) 3312 return (ENOTSUP); 3313 3314 if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) 3315 return (err); 3316 3317 if (psa->psa_effective_value == 0) 3318 return (0); 3319 3320 if (psa->psa_effective_value < ds->ds_phys->ds_used_bytes || 3321 psa->psa_effective_value < ds->ds_reserved) 3322 return (ENOSPC); 3323 3324 return (0); 3325} 3326 3327extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *); 3328 3329void 3330dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) 3331{ 3332 dsl_dataset_t *ds = arg1; 3333 dsl_prop_setarg_t *psa = arg2; 3334 uint64_t effective_value = psa->psa_effective_value; 3335 3336 dsl_prop_set_sync(ds, psa, tx); 3337 DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); 3338 3339 if (ds->ds_quota != effective_value) { 3340 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3341 ds->ds_quota = effective_value; 3342 3343 spa_history_log_internal(LOG_DS_REFQUOTA, 3344 ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu ", 3345 (longlong_t)ds->ds_quota, ds->ds_object); 3346 } 3347} 3348 3349int 3350dsl_dataset_set_quota(const char *dsname, zprop_source_t source, uint64_t quota) 3351{ 3352 dsl_dataset_t *ds; 3353 dsl_prop_setarg_t psa; 3354 int err; 3355 3356 dsl_prop_setarg_init_uint64(&psa, "refquota", source, "a); 3357 3358 err = dsl_dataset_hold(dsname, FTAG, &ds); 3359 if (err) 3360 return (err); 3361 3362 /* 3363 * If someone removes a file, then tries to set the quota, we 3364 * want to make sure the file 
freeing takes effect. 3365 */ 3366 txg_wait_open(ds->ds_dir->dd_pool, 0); 3367 3368 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3369 dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, 3370 ds, &psa, 0); 3371 3372 dsl_dataset_rele(ds, FTAG); 3373 return (err); 3374} 3375 3376static int 3377dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) 3378{ 3379 dsl_dataset_t *ds = arg1; 3380 dsl_prop_setarg_t *psa = arg2; 3381 uint64_t effective_value; 3382 uint64_t unique; 3383 int err; 3384 3385 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 3386 SPA_VERSION_REFRESERVATION) 3387 return (ENOTSUP); 3388 3389 if (dsl_dataset_is_snapshot(ds)) 3390 return (EINVAL); 3391 3392 if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) 3393 return (err); 3394 3395 effective_value = psa->psa_effective_value; 3396 3397 /* 3398 * If we are doing the preliminary check in open context, the 3399 * space estimates may be inaccurate. 3400 */ 3401 if (!dmu_tx_is_syncing(tx)) 3402 return (0); 3403 3404 mutex_enter(&ds->ds_lock); 3405 if (!DS_UNIQUE_IS_ACCURATE(ds)) 3406 dsl_dataset_recalc_head_uniq(ds); 3407 unique = ds->ds_phys->ds_unique_bytes; 3408 mutex_exit(&ds->ds_lock); 3409 3410 if (MAX(unique, effective_value) > MAX(unique, ds->ds_reserved)) { 3411 uint64_t delta = MAX(unique, effective_value) - 3412 MAX(unique, ds->ds_reserved); 3413 3414 if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 3415 return (ENOSPC); 3416 if (ds->ds_quota > 0 && 3417 effective_value > ds->ds_quota) 3418 return (ENOSPC); 3419 } 3420 3421 return (0); 3422} 3423 3424static void 3425dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) 3426{ 3427 dsl_dataset_t *ds = arg1; 3428 dsl_prop_setarg_t *psa = arg2; 3429 uint64_t effective_value = psa->psa_effective_value; 3430 uint64_t unique; 3431 int64_t delta; 3432 3433 dsl_prop_set_sync(ds, psa, tx); 3434 DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); 3435 3436 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3437 3438 
mutex_enter(&ds->ds_dir->dd_lock); 3439 mutex_enter(&ds->ds_lock); 3440 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 3441 unique = ds->ds_phys->ds_unique_bytes; 3442 delta = MAX(0, (int64_t)(effective_value - unique)) - 3443 MAX(0, (int64_t)(ds->ds_reserved - unique)); 3444 ds->ds_reserved = effective_value; 3445 mutex_exit(&ds->ds_lock); 3446 3447 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 3448 mutex_exit(&ds->ds_dir->dd_lock); 3449 3450 spa_history_log_internal(LOG_DS_REFRESERV, 3451 ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu", 3452 (longlong_t)effective_value, ds->ds_object); 3453} 3454 3455int 3456dsl_dataset_set_reservation(const char *dsname, zprop_source_t source, 3457 uint64_t reservation) 3458{ 3459 dsl_dataset_t *ds; 3460 dsl_prop_setarg_t psa; 3461 int err; 3462 3463 dsl_prop_setarg_init_uint64(&psa, "refreservation", source, 3464 &reservation); 3465 3466 err = dsl_dataset_hold(dsname, FTAG, &ds); 3467 if (err) 3468 return (err); 3469 3470 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3471 dsl_dataset_set_reservation_check, 3472 dsl_dataset_set_reservation_sync, ds, &psa, 0); 3473 3474 dsl_dataset_rele(ds, FTAG); 3475 return (err); 3476} 3477 3478typedef struct zfs_hold_cleanup_arg { 3479 dsl_pool_t *dp; 3480 uint64_t dsobj; 3481 char htag[MAXNAMELEN]; 3482} zfs_hold_cleanup_arg_t; 3483 3484static void 3485dsl_dataset_user_release_onexit(void *arg) 3486{ 3487 zfs_hold_cleanup_arg_t *ca = arg; 3488 3489 (void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag, 3490 B_TRUE); 3491 kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); 3492} 3493 3494void 3495dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, 3496 minor_t minor) 3497{ 3498 zfs_hold_cleanup_arg_t *ca; 3499 3500 ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP); 3501 ca->dp = ds->ds_dir->dd_pool; 3502 ca->dsobj = ds->ds_object; 3503 (void) strlcpy(ca->htag, htag, sizeof (ca->htag)); 3504 VERIFY3U(0, ==, zfs_onexit_add_cb(minor, 3505 
dsl_dataset_user_release_onexit, ca, NULL)); 3506} 3507 3508/* 3509 * If you add new checks here, you may need to add 3510 * additional checks to the "temporary" case in 3511 * snapshot_check() in dmu_objset.c. 3512 */ 3513static int 3514dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx) 3515{ 3516 dsl_dataset_t *ds = arg1; 3517 struct dsl_ds_holdarg *ha = arg2; 3518 char *htag = ha->htag; 3519 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3520 int error = 0; 3521 3522 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) 3523 return (ENOTSUP); 3524 3525 if (!dsl_dataset_is_snapshot(ds)) 3526 return (EINVAL); 3527 3528 /* tags must be unique */ 3529 mutex_enter(&ds->ds_lock); 3530 if (ds->ds_phys->ds_userrefs_obj) { 3531 error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag, 3532 8, 1, tx); 3533 if (error == 0) 3534 error = EEXIST; 3535 else if (error == ENOENT) 3536 error = 0; 3537 } 3538 mutex_exit(&ds->ds_lock); 3539 3540 if (error == 0 && ha->temphold && 3541 strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) 3542 error = E2BIG; 3543 3544 return (error); 3545} 3546 3547void 3548dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) 3549{ 3550 dsl_dataset_t *ds = arg1; 3551 struct dsl_ds_holdarg *ha = arg2; 3552 char *htag = ha->htag; 3553 dsl_pool_t *dp = ds->ds_dir->dd_pool; 3554 objset_t *mos = dp->dp_meta_objset; 3555 uint64_t now = gethrestime_sec(); 3556 uint64_t zapobj; 3557 3558 mutex_enter(&ds->ds_lock); 3559 if (ds->ds_phys->ds_userrefs_obj == 0) { 3560 /* 3561 * This is the first user hold for this dataset. Create 3562 * the userrefs zap object. 
3563 */ 3564 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3565 zapobj = ds->ds_phys->ds_userrefs_obj = 3566 zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx); 3567 } else { 3568 zapobj = ds->ds_phys->ds_userrefs_obj; 3569 } 3570 ds->ds_userrefs++; 3571 mutex_exit(&ds->ds_lock); 3572 3573 VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx)); 3574 3575 if (ha->temphold) { 3576 VERIFY(0 == dsl_pool_user_hold(dp, ds->ds_object, 3577 htag, &now, tx)); 3578 } 3579 3580 spa_history_log_internal(LOG_DS_USER_HOLD, 3581 dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag, 3582 (int)ha->temphold, ds->ds_object); 3583} 3584 3585static int 3586dsl_dataset_user_hold_one(const char *dsname, void *arg) 3587{ 3588 struct dsl_ds_holdarg *ha = arg; 3589 dsl_dataset_t *ds; 3590 int error; 3591 char *name; 3592 3593 /* alloc a buffer to hold dsname@snapname plus terminating NULL */ 3594 name = kmem_asprintf("%s@%s", dsname, ha->snapname); 3595 error = dsl_dataset_hold(name, ha->dstg, &ds); 3596 strfree(name); 3597 if (error == 0) { 3598 ha->gotone = B_TRUE; 3599 dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check, 3600 dsl_dataset_user_hold_sync, ds, ha, 0); 3601 } else if (error == ENOENT && ha->recursive) { 3602 error = 0; 3603 } else { 3604 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 3605 } 3606 return (error); 3607} 3608 3609int 3610dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, 3611 boolean_t temphold) 3612{ 3613 struct dsl_ds_holdarg *ha; 3614 int error; 3615 3616 ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); 3617 ha->htag = htag; 3618 ha->temphold = temphold; 3619 error = dsl_sync_task_do(ds->ds_dir->dd_pool, 3620 dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync, 3621 ds, ha, 0); 3622 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3623 3624 return (error); 3625} 3626 3627int 3628dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, 3629 boolean_t recursive, boolean_t temphold, int cleanup_fd) 3630{ 3631 struct 
dsl_ds_holdarg *ha; 3632 dsl_sync_task_t *dst; 3633 spa_t *spa; 3634 int error; 3635 minor_t minor = 0; 3636 3637 if (cleanup_fd != -1) { 3638 /* Currently we only support cleanup-on-exit of tempholds. */ 3639 if (!temphold) 3640 return (EINVAL); 3641 error = zfs_onexit_fd_hold(cleanup_fd, &minor); 3642 if (error) 3643 return (error); 3644 } 3645 3646 ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); 3647 3648 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 3649 3650 error = spa_open(dsname, &spa, FTAG); 3651 if (error) { 3652 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3653 if (cleanup_fd != -1) 3654 zfs_onexit_fd_rele(cleanup_fd); 3655 return (error); 3656 } 3657 3658 ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 3659 ha->htag = htag; 3660 ha->snapname = snapname; 3661 ha->recursive = recursive; 3662 ha->temphold = temphold; 3663 3664 if (recursive) { 3665 error = dmu_objset_find(dsname, dsl_dataset_user_hold_one, 3666 ha, DS_FIND_CHILDREN); 3667 } else { 3668 error = dsl_dataset_user_hold_one(dsname, ha); 3669 } 3670 if (error == 0) 3671 error = dsl_sync_task_group_wait(ha->dstg); 3672 3673 for (dst = list_head(&ha->dstg->dstg_tasks); dst; 3674 dst = list_next(&ha->dstg->dstg_tasks, dst)) { 3675 dsl_dataset_t *ds = dst->dst_arg1; 3676 3677 if (dst->dst_err) { 3678 dsl_dataset_name(ds, ha->failed); 3679 *strchr(ha->failed, '@') = '\0'; 3680 } else if (error == 0 && minor != 0 && temphold) { 3681 /* 3682 * If this hold is to be released upon process exit, 3683 * register that action now. 
3684 */ 3685 dsl_register_onexit_hold_cleanup(ds, htag, minor); 3686 } 3687 dsl_dataset_rele(ds, ha->dstg); 3688 } 3689 3690 if (error == 0 && recursive && !ha->gotone) 3691 error = ENOENT; 3692 3693 if (error) 3694 (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); 3695 3696 dsl_sync_task_group_destroy(ha->dstg); 3697 3698 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3699 spa_close(spa, FTAG); 3700 if (cleanup_fd != -1) 3701 zfs_onexit_fd_rele(cleanup_fd); 3702 return (error); 3703} 3704 3705struct dsl_ds_releasearg { 3706 dsl_dataset_t *ds; 3707 const char *htag; 3708 boolean_t own; /* do we own or just hold ds? */ 3709}; 3710 3711static int 3712dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag, 3713 boolean_t *might_destroy) 3714{ 3715 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3716 uint64_t zapobj; 3717 uint64_t tmp; 3718 int error; 3719 3720 *might_destroy = B_FALSE; 3721 3722 mutex_enter(&ds->ds_lock); 3723 zapobj = ds->ds_phys->ds_userrefs_obj; 3724 if (zapobj == 0) { 3725 /* The tag can't possibly exist */ 3726 mutex_exit(&ds->ds_lock); 3727 return (ESRCH); 3728 } 3729 3730 /* Make sure the tag exists */ 3731 error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp); 3732 if (error) { 3733 mutex_exit(&ds->ds_lock); 3734 if (error == ENOENT) 3735 error = ESRCH; 3736 return (error); 3737 } 3738 3739 if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 && 3740 DS_IS_DEFER_DESTROY(ds)) 3741 *might_destroy = B_TRUE; 3742 3743 mutex_exit(&ds->ds_lock); 3744 return (0); 3745} 3746 3747static int 3748dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx) 3749{ 3750 struct dsl_ds_releasearg *ra = arg1; 3751 dsl_dataset_t *ds = ra->ds; 3752 boolean_t might_destroy; 3753 int error; 3754 3755 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) 3756 return (ENOTSUP); 3757 3758 error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy); 3759 if (error) 3760 return (error); 3761 3762 if 
(might_destroy) { 3763 struct dsl_ds_destroyarg dsda = {0}; 3764 3765 if (dmu_tx_is_syncing(tx)) { 3766 /* 3767 * If we're not prepared to remove the snapshot, 3768 * we can't allow the release to happen right now. 3769 */ 3770 if (!ra->own) 3771 return (EBUSY); 3772 } 3773 dsda.ds = ds; 3774 dsda.releasing = B_TRUE; 3775 return (dsl_dataset_destroy_check(&dsda, tag, tx)); 3776 } 3777 3778 return (0); 3779} 3780 3781static void 3782dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) 3783{ 3784 struct dsl_ds_releasearg *ra = arg1; 3785 dsl_dataset_t *ds = ra->ds; 3786 dsl_pool_t *dp = ds->ds_dir->dd_pool; 3787 objset_t *mos = dp->dp_meta_objset; 3788 uint64_t zapobj; 3789 uint64_t dsobj = ds->ds_object; 3790 uint64_t refs; 3791 int error; 3792 3793 mutex_enter(&ds->ds_lock); 3794 ds->ds_userrefs--; 3795 refs = ds->ds_userrefs; 3796 mutex_exit(&ds->ds_lock); 3797 error = dsl_pool_user_release(dp, ds->ds_object, ra->htag, tx); 3798 VERIFY(error == 0 || error == ENOENT); 3799 zapobj = ds->ds_phys->ds_userrefs_obj; 3800 VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx)); 3801 if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 && 3802 DS_IS_DEFER_DESTROY(ds)) { 3803 struct dsl_ds_destroyarg dsda = {0}; 3804 3805 ASSERT(ra->own); 3806 dsda.ds = ds; 3807 dsda.releasing = B_TRUE; 3808 /* We already did the destroy_check */ 3809 dsl_dataset_destroy_sync(&dsda, tag, tx); 3810 } 3811 3812 spa_history_log_internal(LOG_DS_USER_RELEASE, 3813 dp->dp_spa, tx, "<%s> %lld dataset = %llu", 3814 ra->htag, (longlong_t)refs, dsobj); 3815} 3816 3817static int 3818dsl_dataset_user_release_one(const char *dsname, void *arg) 3819{ 3820 struct dsl_ds_holdarg *ha = arg; 3821 struct dsl_ds_releasearg *ra; 3822 dsl_dataset_t *ds; 3823 int error; 3824 void *dtag = ha->dstg; 3825 char *name; 3826 boolean_t own = B_FALSE; 3827 boolean_t might_destroy; 3828 3829 /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */ 3830 name = kmem_asprintf("%s@%s", 
dsname, ha->snapname); 3831 error = dsl_dataset_hold(name, dtag, &ds); 3832 strfree(name); 3833 if (error == ENOENT && ha->recursive) 3834 return (0); 3835 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 3836 if (error) 3837 return (error); 3838 3839 ha->gotone = B_TRUE; 3840 3841 ASSERT(dsl_dataset_is_snapshot(ds)); 3842 3843 error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy); 3844 if (error) { 3845 dsl_dataset_rele(ds, dtag); 3846 return (error); 3847 } 3848 3849 if (might_destroy) { 3850#ifdef _KERNEL 3851 name = kmem_asprintf("%s@%s", dsname, ha->snapname); 3852 error = zfs_unmount_snap(name, NULL); 3853 strfree(name); 3854 if (error) { 3855 dsl_dataset_rele(ds, dtag); 3856 return (error); 3857 } 3858#endif 3859 if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) { 3860 dsl_dataset_rele(ds, dtag); 3861 return (EBUSY); 3862 } else { 3863 own = B_TRUE; 3864 dsl_dataset_make_exclusive(ds, dtag); 3865 } 3866 } 3867 3868 ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP); 3869 ra->ds = ds; 3870 ra->htag = ha->htag; 3871 ra->own = own; 3872 dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check, 3873 dsl_dataset_user_release_sync, ra, dtag, 0); 3874 3875 return (0); 3876} 3877 3878int 3879dsl_dataset_user_release(char *dsname, char *snapname, char *htag, 3880 boolean_t recursive) 3881{ 3882 struct dsl_ds_holdarg *ha; 3883 dsl_sync_task_t *dst; 3884 spa_t *spa; 3885 int error; 3886 3887top: 3888 ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); 3889 3890 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 3891 3892 error = spa_open(dsname, &spa, FTAG); 3893 if (error) { 3894 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3895 return (error); 3896 } 3897 3898 ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 3899 ha->htag = htag; 3900 ha->snapname = snapname; 3901 ha->recursive = recursive; 3902 if (recursive) { 3903 error = dmu_objset_find(dsname, dsl_dataset_user_release_one, 3904 ha, DS_FIND_CHILDREN); 3905 
} else { 3906 error = dsl_dataset_user_release_one(dsname, ha); 3907 } 3908 if (error == 0) 3909 error = dsl_sync_task_group_wait(ha->dstg); 3910 3911 for (dst = list_head(&ha->dstg->dstg_tasks); dst; 3912 dst = list_next(&ha->dstg->dstg_tasks, dst)) { 3913 struct dsl_ds_releasearg *ra = dst->dst_arg1; 3914 dsl_dataset_t *ds = ra->ds; 3915 3916 if (dst->dst_err) 3917 dsl_dataset_name(ds, ha->failed); 3918 3919 if (ra->own) 3920 dsl_dataset_disown(ds, ha->dstg); 3921 else 3922 dsl_dataset_rele(ds, ha->dstg); 3923 3924 kmem_free(ra, sizeof (struct dsl_ds_releasearg)); 3925 } 3926 3927 if (error == 0 && recursive && !ha->gotone) 3928 error = ENOENT; 3929 3930 if (error && error != EBUSY) 3931 (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); 3932 3933 dsl_sync_task_group_destroy(ha->dstg); 3934 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3935 spa_close(spa, FTAG); 3936 3937 /* 3938 * We can get EBUSY if we were racing with deferred destroy and 3939 * dsl_dataset_user_release_check() hadn't done the necessary 3940 * open context setup. We can also get EBUSY if we're racing 3941 * with destroy and that thread is the ds_owner. Either way 3942 * the busy condition should be transient, and we should retry 3943 * the release operation. 3944 */ 3945 if (error == EBUSY) 3946 goto top; 3947 3948 return (error); 3949} 3950 3951/* 3952 * Called at spa_load time (with retry == B_FALSE) to release a stale 3953 * temporary user hold. Also called by the onexit code (with retry == B_TRUE). 
3954 */ 3955int 3956dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag, 3957 boolean_t retry) 3958{ 3959 dsl_dataset_t *ds; 3960 char *snap; 3961 char *name; 3962 int namelen; 3963 int error; 3964 3965 do { 3966 rw_enter(&dp->dp_config_rwlock, RW_READER); 3967 error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); 3968 rw_exit(&dp->dp_config_rwlock); 3969 if (error) 3970 return (error); 3971 namelen = dsl_dataset_namelen(ds)+1; 3972 name = kmem_alloc(namelen, KM_SLEEP); 3973 dsl_dataset_name(ds, name); 3974 dsl_dataset_rele(ds, FTAG); 3975 3976 snap = strchr(name, '@'); 3977 *snap = '\0'; 3978 ++snap; 3979 error = dsl_dataset_user_release(name, snap, htag, B_FALSE); 3980 kmem_free(name, namelen); 3981 3982 /* 3983 * The object can't have been destroyed because we have a hold, 3984 * but it might have been renamed, resulting in ENOENT. Retry 3985 * if we've been requested to do so. 3986 * 3987 * It would be nice if we could use the dsobj all the way 3988 * through and avoid ENOENT entirely. But we might need to 3989 * unmount the snapshot, and there's currently no way to lookup 3990 * a vfsp using a ZFS object id. 
3991 */ 3992 } while ((error == ENOENT) && retry); 3993 3994 return (error); 3995} 3996 3997int 3998dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp) 3999{ 4000 dsl_dataset_t *ds; 4001 int err; 4002 4003 err = dsl_dataset_hold(dsname, FTAG, &ds); 4004 if (err) 4005 return (err); 4006 4007 VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)); 4008 if (ds->ds_phys->ds_userrefs_obj != 0) { 4009 zap_attribute_t *za; 4010 zap_cursor_t zc; 4011 4012 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 4013 for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, 4014 ds->ds_phys->ds_userrefs_obj); 4015 zap_cursor_retrieve(&zc, za) == 0; 4016 zap_cursor_advance(&zc)) { 4017 VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name, 4018 za->za_first_integer)); 4019 } 4020 zap_cursor_fini(&zc); 4021 kmem_free(za, sizeof (zap_attribute_t)); 4022 } 4023 dsl_dataset_rele(ds, FTAG); 4024 return (0); 4025} 4026 4027/*
}
}

/*
 * Fill in the subset of 'stat' that can be read straight from the
 * dataset's on-disk header, plus the origin name for clones (looked up
 * under the pool config lock, reader).
 */
void
dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
{
	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
	stat->dds_guid = ds->ds_phys->ds_guid;
	/* a non-zero ds_next_snap_obj is what marks a snapshot here */
	if (ds->ds_phys->ds_next_snap_obj) {
		stat->dds_is_snapshot = B_TRUE;
		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
	} else {
		stat->dds_is_snapshot = B_FALSE;
		stat->dds_num_clones = 0;
	}

	/* clone origin is really a dsl_dir thing... */
	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
	if (dsl_dir_is_clone(ds->ds_dir)) {
		dsl_dataset_t *ods;

		VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool,
		    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
		dsl_dataset_name(ods, stat->dds_origin);
		dsl_dataset_drop_ref(ods, FTAG);
	} else {
		stat->dds_origin[0] = '\0';
	}
	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
}

/* Return the dataset's ds_fsid_guid. */
uint64_t
dsl_dataset_fsid_guid(dsl_dataset_t *ds)
{
	return (ds->ds_fsid_guid);
}

/*
 * Report space and object usage for 'ds'.  Available bytes start from
 * the dsl_dir's available space, gain any unconsumed refreservation, and
 * are then capped by the remaining refquota (0 once the quota is reached).
 */
void
dsl_dataset_space(dsl_dataset_t *ds,
    uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	*refdbytesp = ds->ds_phys->ds_used_bytes;
	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
	if (ds->ds_quota != 0) {
		/*
		 * Adjust available bytes according to refquota
		 */
		if (*refdbytesp < ds->ds_quota)
			*availbytesp = MIN(*availbytesp,
			    ds->ds_quota - *refdbytesp);
		else
			*availbytesp = 0;
	}
	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
}

/*
 * Return B_TRUE if 'ds' differs from its previous snapshot.  A dataset
 * with no previous snapshot is reported as unmodified.  If either objset
 * cannot be obtained the dataset is conservatively reported as modified.
 */
boolean_t
dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));
	if (ds->ds_prev == NULL)
		return (B_FALSE);
	if (ds->ds_phys->ds_bp.blk_birth >
	    ds->ds_prev->ds_phys->ds_creation_txg) {
		objset_t *os, *os_prev;
		/*
		 * It may be that only the ZIL differs, because it was
		 * reset in the head.  Don't count that as being
		 * modified.
		 */
		if (dmu_objset_from_ds(ds, &os) != 0)
			return (B_TRUE);
		if (dmu_objset_from_ds(ds->ds_prev, &os_prev) != 0)
			return (B_TRUE);
		/* compare only the meta dnodes of the two objsets */
		return (bcmp(&os->os_phys->os_meta_dnode,
		    &os_prev->os_phys->os_meta_dnode,
		    sizeof (os->os_phys->os_meta_dnode)) != 0);
	}
	return (B_FALSE);
}

/*
 * Check half of the snapshot-rename task (arg2 is the new snapshot name).
 * Returns EEXIST if the head dataset already has a snapshot of that name,
 * ENAMETOOLONG if the full "dir@newsnap" name would not fit in MAXNAMELEN
 * (this replaces any earlier error), or another lookup/hold error.
 */
/* ARGSUSED */
static int
dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	char *newsnapname = arg2;
	dsl_dir_t *dd = ds->ds_dir;
	dsl_dataset_t *hds;
	uint64_t val;
	int err;

	err = dsl_dataset_hold_obj(dd->dd_pool,
	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds);
	if (err)
		return (err);

	/* new name better not be in use */
	err = dsl_dataset_snap_lookup(hds, newsnapname, &val);
	dsl_dataset_rele(hds, FTAG);

	if (err == 0)
		err = EEXIST;
	else if (err == ENOENT)
		err = 0;

	/* dataset name + 1 for the "@" + the new snapshot name must fit */
	if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
		err = ENAMETOOLONG;

	return (err);
}

/*
 * Sync half of the snapshot-rename task: removes the old name from the
 * head dataset's snapshot-name zap, installs the new name in the in-core
 * dataset, adds the new zap entry, renames zvol minors (kernel only) and
 * writes a history record.
 */
static void
dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	char oldname[MAXPATHLEN], newname[MAXPATHLEN];
	dsl_dataset_t *ds = arg1;
	const char *newsnapname = arg2;
	dsl_dir_t *dd = ds->ds_dir;
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	dsl_dataset_t *hds;
	int err;

	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);

	VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));

	VERIFY(0 == dsl_dataset_get_snapname(ds));
	err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
	ASSERT3U(err, ==, 0);
	/* capture the full old name before ds_snapname is overwritten */
	dsl_dataset_name(ds, oldname);
	mutex_enter(&ds->ds_lock);
	/*
	 * Unbounded copy; the length of "dir@newsnap" was limited by
	 * dsl_dataset_snapshot_rename_check().
	 */
	(void) strcpy(ds->ds_snapname, newsnapname);
	mutex_exit(&ds->ds_lock);
	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
	ASSERT3U(err, ==, 0);
	dsl_dataset_name(ds, newname);
#ifdef _KERNEL
	zvol_rename_minors(oldname, newname);
#endif

	spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
	    "dataset = %llu", ds->ds_object);
	dsl_dataset_rele(hds, FTAG);
}

/* State shared across a recursive snapshot rename. */
struct renamesnaparg {
	dsl_sync_task_group_t *dstg;
	char failed[MAXPATHLEN];
	char *oldsnap;
	char *newsnap;
};

/*
 * Per-dataset worker for dsl_recursive_rename(): checks rename
 * permission on "name@oldsnap", unmounts it (kernel only, result
 * ignored), holds it with the task group as tag and queues a rename task.
 * ENOENT from the permission check or the hold is treated as success so
 * that datasets without the snapshot are skipped.
 */
static int
dsl_snapshot_rename_one(const char *name, void *arg)
{
	struct renamesnaparg *ra = arg;
	dsl_dataset_t *ds = NULL;
	char *snapname;
	int err;

	snapname = kmem_asprintf("%s@%s", name, ra->oldsnap);
	(void) strlcpy(ra->failed, snapname, sizeof (ra->failed));

	/*
	 * For recursive snapshot renames the parent won't be changing
	 * so we just pass name for both the to/from argument.
	 */
	err = zfs_secpolicy_rename_perms(snapname, snapname, CRED());
	if (err != 0) {
		strfree(snapname);
		return (err == ENOENT ? 0 : err);
	}

#ifdef _KERNEL
	/*
	 * For all filesystems undergoing rename, we'll need to unmount it.
	 */
	(void) zfs_unmount_snap(snapname, NULL);
#endif
	err = dsl_dataset_hold(snapname, ra->dstg, &ds);
	strfree(snapname);
	if (err != 0)
		return (err == ENOENT ? 0 : err);

	dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
	    dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);

	return (0);
}

/*
 * Rename snapshot "fs@old" to "fs@new" on 'fs' and all of its
 * descendants as one sync-task group.  On error, oldname is overwritten
 * with the name recorded in ra->failed.
 *
 * NOTE(review): both names are assumed to contain '@' -- the strchr()
 * results are used unchecked; confirm the caller validates this.
 */
static int
dsl_recursive_rename(char *oldname, const char *newname)
{
	int err;
	struct renamesnaparg *ra;
	dsl_sync_task_t *dst;
	spa_t *spa;
	char *cp, *fsname = spa_strdup(oldname);
	/* length of the full copy, kept for kmem_free() after truncation */
	int len = strlen(oldname) + 1;

	/* truncate the snapshot name to get the fsname */
	cp = strchr(fsname, '@');
	*cp = '\0';

	err = spa_open(fsname, &spa, FTAG);
	if (err) {
		kmem_free(fsname, len);
		return (err);
	}
	ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
	ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));

	ra->oldsnap = strchr(oldname, '@') + 1;
	ra->newsnap = strchr(newname, '@') + 1;
	*ra->failed = '\0';

	err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
	    DS_FIND_CHILDREN);
	kmem_free(fsname, len);

	if (err == 0) {
		err = dsl_sync_task_group_wait(ra->dstg);
	}

	/* report any failed task by its new name and drop the holds */
	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		if (dst->dst_err) {
			dsl_dir_name(ds->ds_dir, ra->failed);
			(void) strlcat(ra->failed, "@", sizeof (ra->failed));
			(void) strlcat(ra->failed, ra->newsnap,
			    sizeof (ra->failed));
		}
		dsl_dataset_rele(ds, ra->dstg);
	}

	/*
	 * NOTE(review): the copy is bounded by sizeof (ra->failed), not by
	 * the size of the caller's oldname buffer -- confirm it is at
	 * least MAXPATHLEN bytes.
	 */
	if (err)
		(void) strlcpy(oldname, ra->failed, sizeof (ra->failed));

	dsl_sync_task_group_destroy(ra->dstg);
	kmem_free(ra, sizeof (struct renamesnaparg));
	spa_close(spa, FTAG);
	return (err);
}

/*
 * Callback taking a name and a pointer to an int length delta: returns
 * ENAMETOOLONG if the name grown by that delta would reach MAXNAMELEN,
 * 0 otherwise.
 */
static int
dsl_valid_rename(const char *oldname, void *arg)
{
	int delta = *(int *)arg;

	if (strlen(oldname) + delta >= MAXNAMELEN)
		return (ENAMETOOLONG);

	return (0);
}

#pragma weak dmu_objset_rename
= dsl_dataset_rename 2563int 2564dsl_dataset_rename(char *oldname, const char *newname, int flags) 2565{ 2566 dsl_dir_t *dd; 2567 dsl_dataset_t *ds; 2568 const char *tail; 2569 int err; 2570 2571 err = dsl_dir_open(oldname, FTAG, &dd, &tail); 2572 if (err) 2573 return (err); 2574 2575 if (tail == NULL) { 2576 int delta = strlen(newname) - strlen(oldname); 2577 2578 /* if we're growing, validate child name lengths */ 2579 if (delta > 0) 2580 err = dmu_objset_find(oldname, dsl_valid_rename, 2581 &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 2582 2583 if (err == 0) 2584 err = dsl_dir_rename(dd, newname, flags); 2585 dsl_dir_close(dd, FTAG); 2586 return (err); 2587 } 2588 2589 if (tail[0] != '@') { 2590 /* the name ended in a nonexistent component */ 2591 dsl_dir_close(dd, FTAG); 2592 return (ENOENT); 2593 } 2594 2595 dsl_dir_close(dd, FTAG); 2596 2597 /* new name must be snapshot in same filesystem */ 2598 tail = strchr(newname, '@'); 2599 if (tail == NULL) 2600 return (EINVAL); 2601 tail++; 2602 if (strncmp(oldname, newname, tail - newname) != 0) 2603 return (EXDEV); 2604 2605 if (flags & ZFS_RENAME_RECURSIVE) { 2606 err = dsl_recursive_rename(oldname, newname); 2607 } else { 2608 err = dsl_dataset_hold(oldname, FTAG, &ds); 2609 if (err) 2610 return (err); 2611 2612 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2613 dsl_dataset_snapshot_rename_check, 2614 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 2615 2616 dsl_dataset_rele(ds, FTAG); 2617 } 2618 2619 return (err); 2620} 2621 2622struct promotenode { 2623 list_node_t link; 2624 dsl_dataset_t *ds; 2625}; 2626 2627struct promotearg { 2628 list_t shared_snaps, origin_snaps, clone_snaps; 2629 dsl_dataset_t *origin_origin; 2630 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 2631 char *err_ds; 2632}; 2633 2634static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 2635static boolean_t snaplist_unstable(list_t *l); 2636 2637static int 2638dsl_dataset_promote_check(void 
*arg1, void *arg2, dmu_tx_t *tx) 2639{ 2640 dsl_dataset_t *hds = arg1; 2641 struct promotearg *pa = arg2; 2642 struct promotenode *snap = list_head(&pa->shared_snaps); 2643 dsl_dataset_t *origin_ds = snap->ds; 2644 int err; 2645 uint64_t unused; 2646 2647 /* Check that it is a real clone */ 2648 if (!dsl_dir_is_clone(hds->ds_dir)) 2649 return (EINVAL); 2650 2651 /* Since this is so expensive, don't do the preliminary check */ 2652 if (!dmu_tx_is_syncing(tx)) 2653 return (0); 2654 2655 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) 2656 return (EXDEV); 2657 2658 /* compute origin's new unique space */ 2659 snap = list_tail(&pa->clone_snaps); 2660 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2661 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2662 origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2663 &pa->unique, &unused, &unused); 2664 2665 /* 2666 * Walk the snapshots that we are moving 2667 * 2668 * Compute space to transfer. Consider the incremental changes 2669 * to used for each snapshot: 2670 * (my used) = (prev's used) + (blocks born) - (blocks killed) 2671 * So each snapshot gave birth to: 2672 * (blocks born) = (my used) - (prev's used) + (blocks killed) 2673 * So a sequence would look like: 2674 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 2675 * Which simplifies to: 2676 * uN + kN + kN-1 + ... + k1 + k0 2677 * Note however, if we stop before we reach the ORIGIN we get: 2678 * uN + kN + kN-1 + ... 
+ kM - uM-1 2679 */ 2680 pa->used = origin_ds->ds_phys->ds_used_bytes; 2681 pa->comp = origin_ds->ds_phys->ds_compressed_bytes; 2682 pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 2683 for (snap = list_head(&pa->shared_snaps); snap; 2684 snap = list_next(&pa->shared_snaps, snap)) { 2685 uint64_t val, dlused, dlcomp, dluncomp; 2686 dsl_dataset_t *ds = snap->ds; 2687 2688 /* Check that the snapshot name does not conflict */ 2689 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2690 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2691 if (err == 0) { 2692 err = EEXIST; 2693 goto out; 2694 } 2695 if (err != ENOENT) 2696 goto out; 2697 2698 /* The very first snapshot does not have a deadlist */ 2699 if (ds->ds_phys->ds_prev_snap_obj == 0) 2700 continue; 2701 2702 dsl_deadlist_space(&ds->ds_deadlist, 2703 &dlused, &dlcomp, &dluncomp); 2704 pa->used += dlused; 2705 pa->comp += dlcomp; 2706 pa->uncomp += dluncomp; 2707 } 2708 2709 /* 2710 * If we are a clone of a clone then we never reached ORIGIN, 2711 * so we need to subtract out the clone origin's used space. 2712 */ 2713 if (pa->origin_origin) { 2714 pa->used -= pa->origin_origin->ds_phys->ds_used_bytes; 2715 pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; 2716 pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; 2717 } 2718 2719 /* Check that there is enough space here */ 2720 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2721 pa->used); 2722 if (err) 2723 return (err); 2724 2725 /* 2726 * Compute the amounts of space that will be used by snapshots 2727 * after the promotion (for both origin and clone). For each, 2728 * it is the amount of space that will be on all of their 2729 * deadlists (that was not born before their new origin). 
/*
 * Sync-task sync callback that carries out a clone promotion.
 *
 * arg1 is the head dataset of the clone being promoted (hds); arg2 is
 * the struct promotearg holding the snapshot lists (shared_snaps,
 * clone_snaps, origin_snaps), the optional origin_origin hold and the
 * space totals (used/comp/uncomp/unique/cloneusedsnap/originusedsnap).
 *
 * In order, this function:
 *   - relinks the origin snapshot's next-snapshot / next-clones entries,
 *   - swaps the origin relationship between the clone's dir (dd) and
 *     the origin's dir (odd),
 *   - updates the dd_clones zaps (pools at SPA_VERSION_DIR_CLONES+),
 *   - moves every snapshot on shared_snaps from odd to dd,
 *   - transfers the space accounting between the two dirs,
 *   - logs a history record.
 *
 * All failures here are fatal (VERIFY); there is no error return.
 */
static void
dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *hds = arg1;
	struct promotearg *pa = arg2;
	/* head of shared_snaps is the clone's current origin snapshot */
	struct promotenode *snap = list_head(&pa->shared_snaps);
	dsl_dataset_t *origin_ds = snap->ds;
	dsl_dataset_t *origin_head;
	dsl_dir_t *dd = hds->ds_dir;
	dsl_pool_t *dp = hds->ds_dir->dd_pool;
	dsl_dir_t *odd = NULL;
	uint64_t oldnext_obj;
	int64_t delta;

	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));

	/* head of origin_snaps is the origin filesystem's head dataset */
	snap = list_head(&pa->origin_snaps);
	origin_head = snap->ds;

	/*
	 * We need to explicitly open odd, since origin_ds's dd will be
	 * changing.
	 */
	VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
	    NULL, FTAG, &odd));

	/* change origin's next snap */
	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
	oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj;
	/* tail of clone_snaps is the dataset directly after the origin */
	snap = list_tail(&pa->clone_snaps);
	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
	origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object;

	/* change the origin's next clone */
	if (origin_ds->ds_phys->ds_next_clones_obj) {
		remove_from_next_clones(origin_ds, snap->ds->ds_object, tx);
		VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
		    origin_ds->ds_phys->ds_next_clones_obj,
		    oldnext_obj, tx));
	}

	/* change origin */
	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
	dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
	dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg;
	dmu_buf_will_dirty(odd->dd_dbuf, tx);
	odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
	origin_head->ds_dir->dd_origin_txg =
	    origin_ds->ds_phys->ds_creation_txg;

	/*
	 * change dd_clone entries
	 *
	 * NOTE(review): pa->origin_origin is dereferenced unconditionally
	 * in this branch, but dsl_dataset_promote() only sets it when the
	 * origin's dir has a nonzero dd_origin_obj.  Presumably that is
	 * always the case on pools at SPA_VERSION_DIR_CLONES or later --
	 * confirm.
	 */
	if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
		VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
		    odd->dd_phys->dd_clones, hds->ds_object, tx));
		VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
		    pa->origin_origin->ds_dir->dd_phys->dd_clones,
		    hds->ds_object, tx));

		VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
		    pa->origin_origin->ds_dir->dd_phys->dd_clones,
		    origin_head->ds_object, tx));
		/* create the promoted dir's clones zap on first use */
		if (dd->dd_phys->dd_clones == 0) {
			dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset,
			    DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
		}
		VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
		    dd->dd_phys->dd_clones, origin_head->ds_object, tx));

	}

	/* move snapshots to this dir */
	for (snap = list_head(&pa->shared_snaps); snap;
	    snap = list_next(&pa->shared_snaps, snap)) {
		dsl_dataset_t *ds = snap->ds;

		/* unregister props as dsl_dir is changing */
		if (ds->ds_objset) {
			dmu_objset_evict(ds->ds_objset);
			ds->ds_objset = NULL;
		}
		/* move snap name entry */
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		VERIFY(0 == dsl_dataset_snap_remove(origin_head,
		    ds->ds_snapname, tx));
		VERIFY(0 == zap_add(dp->dp_meta_objset,
		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
		    8, 1, &ds->ds_object, tx));

		/* change containing dsl_dir */
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
		ds->ds_phys->ds_dir_obj = dd->dd_object;
		ASSERT3P(ds->ds_dir, ==, odd);
		/* swap the dataset's dir hold from odd to dd (tag is ds) */
		dsl_dir_close(ds->ds_dir, ds);
		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
		    NULL, ds, &ds->ds_dir));

		/* move any clone references */
		if (ds->ds_phys->ds_next_clones_obj &&
		    spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
			zap_cursor_t zc;
			zap_attribute_t za;

			for (zap_cursor_init(&zc, dp->dp_meta_objset,
			    ds->ds_phys->ds_next_clones_obj);
			    zap_cursor_retrieve(&zc, &za) == 0;
			    zap_cursor_advance(&zc)) {
				dsl_dataset_t *cnds;
				uint64_t o;

				if (za.za_first_integer == oldnext_obj) {
					/*
					 * We've already moved the
					 * origin's reference.
					 */
					continue;
				}

				VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
				    za.za_first_integer, FTAG, &cnds));
				/* dd_clones is keyed by head dataset object */
				o = cnds->ds_dir->dd_phys->dd_head_dataset_obj;

				VERIFY3U(zap_remove_int(dp->dp_meta_objset,
				    odd->dd_phys->dd_clones, o, tx), ==, 0);
				VERIFY3U(zap_add_int(dp->dp_meta_objset,
				    dd->dd_phys->dd_clones, o, tx), ==, 0);
				dsl_dataset_rele(cnds, FTAG);
			}
			zap_cursor_fini(&zc);
		}

		ASSERT3U(dsl_prop_numcb(ds), ==, 0);
	}

	/*
	 * Change space accounting.
	 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either
	 * both be valid, or both be 0 (resulting in delta == 0).  This
	 * is true for each of {clone,origin} independently.
	 */

	/* the promoted dir gains pa->used, split between SNAP and HEAD */
	delta = pa->cloneusedsnap -
	    dd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
	ASSERT3S(delta, >=, 0);
	ASSERT3U(pa->used, >=, delta);
	dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);
	dsl_dir_diduse_space(dd, DD_USED_HEAD,
	    pa->used - delta, pa->comp, pa->uncomp, tx);

	/* the old origin dir loses the same amounts */
	delta = pa->originusedsnap -
	    odd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
	ASSERT3S(delta, <=, 0);
	ASSERT3U(pa->used, >=, -delta);
	dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);
	dsl_dir_diduse_space(odd, DD_USED_HEAD,
	    -pa->used - delta, -pa->comp, -pa->uncomp, tx);

	origin_ds->ds_phys->ds_unique_bytes = pa->unique;

	/* log history record */
	spa_history_log_internal(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
	    "dataset = %llu", hds->ds_object);

	dsl_dir_close(odd, FTAG);
}
2939 */ 2940static int 2941snaplist_make(dsl_pool_t *dp, boolean_t own, 2942 uint64_t first_obj, uint64_t last_obj, list_t *l) 2943{ 2944 uint64_t obj = last_obj; 2945 2946 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); 2947 2948 list_create(l, sizeof (struct promotenode), 2949 offsetof(struct promotenode, link)); 2950 2951 while (obj != first_obj) { 2952 dsl_dataset_t *ds; 2953 struct promotenode *snap; 2954 int err; 2955 2956 if (own) { 2957 err = dsl_dataset_own_obj(dp, obj, 2958 0, snaplist_tag, &ds); 2959 if (err == 0) 2960 dsl_dataset_make_exclusive(ds, snaplist_tag); 2961 } else { 2962 err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); 2963 } 2964 if (err == ENOENT) { 2965 /* lost race with snapshot destroy */ 2966 struct promotenode *last = list_tail(l); 2967 ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); 2968 obj = last->ds->ds_phys->ds_prev_snap_obj; 2969 continue; 2970 } else if (err) { 2971 return (err); 2972 } 2973 2974 if (first_obj == 0) 2975 first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2976 2977 snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP); 2978 snap->ds = ds; 2979 list_insert_tail(l, snap); 2980 obj = ds->ds_phys->ds_prev_snap_obj; 2981 } 2982 2983 return (0); 2984} 2985 2986static int 2987snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2988{ 2989 struct promotenode *snap; 2990 2991 *spacep = 0; 2992 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2993 uint64_t used, comp, uncomp; 2994 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2995 mintxg, UINT64_MAX, &used, &comp, &uncomp); 2996 *spacep += used; 2997 } 2998 return (0); 2999} 3000 3001static void 3002snaplist_destroy(list_t *l, boolean_t own) 3003{ 3004 struct promotenode *snap; 3005 3006 if (!l || !list_link_active(&l->list_head)) 3007 return; 3008 3009 while ((snap = list_tail(l)) != NULL) { 3010 list_remove(l, snap); 3011 if (own) 3012 dsl_dataset_disown(snap->ds, snaplist_tag); 3013 else 3014 dsl_dataset_rele(snap->ds, snaplist_tag); 3015 
kmem_free(snap, sizeof (struct promotenode)); 3016 } 3017 list_destroy(l); 3018} 3019 3020/* 3021 * Promote a clone. Nomenclature note: 3022 * "clone" or "cds": the original clone which is being promoted 3023 * "origin" or "ods": the snapshot which is originally clone's origin 3024 * "origin head" or "ohds": the dataset which is the head 3025 * (filesystem/volume) for the origin 3026 * "origin origin": the origin of the origin's filesystem (typically 3027 * NULL, indicating that the clone is not a clone of a clone). 3028 */ 3029int 3030dsl_dataset_promote(const char *name, char *conflsnap) 3031{ 3032 dsl_dataset_t *ds; 3033 dsl_dir_t *dd; 3034 dsl_pool_t *dp; 3035 dmu_object_info_t doi; 3036 struct promotearg pa = { 0 }; 3037 struct promotenode *snap; 3038 int err; 3039 3040 err = dsl_dataset_hold(name, FTAG, &ds); 3041 if (err) 3042 return (err); 3043 dd = ds->ds_dir; 3044 dp = dd->dd_pool; 3045 3046 err = dmu_object_info(dp->dp_meta_objset, 3047 ds->ds_phys->ds_snapnames_zapobj, &doi); 3048 if (err) { 3049 dsl_dataset_rele(ds, FTAG); 3050 return (err); 3051 } 3052 3053 if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { 3054 dsl_dataset_rele(ds, FTAG); 3055 return (EINVAL); 3056 } 3057 3058 /* 3059 * We are going to inherit all the snapshots taken before our 3060 * origin (i.e., our new origin will be our parent's origin). 3061 * Take ownership of them so that we can rename them into our 3062 * namespace. 
3063 */ 3064 rw_enter(&dp->dp_config_rwlock, RW_READER); 3065 3066 err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, 3067 &pa.shared_snaps); 3068 if (err != 0) 3069 goto out; 3070 3071 err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); 3072 if (err != 0) 3073 goto out; 3074 3075 snap = list_head(&pa.shared_snaps); 3076 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 3077 err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, 3078 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); 3079 if (err != 0) 3080 goto out; 3081 3082 if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { 3083 err = dsl_dataset_hold_obj(dp, 3084 snap->ds->ds_dir->dd_phys->dd_origin_obj, 3085 FTAG, &pa.origin_origin); 3086 if (err != 0) 3087 goto out; 3088 } 3089 3090out: 3091 rw_exit(&dp->dp_config_rwlock); 3092 3093 /* 3094 * Add in 128x the snapnames zapobj size, since we will be moving 3095 * a bunch of snapnames to the promoted ds, and dirtying their 3096 * bonus buffers. 
3097 */ 3098 if (err == 0) { 3099 err = dsl_sync_task_do(dp, dsl_dataset_promote_check, 3100 dsl_dataset_promote_sync, ds, &pa, 3101 2 + 2 * doi.doi_physical_blocks_512); 3102 if (err && pa.err_ds && conflsnap) 3103 (void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN); 3104 } 3105 3106 snaplist_destroy(&pa.shared_snaps, B_TRUE); 3107 snaplist_destroy(&pa.clone_snaps, B_FALSE); 3108 snaplist_destroy(&pa.origin_snaps, B_FALSE); 3109 if (pa.origin_origin) 3110 dsl_dataset_rele(pa.origin_origin, FTAG); 3111 dsl_dataset_rele(ds, FTAG); 3112 return (err); 3113} 3114 3115struct cloneswaparg { 3116 dsl_dataset_t *cds; /* clone dataset */ 3117 dsl_dataset_t *ohds; /* origin's head dataset */ 3118 boolean_t force; 3119 int64_t unused_refres_delta; /* change in unconsumed refreservation */ 3120}; 3121 3122/* ARGSUSED */ 3123static int 3124dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) 3125{ 3126 struct cloneswaparg *csa = arg1; 3127 3128 /* they should both be heads */ 3129 if (dsl_dataset_is_snapshot(csa->cds) || 3130 dsl_dataset_is_snapshot(csa->ohds)) 3131 return (EINVAL); 3132 3133 /* the branch point should be just before them */ 3134 if (csa->cds->ds_prev != csa->ohds->ds_prev) 3135 return (EINVAL); 3136 3137 /* cds should be the clone (unless they are unrelated) */ 3138 if (csa->cds->ds_prev != NULL && 3139 csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap && 3140 csa->ohds->ds_object != 3141 csa->cds->ds_prev->ds_phys->ds_next_snap_obj) 3142 return (EINVAL); 3143 3144 /* the clone should be a child of the origin */ 3145 if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) 3146 return (EINVAL); 3147 3148 /* ohds shouldn't be modified unless 'force' */ 3149 if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) 3150 return (ETXTBSY); 3151 3152 /* adjust amount of any unconsumed refreservation */ 3153 csa->unused_refres_delta = 3154 (int64_t)MIN(csa->ohds->ds_reserved, 3155 csa->ohds->ds_phys->ds_unique_bytes) - 3156 
/*
 * Sync-task sync callback that swaps the contents of a clone (csa->cds)
 * with its origin head (csa->ohds).  arg1 is the struct cloneswaparg;
 * arg2 is unused.
 *
 * The two datasets exchange their block pointers, their
 * used/compressed/uncompressed/unique byte counts and their deadlist
 * objects.  Dir-level space accounting is adjusted from the values read
 * before the swap, so the order of the steps below matters.
 */
/* ARGSUSED */
static void
dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	struct cloneswaparg *csa = arg1;
	dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;

	ASSERT(csa->cds->ds_reserved == 0);
	ASSERT(csa->ohds->ds_quota == 0 ||
	    csa->cds->ds_phys->ds_unique_bytes <= csa->ohds->ds_quota);

	dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
	dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);

	/* drop any cached objsets; the block pointers are about to change */
	if (csa->cds->ds_objset != NULL) {
		dmu_objset_evict(csa->cds->ds_objset);
		csa->cds->ds_objset = NULL;
	}

	if (csa->ohds->ds_objset != NULL) {
		dmu_objset_evict(csa->ohds->ds_objset);
		csa->ohds->ds_objset = NULL;
	}

	/*
	 * Reset origin's unique bytes, if it exists.
	 * The value is taken from the clone's deadlist, for blocks born
	 * after the origin's previous snapshot txg.
	 */
	if (csa->cds->ds_prev) {
		dsl_dataset_t *origin = csa->cds->ds_prev;
		uint64_t comp, uncomp;

		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		dsl_deadlist_space_range(&csa->cds->ds_deadlist,
		    origin->ds_phys->ds_prev_snap_txg, UINT64_MAX,
		    &origin->ds_phys->ds_unique_bytes, &comp, &uncomp);
	}

	/* swap blkptrs */
	{
		blkptr_t tmp;
		tmp = csa->ohds->ds_phys->ds_bp;
		csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
		csa->cds->ds_phys->ds_bp = tmp;
	}

	/* set dd_*_bytes */
	{
		int64_t dused, dcomp, duncomp;
		uint64_t cdl_used, cdl_comp, cdl_uncomp;
		uint64_t odl_used, odl_comp, odl_uncomp;

		ASSERT3U(csa->cds->ds_dir->dd_phys->
		    dd_used_breakdown[DD_USED_SNAP], ==, 0);

		dsl_deadlist_space(&csa->cds->ds_deadlist,
		    &cdl_used, &cdl_comp, &cdl_uncomp);
		dsl_deadlist_space(&csa->ohds->ds_deadlist,
		    &odl_used, &odl_comp, &odl_uncomp);

		/*
		 * Per-dir delta: (clone's bytes + clone's deadlist) minus
		 * (origin head's bytes + origin head's deadlist).  The
		 * ds_*_bytes fields are read here, before they are
		 * swapped further down.
		 */
		dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
		    (csa->ohds->ds_phys->ds_used_bytes + odl_used);
		dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
		    (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
		duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
		    cdl_uncomp -
		    (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);

		/* the two dirs move by equal and opposite amounts */
		dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD,
		    dused, dcomp, duncomp, tx);
		dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD,
		    -dused, -dcomp, -duncomp, tx);

		/*
		 * The difference in the space used by snapshots is the
		 * difference in snapshot space due to the head's
		 * deadlist (since that's the only thing that's
		 * changing that affects the snapused).
		 */
		dsl_deadlist_space_range(&csa->cds->ds_deadlist,
		    csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
		    &cdl_used, &cdl_comp, &cdl_uncomp);
		dsl_deadlist_space_range(&csa->ohds->ds_deadlist,
		    csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
		    &odl_used, &odl_comp, &odl_uncomp);
		dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
		    DD_USED_HEAD, DD_USED_SNAP, tx);
	}

	/* swap ds_*_bytes */
	SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
	    csa->cds->ds_phys->ds_used_bytes);
	SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
	    csa->cds->ds_phys->ds_compressed_bytes);
	SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
	    csa->cds->ds_phys->ds_uncompressed_bytes);
	SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
	    csa->cds->ds_phys->ds_unique_bytes);

	/* apply any parent delta for change in unconsumed refreservation */
	dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV,
	    csa->unused_refres_delta, 0, 0, tx);

	/*
	 * Swap deadlists.
	 * Both are closed first, the on-disk object numbers are
	 * exchanged, and each is reopened on its new object.
	 */
	dsl_deadlist_close(&csa->cds->ds_deadlist);
	dsl_deadlist_close(&csa->ohds->ds_deadlist);
	SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
	    csa->cds->ds_phys->ds_deadlist_obj);
	dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
	    csa->cds->ds_phys->ds_deadlist_obj);
	dsl_deadlist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
	    csa->ohds->ds_phys->ds_deadlist_obj);

	dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx);
}
3293 */ 3294int 3295dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, 3296 boolean_t force) 3297{ 3298 struct cloneswaparg csa; 3299 int error; 3300 3301 ASSERT(clone->ds_owner); 3302 ASSERT(origin_head->ds_owner); 3303retry: 3304 /* 3305 * Need exclusive access for the swap. If we're swapping these 3306 * datasets back after an error, we already hold the locks. 3307 */ 3308 if (!RW_WRITE_HELD(&clone->ds_rwlock)) 3309 rw_enter(&clone->ds_rwlock, RW_WRITER); 3310 if (!RW_WRITE_HELD(&origin_head->ds_rwlock) && 3311 !rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) { 3312 rw_exit(&clone->ds_rwlock); 3313 rw_enter(&origin_head->ds_rwlock, RW_WRITER); 3314 if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) { 3315 rw_exit(&origin_head->ds_rwlock); 3316 goto retry; 3317 } 3318 } 3319 csa.cds = clone; 3320 csa.ohds = origin_head; 3321 csa.force = force; 3322 error = dsl_sync_task_do(clone->ds_dir->dd_pool, 3323 dsl_dataset_clone_swap_check, 3324 dsl_dataset_clone_swap_sync, &csa, NULL, 9); 3325 return (error); 3326} 3327 3328/* 3329 * Given a pool name and a dataset object number in that pool, 3330 * return the name of that dataset. 
3331 */ 3332int 3333dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 3334{ 3335 spa_t *spa; 3336 dsl_pool_t *dp; 3337 dsl_dataset_t *ds; 3338 int error; 3339 3340 if ((error = spa_open(pname, &spa, FTAG)) != 0) 3341 return (error); 3342 dp = spa_get_dsl(spa); 3343 rw_enter(&dp->dp_config_rwlock, RW_READER); 3344 if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) { 3345 dsl_dataset_name(ds, buf); 3346 dsl_dataset_rele(ds, FTAG); 3347 } 3348 rw_exit(&dp->dp_config_rwlock); 3349 spa_close(spa, FTAG); 3350 3351 return (error); 3352} 3353 3354int 3355dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 3356 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 3357{ 3358 int error = 0; 3359 3360 ASSERT3S(asize, >, 0); 3361 3362 /* 3363 * *ref_rsrv is the portion of asize that will come from any 3364 * unconsumed refreservation space. 3365 */ 3366 *ref_rsrv = 0; 3367 3368 mutex_enter(&ds->ds_lock); 3369 /* 3370 * Make a space adjustment for reserved bytes. 3371 */ 3372 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 3373 ASSERT3U(*used, >=, 3374 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 3375 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 3376 *ref_rsrv = 3377 asize - MIN(asize, parent_delta(ds, asize + inflight)); 3378 } 3379 3380 if (!check_quota || ds->ds_quota == 0) { 3381 mutex_exit(&ds->ds_lock); 3382 return (0); 3383 } 3384 /* 3385 * If they are requesting more space, and our current estimate 3386 * is over quota, they get to try again unless the actual 3387 * on-disk is over quota and there are no pending changes (which 3388 * may free up space for us). 
3389 */ 3390 if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { 3391 if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) 3392 error = ERESTART; 3393 else 3394 error = EDQUOT; 3395 } 3396 mutex_exit(&ds->ds_lock); 3397 3398 return (error); 3399} 3400 3401/* ARGSUSED */ 3402static int 3403dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) 3404{ 3405 dsl_dataset_t *ds = arg1; 3406 dsl_prop_setarg_t *psa = arg2; 3407 int err; 3408 3409 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) 3410 return (ENOTSUP); 3411 3412 if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) 3413 return (err); 3414 3415 if (psa->psa_effective_value == 0) 3416 return (0); 3417 3418 if (psa->psa_effective_value < ds->ds_phys->ds_used_bytes || 3419 psa->psa_effective_value < ds->ds_reserved) 3420 return (ENOSPC); 3421 3422 return (0); 3423} 3424 3425extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *); 3426 3427void 3428dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) 3429{ 3430 dsl_dataset_t *ds = arg1; 3431 dsl_prop_setarg_t *psa = arg2; 3432 uint64_t effective_value = psa->psa_effective_value; 3433 3434 dsl_prop_set_sync(ds, psa, tx); 3435 DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); 3436 3437 if (ds->ds_quota != effective_value) { 3438 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3439 ds->ds_quota = effective_value; 3440 3441 spa_history_log_internal(LOG_DS_REFQUOTA, 3442 ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu ", 3443 (longlong_t)ds->ds_quota, ds->ds_object); 3444 } 3445} 3446 3447int 3448dsl_dataset_set_quota(const char *dsname, zprop_source_t source, uint64_t quota) 3449{ 3450 dsl_dataset_t *ds; 3451 dsl_prop_setarg_t psa; 3452 int err; 3453 3454 dsl_prop_setarg_init_uint64(&psa, "refquota", source, "a); 3455 3456 err = dsl_dataset_hold(dsname, FTAG, &ds); 3457 if (err) 3458 return (err); 3459 3460 /* 3461 * If someone removes a file, then tries to set the quota, we 3462 * want to make sure the file 
freeing takes effect. 3463 */ 3464 txg_wait_open(ds->ds_dir->dd_pool, 0); 3465 3466 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3467 dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, 3468 ds, &psa, 0); 3469 3470 dsl_dataset_rele(ds, FTAG); 3471 return (err); 3472} 3473 3474static int 3475dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) 3476{ 3477 dsl_dataset_t *ds = arg1; 3478 dsl_prop_setarg_t *psa = arg2; 3479 uint64_t effective_value; 3480 uint64_t unique; 3481 int err; 3482 3483 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 3484 SPA_VERSION_REFRESERVATION) 3485 return (ENOTSUP); 3486 3487 if (dsl_dataset_is_snapshot(ds)) 3488 return (EINVAL); 3489 3490 if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) 3491 return (err); 3492 3493 effective_value = psa->psa_effective_value; 3494 3495 /* 3496 * If we are doing the preliminary check in open context, the 3497 * space estimates may be inaccurate. 3498 */ 3499 if (!dmu_tx_is_syncing(tx)) 3500 return (0); 3501 3502 mutex_enter(&ds->ds_lock); 3503 if (!DS_UNIQUE_IS_ACCURATE(ds)) 3504 dsl_dataset_recalc_head_uniq(ds); 3505 unique = ds->ds_phys->ds_unique_bytes; 3506 mutex_exit(&ds->ds_lock); 3507 3508 if (MAX(unique, effective_value) > MAX(unique, ds->ds_reserved)) { 3509 uint64_t delta = MAX(unique, effective_value) - 3510 MAX(unique, ds->ds_reserved); 3511 3512 if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 3513 return (ENOSPC); 3514 if (ds->ds_quota > 0 && 3515 effective_value > ds->ds_quota) 3516 return (ENOSPC); 3517 } 3518 3519 return (0); 3520} 3521 3522static void 3523dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) 3524{ 3525 dsl_dataset_t *ds = arg1; 3526 dsl_prop_setarg_t *psa = arg2; 3527 uint64_t effective_value = psa->psa_effective_value; 3528 uint64_t unique; 3529 int64_t delta; 3530 3531 dsl_prop_set_sync(ds, psa, tx); 3532 DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); 3533 3534 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3535 3536 
mutex_enter(&ds->ds_dir->dd_lock); 3537 mutex_enter(&ds->ds_lock); 3538 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 3539 unique = ds->ds_phys->ds_unique_bytes; 3540 delta = MAX(0, (int64_t)(effective_value - unique)) - 3541 MAX(0, (int64_t)(ds->ds_reserved - unique)); 3542 ds->ds_reserved = effective_value; 3543 mutex_exit(&ds->ds_lock); 3544 3545 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 3546 mutex_exit(&ds->ds_dir->dd_lock); 3547 3548 spa_history_log_internal(LOG_DS_REFRESERV, 3549 ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu", 3550 (longlong_t)effective_value, ds->ds_object); 3551} 3552 3553int 3554dsl_dataset_set_reservation(const char *dsname, zprop_source_t source, 3555 uint64_t reservation) 3556{ 3557 dsl_dataset_t *ds; 3558 dsl_prop_setarg_t psa; 3559 int err; 3560 3561 dsl_prop_setarg_init_uint64(&psa, "refreservation", source, 3562 &reservation); 3563 3564 err = dsl_dataset_hold(dsname, FTAG, &ds); 3565 if (err) 3566 return (err); 3567 3568 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3569 dsl_dataset_set_reservation_check, 3570 dsl_dataset_set_reservation_sync, ds, &psa, 0); 3571 3572 dsl_dataset_rele(ds, FTAG); 3573 return (err); 3574} 3575 3576typedef struct zfs_hold_cleanup_arg { 3577 dsl_pool_t *dp; 3578 uint64_t dsobj; 3579 char htag[MAXNAMELEN]; 3580} zfs_hold_cleanup_arg_t; 3581 3582static void 3583dsl_dataset_user_release_onexit(void *arg) 3584{ 3585 zfs_hold_cleanup_arg_t *ca = arg; 3586 3587 (void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag, 3588 B_TRUE); 3589 kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); 3590} 3591 3592void 3593dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, 3594 minor_t minor) 3595{ 3596 zfs_hold_cleanup_arg_t *ca; 3597 3598 ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP); 3599 ca->dp = ds->ds_dir->dd_pool; 3600 ca->dsobj = ds->ds_object; 3601 (void) strlcpy(ca->htag, htag, sizeof (ca->htag)); 3602 VERIFY3U(0, ==, zfs_onexit_add_cb(minor, 3603 
dsl_dataset_user_release_onexit, ca, NULL)); 3604} 3605 3606/* 3607 * If you add new checks here, you may need to add 3608 * additional checks to the "temporary" case in 3609 * snapshot_check() in dmu_objset.c. 3610 */ 3611static int 3612dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx) 3613{ 3614 dsl_dataset_t *ds = arg1; 3615 struct dsl_ds_holdarg *ha = arg2; 3616 char *htag = ha->htag; 3617 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3618 int error = 0; 3619 3620 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) 3621 return (ENOTSUP); 3622 3623 if (!dsl_dataset_is_snapshot(ds)) 3624 return (EINVAL); 3625 3626 /* tags must be unique */ 3627 mutex_enter(&ds->ds_lock); 3628 if (ds->ds_phys->ds_userrefs_obj) { 3629 error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag, 3630 8, 1, tx); 3631 if (error == 0) 3632 error = EEXIST; 3633 else if (error == ENOENT) 3634 error = 0; 3635 } 3636 mutex_exit(&ds->ds_lock); 3637 3638 if (error == 0 && ha->temphold && 3639 strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) 3640 error = E2BIG; 3641 3642 return (error); 3643} 3644 3645void 3646dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) 3647{ 3648 dsl_dataset_t *ds = arg1; 3649 struct dsl_ds_holdarg *ha = arg2; 3650 char *htag = ha->htag; 3651 dsl_pool_t *dp = ds->ds_dir->dd_pool; 3652 objset_t *mos = dp->dp_meta_objset; 3653 uint64_t now = gethrestime_sec(); 3654 uint64_t zapobj; 3655 3656 mutex_enter(&ds->ds_lock); 3657 if (ds->ds_phys->ds_userrefs_obj == 0) { 3658 /* 3659 * This is the first user hold for this dataset. Create 3660 * the userrefs zap object. 
3661 */ 3662 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3663 zapobj = ds->ds_phys->ds_userrefs_obj = 3664 zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx); 3665 } else { 3666 zapobj = ds->ds_phys->ds_userrefs_obj; 3667 } 3668 ds->ds_userrefs++; 3669 mutex_exit(&ds->ds_lock); 3670 3671 VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx)); 3672 3673 if (ha->temphold) { 3674 VERIFY(0 == dsl_pool_user_hold(dp, ds->ds_object, 3675 htag, &now, tx)); 3676 } 3677 3678 spa_history_log_internal(LOG_DS_USER_HOLD, 3679 dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag, 3680 (int)ha->temphold, ds->ds_object); 3681} 3682 3683static int 3684dsl_dataset_user_hold_one(const char *dsname, void *arg) 3685{ 3686 struct dsl_ds_holdarg *ha = arg; 3687 dsl_dataset_t *ds; 3688 int error; 3689 char *name; 3690 3691 /* alloc a buffer to hold dsname@snapname plus terminating NULL */ 3692 name = kmem_asprintf("%s@%s", dsname, ha->snapname); 3693 error = dsl_dataset_hold(name, ha->dstg, &ds); 3694 strfree(name); 3695 if (error == 0) { 3696 ha->gotone = B_TRUE; 3697 dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check, 3698 dsl_dataset_user_hold_sync, ds, ha, 0); 3699 } else if (error == ENOENT && ha->recursive) { 3700 error = 0; 3701 } else { 3702 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 3703 } 3704 return (error); 3705} 3706 3707int 3708dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, 3709 boolean_t temphold) 3710{ 3711 struct dsl_ds_holdarg *ha; 3712 int error; 3713 3714 ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); 3715 ha->htag = htag; 3716 ha->temphold = temphold; 3717 error = dsl_sync_task_do(ds->ds_dir->dd_pool, 3718 dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync, 3719 ds, ha, 0); 3720 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3721 3722 return (error); 3723} 3724 3725int 3726dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, 3727 boolean_t recursive, boolean_t temphold, int cleanup_fd) 3728{ 3729 struct 
dsl_ds_holdarg *ha; 3730 dsl_sync_task_t *dst; 3731 spa_t *spa; 3732 int error; 3733 minor_t minor = 0; 3734 3735 if (cleanup_fd != -1) { 3736 /* Currently we only support cleanup-on-exit of tempholds. */ 3737 if (!temphold) 3738 return (EINVAL); 3739 error = zfs_onexit_fd_hold(cleanup_fd, &minor); 3740 if (error) 3741 return (error); 3742 } 3743 3744 ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); 3745 3746 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 3747 3748 error = spa_open(dsname, &spa, FTAG); 3749 if (error) { 3750 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3751 if (cleanup_fd != -1) 3752 zfs_onexit_fd_rele(cleanup_fd); 3753 return (error); 3754 } 3755 3756 ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 3757 ha->htag = htag; 3758 ha->snapname = snapname; 3759 ha->recursive = recursive; 3760 ha->temphold = temphold; 3761 3762 if (recursive) { 3763 error = dmu_objset_find(dsname, dsl_dataset_user_hold_one, 3764 ha, DS_FIND_CHILDREN); 3765 } else { 3766 error = dsl_dataset_user_hold_one(dsname, ha); 3767 } 3768 if (error == 0) 3769 error = dsl_sync_task_group_wait(ha->dstg); 3770 3771 for (dst = list_head(&ha->dstg->dstg_tasks); dst; 3772 dst = list_next(&ha->dstg->dstg_tasks, dst)) { 3773 dsl_dataset_t *ds = dst->dst_arg1; 3774 3775 if (dst->dst_err) { 3776 dsl_dataset_name(ds, ha->failed); 3777 *strchr(ha->failed, '@') = '\0'; 3778 } else if (error == 0 && minor != 0 && temphold) { 3779 /* 3780 * If this hold is to be released upon process exit, 3781 * register that action now. 
3782 */ 3783 dsl_register_onexit_hold_cleanup(ds, htag, minor); 3784 } 3785 dsl_dataset_rele(ds, ha->dstg); 3786 } 3787 3788 if (error == 0 && recursive && !ha->gotone) 3789 error = ENOENT; 3790 3791 if (error) 3792 (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); 3793 3794 dsl_sync_task_group_destroy(ha->dstg); 3795 3796 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3797 spa_close(spa, FTAG); 3798 if (cleanup_fd != -1) 3799 zfs_onexit_fd_rele(cleanup_fd); 3800 return (error); 3801} 3802 3803struct dsl_ds_releasearg { 3804 dsl_dataset_t *ds; 3805 const char *htag; 3806 boolean_t own; /* do we own or just hold ds? */ 3807}; 3808 3809static int 3810dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag, 3811 boolean_t *might_destroy) 3812{ 3813 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3814 uint64_t zapobj; 3815 uint64_t tmp; 3816 int error; 3817 3818 *might_destroy = B_FALSE; 3819 3820 mutex_enter(&ds->ds_lock); 3821 zapobj = ds->ds_phys->ds_userrefs_obj; 3822 if (zapobj == 0) { 3823 /* The tag can't possibly exist */ 3824 mutex_exit(&ds->ds_lock); 3825 return (ESRCH); 3826 } 3827 3828 /* Make sure the tag exists */ 3829 error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp); 3830 if (error) { 3831 mutex_exit(&ds->ds_lock); 3832 if (error == ENOENT) 3833 error = ESRCH; 3834 return (error); 3835 } 3836 3837 if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 && 3838 DS_IS_DEFER_DESTROY(ds)) 3839 *might_destroy = B_TRUE; 3840 3841 mutex_exit(&ds->ds_lock); 3842 return (0); 3843} 3844 3845static int 3846dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx) 3847{ 3848 struct dsl_ds_releasearg *ra = arg1; 3849 dsl_dataset_t *ds = ra->ds; 3850 boolean_t might_destroy; 3851 int error; 3852 3853 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) 3854 return (ENOTSUP); 3855 3856 error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy); 3857 if (error) 3858 return (error); 3859 3860 if 
(might_destroy) { 3861 struct dsl_ds_destroyarg dsda = {0}; 3862 3863 if (dmu_tx_is_syncing(tx)) { 3864 /* 3865 * If we're not prepared to remove the snapshot, 3866 * we can't allow the release to happen right now. 3867 */ 3868 if (!ra->own) 3869 return (EBUSY); 3870 } 3871 dsda.ds = ds; 3872 dsda.releasing = B_TRUE; 3873 return (dsl_dataset_destroy_check(&dsda, tag, tx)); 3874 } 3875 3876 return (0); 3877} 3878 3879static void 3880dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) 3881{ 3882 struct dsl_ds_releasearg *ra = arg1; 3883 dsl_dataset_t *ds = ra->ds; 3884 dsl_pool_t *dp = ds->ds_dir->dd_pool; 3885 objset_t *mos = dp->dp_meta_objset; 3886 uint64_t zapobj; 3887 uint64_t dsobj = ds->ds_object; 3888 uint64_t refs; 3889 int error; 3890 3891 mutex_enter(&ds->ds_lock); 3892 ds->ds_userrefs--; 3893 refs = ds->ds_userrefs; 3894 mutex_exit(&ds->ds_lock); 3895 error = dsl_pool_user_release(dp, ds->ds_object, ra->htag, tx); 3896 VERIFY(error == 0 || error == ENOENT); 3897 zapobj = ds->ds_phys->ds_userrefs_obj; 3898 VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx)); 3899 if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 && 3900 DS_IS_DEFER_DESTROY(ds)) { 3901 struct dsl_ds_destroyarg dsda = {0}; 3902 3903 ASSERT(ra->own); 3904 dsda.ds = ds; 3905 dsda.releasing = B_TRUE; 3906 /* We already did the destroy_check */ 3907 dsl_dataset_destroy_sync(&dsda, tag, tx); 3908 } 3909 3910 spa_history_log_internal(LOG_DS_USER_RELEASE, 3911 dp->dp_spa, tx, "<%s> %lld dataset = %llu", 3912 ra->htag, (longlong_t)refs, dsobj); 3913} 3914 3915static int 3916dsl_dataset_user_release_one(const char *dsname, void *arg) 3917{ 3918 struct dsl_ds_holdarg *ha = arg; 3919 struct dsl_ds_releasearg *ra; 3920 dsl_dataset_t *ds; 3921 int error; 3922 void *dtag = ha->dstg; 3923 char *name; 3924 boolean_t own = B_FALSE; 3925 boolean_t might_destroy; 3926 3927 /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */ 3928 name = kmem_asprintf("%s@%s", 
dsname, ha->snapname); 3929 error = dsl_dataset_hold(name, dtag, &ds); 3930 strfree(name); 3931 if (error == ENOENT && ha->recursive) 3932 return (0); 3933 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 3934 if (error) 3935 return (error); 3936 3937 ha->gotone = B_TRUE; 3938 3939 ASSERT(dsl_dataset_is_snapshot(ds)); 3940 3941 error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy); 3942 if (error) { 3943 dsl_dataset_rele(ds, dtag); 3944 return (error); 3945 } 3946 3947 if (might_destroy) { 3948#ifdef _KERNEL 3949 name = kmem_asprintf("%s@%s", dsname, ha->snapname); 3950 error = zfs_unmount_snap(name, NULL); 3951 strfree(name); 3952 if (error) { 3953 dsl_dataset_rele(ds, dtag); 3954 return (error); 3955 } 3956#endif 3957 if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) { 3958 dsl_dataset_rele(ds, dtag); 3959 return (EBUSY); 3960 } else { 3961 own = B_TRUE; 3962 dsl_dataset_make_exclusive(ds, dtag); 3963 } 3964 } 3965 3966 ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP); 3967 ra->ds = ds; 3968 ra->htag = ha->htag; 3969 ra->own = own; 3970 dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check, 3971 dsl_dataset_user_release_sync, ra, dtag, 0); 3972 3973 return (0); 3974} 3975 3976int 3977dsl_dataset_user_release(char *dsname, char *snapname, char *htag, 3978 boolean_t recursive) 3979{ 3980 struct dsl_ds_holdarg *ha; 3981 dsl_sync_task_t *dst; 3982 spa_t *spa; 3983 int error; 3984 3985top: 3986 ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); 3987 3988 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 3989 3990 error = spa_open(dsname, &spa, FTAG); 3991 if (error) { 3992 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3993 return (error); 3994 } 3995 3996 ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 3997 ha->htag = htag; 3998 ha->snapname = snapname; 3999 ha->recursive = recursive; 4000 if (recursive) { 4001 error = dmu_objset_find(dsname, dsl_dataset_user_release_one, 4002 ha, DS_FIND_CHILDREN); 4003 
} else { 4004 error = dsl_dataset_user_release_one(dsname, ha); 4005 } 4006 if (error == 0) 4007 error = dsl_sync_task_group_wait(ha->dstg); 4008 4009 for (dst = list_head(&ha->dstg->dstg_tasks); dst; 4010 dst = list_next(&ha->dstg->dstg_tasks, dst)) { 4011 struct dsl_ds_releasearg *ra = dst->dst_arg1; 4012 dsl_dataset_t *ds = ra->ds; 4013 4014 if (dst->dst_err) 4015 dsl_dataset_name(ds, ha->failed); 4016 4017 if (ra->own) 4018 dsl_dataset_disown(ds, ha->dstg); 4019 else 4020 dsl_dataset_rele(ds, ha->dstg); 4021 4022 kmem_free(ra, sizeof (struct dsl_ds_releasearg)); 4023 } 4024 4025 if (error == 0 && recursive && !ha->gotone) 4026 error = ENOENT; 4027 4028 if (error && error != EBUSY) 4029 (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); 4030 4031 dsl_sync_task_group_destroy(ha->dstg); 4032 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 4033 spa_close(spa, FTAG); 4034 4035 /* 4036 * We can get EBUSY if we were racing with deferred destroy and 4037 * dsl_dataset_user_release_check() hadn't done the necessary 4038 * open context setup. We can also get EBUSY if we're racing 4039 * with destroy and that thread is the ds_owner. Either way 4040 * the busy condition should be transient, and we should retry 4041 * the release operation. 4042 */ 4043 if (error == EBUSY) 4044 goto top; 4045 4046 return (error); 4047} 4048 4049/* 4050 * Called at spa_load time (with retry == B_FALSE) to release a stale 4051 * temporary user hold. Also called by the onexit code (with retry == B_TRUE). 
4052 */ 4053int 4054dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag, 4055 boolean_t retry) 4056{ 4057 dsl_dataset_t *ds; 4058 char *snap; 4059 char *name; 4060 int namelen; 4061 int error; 4062 4063 do { 4064 rw_enter(&dp->dp_config_rwlock, RW_READER); 4065 error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); 4066 rw_exit(&dp->dp_config_rwlock); 4067 if (error) 4068 return (error); 4069 namelen = dsl_dataset_namelen(ds)+1; 4070 name = kmem_alloc(namelen, KM_SLEEP); 4071 dsl_dataset_name(ds, name); 4072 dsl_dataset_rele(ds, FTAG); 4073 4074 snap = strchr(name, '@'); 4075 *snap = '\0'; 4076 ++snap; 4077 error = dsl_dataset_user_release(name, snap, htag, B_FALSE); 4078 kmem_free(name, namelen); 4079 4080 /* 4081 * The object can't have been destroyed because we have a hold, 4082 * but it might have been renamed, resulting in ENOENT. Retry 4083 * if we've been requested to do so. 4084 * 4085 * It would be nice if we could use the dsobj all the way 4086 * through and avoid ENOENT entirely. But we might need to 4087 * unmount the snapshot, and there's currently no way to lookup 4088 * a vfsp using a ZFS object id. 
4089 */ 4090 } while ((error == ENOENT) && retry); 4091 4092 return (error); 4093} 4094 4095int 4096dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp) 4097{ 4098 dsl_dataset_t *ds; 4099 int err; 4100 4101 err = dsl_dataset_hold(dsname, FTAG, &ds); 4102 if (err) 4103 return (err); 4104 4105 VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)); 4106 if (ds->ds_phys->ds_userrefs_obj != 0) { 4107 zap_attribute_t *za; 4108 zap_cursor_t zc; 4109 4110 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 4111 for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, 4112 ds->ds_phys->ds_userrefs_obj); 4113 zap_cursor_retrieve(&zc, za) == 0; 4114 zap_cursor_advance(&zc)) { 4115 VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name, 4116 za->za_first_integer)); 4117 } 4118 zap_cursor_fini(&zc); 4119 kmem_free(za, sizeof (zap_attribute_t)); 4120 } 4121 dsl_dataset_rele(ds, FTAG); 4122 return (0); 4123} 4124 4125/*
|
 4028 * Note, this function is used as the callback for dmu_objset_find(). We
| 4126 * Note, this function is used as the callback for dmu_objset_find(). We
|
4029 * always return 0 so that we will continue to find and process 4030 * inconsistent datasets, even if we encounter an error trying to 4031 * process one of them. 4032 */ 4033/* ARGSUSED */ 4034int 4035dsl_destroy_inconsistent(const char *dsname, void *arg) 4036{ 4037 dsl_dataset_t *ds; 4038 4039 if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) { 4040 if (DS_IS_INCONSISTENT(ds)) 4041 (void) dsl_dataset_destroy(ds, FTAG, B_FALSE); 4042 else 4043 dsl_dataset_disown(ds, FTAG); 4044 } 4045 return (0); 4046}
| 4127 * always return 0 so that we will continue to find and process 4128 * inconsistent datasets, even if we encounter an error trying to 4129 * process one of them. 4130 */ 4131/* ARGSUSED */ 4132int 4133dsl_destroy_inconsistent(const char *dsname, void *arg) 4134{ 4135 dsl_dataset_t *ds; 4136 4137 if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) { 4138 if (DS_IS_INCONSISTENT(ds)) 4139 (void) dsl_dataset_destroy(ds, FTAG, B_FALSE); 4140 else 4141 dsl_dataset_disown(ds, FTAG); 4142 } 4143 return (0); 4144}
|
| 4145 4146/* 4147 * Return (in *usedp) the amount of space written in new that is not 4148 * present in oldsnap. New may be a snapshot or the head. Old must be 4149 * a snapshot before new, in new's filesystem (or its origin). If not then 4150 * fail and return EINVAL. 4151 * 4152 * The written space is calculated by considering two components: First, we 4153 * ignore any freed space, and calculate the written as new's used space 4154 * minus old's used space. Next, we add in the amount of space that was freed 4155 * between the two snapshots, thus reducing new's used space relative to old's. 4156 * Specifically, this is the space that was born before old->ds_creation_txg, 4157 * and freed before new (ie. on new's deadlist or a previous deadlist). 4158 * 4159 * space freed [---------------------] 4160 * snapshots ---O-------O--------O-------O------ 4161 * oldsnap new 4162 */ 4163int 4164dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, 4165 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 4166{ 4167 int err = 0; 4168 uint64_t snapobj; 4169 dsl_pool_t *dp = new->ds_dir->dd_pool; 4170 4171 *usedp = 0; 4172 *usedp += new->ds_phys->ds_used_bytes; 4173 *usedp -= oldsnap->ds_phys->ds_used_bytes; 4174 4175 *compp = 0; 4176 *compp += new->ds_phys->ds_compressed_bytes; 4177 *compp -= oldsnap->ds_phys->ds_compressed_bytes; 4178 4179 *uncompp = 0; 4180 *uncompp += new->ds_phys->ds_uncompressed_bytes; 4181 *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; 4182 4183 rw_enter(&dp->dp_config_rwlock, RW_READER); 4184 snapobj = new->ds_object; 4185 while (snapobj != oldsnap->ds_object) { 4186 dsl_dataset_t *snap; 4187 uint64_t used, comp, uncomp; 4188 4189 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); 4190 if (err != 0) 4191 break; 4192 4193 if (snap->ds_phys->ds_prev_snap_txg == 4194 oldsnap->ds_phys->ds_creation_txg) { 4195 /* 4196 * The blocks in the deadlist can not be born after 4197 * ds_prev_snap_txg, so get the whole deadlist space, 4198 
* which is more efficient (especially for old-format 4199 * deadlists). Unfortunately the deadlist code 4200 * doesn't have enough information to make this 4201 * optimization itself. 4202 */ 4203 dsl_deadlist_space(&snap->ds_deadlist, 4204 &used, &comp, &uncomp); 4205 } else { 4206 dsl_deadlist_space_range(&snap->ds_deadlist, 4207 0, oldsnap->ds_phys->ds_creation_txg, 4208 &used, &comp, &uncomp); 4209 } 4210 *usedp += used; 4211 *compp += comp; 4212 *uncompp += uncomp; 4213 4214 /* 4215 * If we get to the beginning of the chain of snapshots 4216 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap 4217 * was not a snapshot of/before new. 4218 */ 4219 snapobj = snap->ds_phys->ds_prev_snap_obj; 4220 dsl_dataset_rele(snap, FTAG); 4221 if (snapobj == 0) { 4222 err = EINVAL; 4223 break; 4224 } 4225 4226 } 4227 rw_exit(&dp->dp_config_rwlock); 4228 return (err); 4229} 4230 4231/* 4232 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, 4233 * lastsnap, and all snapshots in between are deleted. 4234 * 4235 * blocks that would be freed [---------------------------] 4236 * snapshots ---O-------O--------O-------O--------O 4237 * firstsnap lastsnap 4238 * 4239 * This is the set of blocks that were born after the snap before firstsnap, 4240 * (birth > firstsnap->prev_snap_txg) and died before the snap after the 4241 * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). 4242 * We calculate this by iterating over the relevant deadlists (from the snap 4243 * after lastsnap, backward to the snap after firstsnap), summing up the 4244 * space on the deadlist that was born after the snap before firstsnap. 
4245 */ 4246int 4247dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, 4248 dsl_dataset_t *lastsnap, 4249 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 4250{ 4251 int err = 0; 4252 uint64_t snapobj; 4253 dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; 4254 4255 ASSERT(dsl_dataset_is_snapshot(firstsnap)); 4256 ASSERT(dsl_dataset_is_snapshot(lastsnap)); 4257 4258 /* 4259 * Check that the snapshots are in the same dsl_dir, and firstsnap 4260 * is before lastsnap. 4261 */ 4262 if (firstsnap->ds_dir != lastsnap->ds_dir || 4263 firstsnap->ds_phys->ds_creation_txg > 4264 lastsnap->ds_phys->ds_creation_txg) 4265 return (EINVAL); 4266 4267 *usedp = *compp = *uncompp = 0; 4268 4269 rw_enter(&dp->dp_config_rwlock, RW_READER); 4270 snapobj = lastsnap->ds_phys->ds_next_snap_obj; 4271 while (snapobj != firstsnap->ds_object) { 4272 dsl_dataset_t *ds; 4273 uint64_t used, comp, uncomp; 4274 4275 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); 4276 if (err != 0) 4277 break; 4278 4279 dsl_deadlist_space_range(&ds->ds_deadlist, 4280 firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX, 4281 &used, &comp, &uncomp); 4282 *usedp += used; 4283 *compp += comp; 4284 *uncompp += uncomp; 4285 4286 snapobj = ds->ds_phys->ds_prev_snap_obj; 4287 ASSERT3U(snapobj, !=, 0); 4288 dsl_dataset_rele(ds, FTAG); 4289 } 4290 rw_exit(&dp->dp_config_rwlock); 4291 return (err); 4292}
|
| |