dnode.c (177698) | dnode.c (185029) |
---|---|
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 5 unchanged lines hidden (view full) --- 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* | 1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 5 unchanged lines hidden (view full) --- 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* |
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. | 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. |
23 * Use is subject to license terms. 24 */ 25 | 23 * Use is subject to license terms. 24 */ 25 |
26#pragma ident "%Z%%M% %I% %E% SMI" 27 | |
28#include <sys/zfs_context.h> 29#include <sys/dbuf.h> 30#include <sys/dnode.h> 31#include <sys/dmu.h> 32#include <sys/dmu_impl.h> 33#include <sys/dmu_tx.h> 34#include <sys/dmu_objset.h> 35#include <sys/dsl_dir.h> --- 201 unchanged lines hidden (view full) --- 237 238 if (rp1->fr_blkid < rp2->fr_blkid) 239 return (-1); 240 else if (rp1->fr_blkid > rp2->fr_blkid) 241 return (1); 242 else return (0); 243} 244 | 26#include <sys/zfs_context.h> 27#include <sys/dbuf.h> 28#include <sys/dnode.h> 29#include <sys/dmu.h> 30#include <sys/dmu_impl.h> 31#include <sys/dmu_tx.h> 32#include <sys/dmu_objset.h> 33#include <sys/dsl_dir.h> --- 201 unchanged lines hidden (view full) --- 235 236 if (rp1->fr_blkid < rp2->fr_blkid) 237 return (-1); 238 else if (rp1->fr_blkid > rp2->fr_blkid) 239 return (1); 240 else return (0); 241} 242 |
243void 244dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx) 245{ 246 ASSERT3U(refcount_count(&dn->dn_holds), >=, 1); 247 248 dnode_setdirty(dn, tx); 249 rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 250 ASSERT3U(newsize, <=, DN_MAX_BONUSLEN - 251 (dn->dn_nblkptr-1) * sizeof (blkptr_t)); 252 dn->dn_bonuslen = newsize; 253 if (newsize == 0) 254 dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = DN_ZERO_BONUSLEN; 255 else 256 dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen; 257 rw_exit(&dn->dn_struct_rwlock); 258} 259 |
|
245static void 246dnode_setdblksz(dnode_t *dn, int size) 247{ 248 ASSERT3U(P2PHASE(size, SPA_MINBLOCKSIZE), ==, 0); 249 ASSERT3U(size, <=, SPA_MAXBLOCKSIZE); 250 ASSERT3U(size, >=, SPA_MINBLOCKSIZE); 251 ASSERT3U(size >> SPA_MINBLOCKSHIFT, <, 252 1<<(sizeof (dn->dn_phys->dn_datablkszsec) * 8)); --- 27 unchanged lines hidden (view full) --- 280 281 dmu_zfetch_init(&dn->dn_zfetch, dn); 282 283 ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES); 284 mutex_enter(&os->os_lock); 285 list_insert_head(&os->os_dnodes, dn); 286 mutex_exit(&os->os_lock); 287 | 260static void 261dnode_setdblksz(dnode_t *dn, int size) 262{ 263 ASSERT3U(P2PHASE(size, SPA_MINBLOCKSIZE), ==, 0); 264 ASSERT3U(size, <=, SPA_MAXBLOCKSIZE); 265 ASSERT3U(size, >=, SPA_MINBLOCKSIZE); 266 ASSERT3U(size >> SPA_MINBLOCKSHIFT, <, 267 1<<(sizeof (dn->dn_phys->dn_datablkszsec) * 8)); --- 27 unchanged lines hidden (view full) --- 295 296 dmu_zfetch_init(&dn->dn_zfetch, dn); 297 298 ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES); 299 mutex_enter(&os->os_lock); 300 list_insert_head(&os->os_dnodes, dn); 301 mutex_exit(&os->os_lock); 302 |
303 arc_space_consume(sizeof (dnode_t)); |
|
288 return (dn); 289} 290 291static void 292dnode_destroy(dnode_t *dn) 293{ 294 objset_impl_t *os = dn->dn_objset; 295 --- 18 unchanged lines hidden (view full) --- 314 } 315 dmu_zfetch_rele(&dn->dn_zfetch); 316 if (dn->dn_bonus) { 317 mutex_enter(&dn->dn_bonus->db_mtx); 318 dbuf_evict(dn->dn_bonus); 319 dn->dn_bonus = NULL; 320 } 321 kmem_cache_free(dnode_cache, dn); | 304 return (dn); 305} 306 307static void 308dnode_destroy(dnode_t *dn) 309{ 310 objset_impl_t *os = dn->dn_objset; 311 --- 18 unchanged lines hidden (view full) --- 330 } 331 dmu_zfetch_rele(&dn->dn_zfetch); 332 if (dn->dn_bonus) { 333 mutex_enter(&dn->dn_bonus->db_mtx); 334 dbuf_evict(dn->dn_bonus); 335 dn->dn_bonus = NULL; 336 } 337 kmem_cache_free(dnode_cache, dn); |
338 arc_space_return(sizeof (dnode_t)); |
|
322} 323 324void 325dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, 326 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 327{ 328 int i; 329 --- 27 unchanged lines hidden (view full) --- 357 ASSERT3U(dn->dn_assigned_txg, ==, 0); 358 ASSERT(refcount_is_zero(&dn->dn_tx_holds)); 359 ASSERT3U(refcount_count(&dn->dn_holds), <=, 1); 360 ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL); 361 362 for (i = 0; i < TXG_SIZE; i++) { 363 ASSERT3U(dn->dn_next_nlevels[i], ==, 0); 364 ASSERT3U(dn->dn_next_indblkshift[i], ==, 0); | 339} 340 341void 342dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, 343 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 344{ 345 int i; 346 --- 27 unchanged lines hidden (view full) --- 374 ASSERT3U(dn->dn_assigned_txg, ==, 0); 375 ASSERT(refcount_is_zero(&dn->dn_tx_holds)); 376 ASSERT3U(refcount_count(&dn->dn_holds), <=, 1); 377 ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL); 378 379 for (i = 0; i < TXG_SIZE; i++) { 380 ASSERT3U(dn->dn_next_nlevels[i], ==, 0); 381 ASSERT3U(dn->dn_next_indblkshift[i], ==, 0); |
382 ASSERT3U(dn->dn_next_bonuslen[i], ==, 0); |
|
365 ASSERT3U(dn->dn_next_blksz[i], ==, 0); 366 ASSERT(!list_link_active(&dn->dn_dirty_link[i])); 367 ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL); 368 ASSERT3U(avl_numnodes(&dn->dn_ranges[i]), ==, 0); 369 } 370 371 dn->dn_type = ot; 372 dnode_setdblksz(dn, blocksize); --- 11 unchanged lines hidden (view full) --- 384 kmem_free(dn->dn_dirtyctx_firstset, 1); 385 dn->dn_dirtyctx_firstset = NULL; 386 } 387 388 dn->dn_allocated_txg = tx->tx_txg; 389 390 dnode_setdirty(dn, tx); 391 dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs; | 383 ASSERT3U(dn->dn_next_blksz[i], ==, 0); 384 ASSERT(!list_link_active(&dn->dn_dirty_link[i])); 385 ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL); 386 ASSERT3U(avl_numnodes(&dn->dn_ranges[i]), ==, 0); 387 } 388 389 dn->dn_type = ot; 390 dnode_setdblksz(dn, blocksize); --- 11 unchanged lines hidden (view full) --- 402 kmem_free(dn->dn_dirtyctx_firstset, 1); 403 dn->dn_dirtyctx_firstset = NULL; 404 } 405 406 dn->dn_allocated_txg = tx->tx_txg; 407 408 dnode_setdirty(dn, tx); 409 dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs; |
410 dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen; |
|
392 dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = dn->dn_datablksz; 393} 394 395void 396dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, 397 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 398{ | 411 dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = dn->dn_datablksz; 412} 413 414void 415dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, 416 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 417{ |
399 int i; | 418 int i, old_nblkptr; |
400 dmu_buf_impl_t *db = NULL; 401 402 ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE); 403 ASSERT3U(blocksize, <=, SPA_MAXBLOCKSIZE); 404 ASSERT3U(blocksize % SPA_MINBLOCKSIZE, ==, 0); 405 ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx)); 406 ASSERT(tx->tx_txg != 0); 407 ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) || 408 (bonustype != DMU_OT_NONE && bonuslen != 0)); 409 ASSERT3U(bonustype, <, DMU_OT_NUMTYPES); 410 ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN); 411 412 for (i = 0; i < TXG_SIZE; i++) 413 ASSERT(!list_link_active(&dn->dn_dirty_link[i])); 414 415 /* clean up any unreferenced dbufs */ | 419 dmu_buf_impl_t *db = NULL; 420 421 ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE); 422 ASSERT3U(blocksize, <=, SPA_MAXBLOCKSIZE); 423 ASSERT3U(blocksize % SPA_MINBLOCKSIZE, ==, 0); 424 ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx)); 425 ASSERT(tx->tx_txg != 0); 426 ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) || 427 (bonustype != DMU_OT_NONE && bonuslen != 0)); 428 ASSERT3U(bonustype, <, DMU_OT_NUMTYPES); 429 ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN); 430 431 for (i = 0; i < TXG_SIZE; i++) 432 ASSERT(!list_link_active(&dn->dn_dirty_link[i])); 433 434 /* clean up any unreferenced dbufs */ |
416 (void) dnode_evict_dbufs(dn, 0); | 435 dnode_evict_dbufs(dn); |
417 ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL); 418 419 /* 420 * XXX I should really have a generation number to tell if we 421 * need to do this... 422 */ 423 if (blocksize != dn->dn_datablksz || 424 dn->dn_bonustype != bonustype || dn->dn_bonuslen != bonuslen) { --- 6 unchanged lines hidden (view full) --- 431 if (blocksize != dn->dn_datablksz && 432 (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) || 433 list_head(&dn->dn_dbufs) != NULL)) { 434 db = dbuf_hold(dn, 0, FTAG); 435 dbuf_new_size(db, blocksize, tx); 436 } 437 dnode_setdblksz(dn, blocksize); 438 dnode_setdirty(dn, tx); | 436 ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL); 437 438 /* 439 * XXX I should really have a generation number to tell if we 440 * need to do this... 441 */ 442 if (blocksize != dn->dn_datablksz || 443 dn->dn_bonustype != bonustype || dn->dn_bonuslen != bonuslen) { --- 6 unchanged lines hidden (view full) --- 450 if (blocksize != dn->dn_datablksz && 451 (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) || 452 list_head(&dn->dn_dbufs) != NULL)) { 453 db = dbuf_hold(dn, 0, FTAG); 454 dbuf_new_size(db, blocksize, tx); 455 } 456 dnode_setdblksz(dn, blocksize); 457 dnode_setdirty(dn, tx); |
458 dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen; |
|
439 dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize; 440 rw_exit(&dn->dn_struct_rwlock); | 459 dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize; 460 rw_exit(&dn->dn_struct_rwlock); |
441 if (db) { | 461 if (db) |
442 dbuf_rele(db, FTAG); | 462 dbuf_rele(db, FTAG); |
443 db = NULL; 444 } | |
445 446 /* change type */ 447 dn->dn_type = ot; 448 | 463 464 /* change type */ 465 dn->dn_type = ot; 466 |
449 if (dn->dn_bonuslen != bonuslen) { 450 /* change bonus size */ 451 if (bonuslen == 0) 452 bonuslen = 1; /* XXX */ 453 rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 454 if (dn->dn_bonus == NULL) 455 dn->dn_bonus = dbuf_create_bonus(dn); 456 db = dn->dn_bonus; 457 rw_exit(&dn->dn_struct_rwlock); 458 if (refcount_add(&db->db_holds, FTAG) == 1) 459 dnode_add_ref(dn, db); 460 VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED)); 461 mutex_enter(&db->db_mtx); 462 ASSERT3U(db->db.db_size, ==, dn->dn_bonuslen); 463 ASSERT(db->db.db_data != NULL); 464 db->db.db_size = bonuslen; 465 mutex_exit(&db->db_mtx); 466 (void) dbuf_dirty(db, tx); 467 } 468 | |
469 /* change bonus size and type */ 470 mutex_enter(&dn->dn_mtx); | 467 /* change bonus size and type */ 468 mutex_enter(&dn->dn_mtx); |
469 old_nblkptr = dn->dn_nblkptr; |
|
471 dn->dn_bonustype = bonustype; 472 dn->dn_bonuslen = bonuslen; 473 dn->dn_nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); 474 dn->dn_checksum = ZIO_CHECKSUM_INHERIT; 475 dn->dn_compress = ZIO_COMPRESS_INHERIT; 476 ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR); 477 | 470 dn->dn_bonustype = bonustype; 471 dn->dn_bonuslen = bonuslen; 472 dn->dn_nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); 473 dn->dn_checksum = ZIO_CHECKSUM_INHERIT; 474 dn->dn_compress = ZIO_COMPRESS_INHERIT; 475 ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR); 476 |
478 /* 479 * NB: we have to do the dbuf_rele after we've changed the 480 * dn_bonuslen, for the sake of dbuf_verify(). 481 */ 482 if (db) 483 dbuf_rele(db, FTAG); | 477 /* XXX - for now, we can't make nblkptr smaller */ 478 ASSERT3U(dn->dn_nblkptr, >=, old_nblkptr); |
484 | 479 |
480 /* fix up the bonus db_size if dn_nblkptr has changed */ 481 if (dn->dn_bonus && dn->dn_bonuslen != old_nblkptr) { 482 dn->dn_bonus->db.db_size = 483 DN_MAX_BONUSLEN - (dn->dn_nblkptr-1) * sizeof (blkptr_t); 484 ASSERT(dn->dn_bonuslen <= dn->dn_bonus->db.db_size); 485 } 486 |
|
485 dn->dn_allocated_txg = tx->tx_txg; 486 mutex_exit(&dn->dn_mtx); 487} 488 489void 490dnode_special_close(dnode_t *dn) 491{ 492 /* --- 61 unchanged lines hidden (view full) --- 554 int epb, idx, err; 555 int drop_struct_lock = FALSE; 556 int type; 557 uint64_t blk; 558 dnode_t *mdn, *dn; 559 dmu_buf_impl_t *db; 560 dnode_t **children_dnodes; 561 | 487 dn->dn_allocated_txg = tx->tx_txg; 488 mutex_exit(&dn->dn_mtx); 489} 490 491void 492dnode_special_close(dnode_t *dn) 493{ 494 /* --- 61 unchanged lines hidden (view full) --- 556 int epb, idx, err; 557 int drop_struct_lock = FALSE; 558 int type; 559 uint64_t blk; 560 dnode_t *mdn, *dn; 561 dmu_buf_impl_t *db; 562 dnode_t **children_dnodes; 563 |
564 /* 565 * If you are holding the spa config lock as writer, you shouldn't 566 * be asking the DMU to do *anything*. 567 */ 568 ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0); 569 |
|
562 if (object == 0 || object >= DN_MAX_OBJECT) 563 return (EINVAL); 564 565 mdn = os->os_meta_dnode; 566 567 DNODE_VERIFY(mdn); 568 569 if (!RW_WRITE_HELD(&mdn->dn_struct_rwlock)) { --- 27 unchanged lines hidden (view full) --- 597 if (winner = dmu_buf_set_user(&db->db, children_dnodes, NULL, 598 dnode_buf_pageout)) { 599 kmem_free(children_dnodes, epb * sizeof (dnode_t *)); 600 children_dnodes = winner; 601 } 602 } 603 604 if ((dn = children_dnodes[idx]) == NULL) { | 570 if (object == 0 || object >= DN_MAX_OBJECT) 571 return (EINVAL); 572 573 mdn = os->os_meta_dnode; 574 575 DNODE_VERIFY(mdn); 576 577 if (!RW_WRITE_HELD(&mdn->dn_struct_rwlock)) { --- 27 unchanged lines hidden (view full) --- 605 if (winner = dmu_buf_set_user(&db->db, children_dnodes, NULL, 606 dnode_buf_pageout)) { 607 kmem_free(children_dnodes, epb * sizeof (dnode_t *)); 608 children_dnodes = winner; 609 } 610 } 611 612 if ((dn = children_dnodes[idx]) == NULL) { |
613 dnode_phys_t *dnp = (dnode_phys_t *)db->db.db_data+idx; |
|
605 dnode_t *winner; | 614 dnode_t *winner; |
606 dn = dnode_create(os, (dnode_phys_t *)db->db.db_data+idx, 607 db, object); | 615 616 dn = dnode_create(os, dnp, db, object); |
608 winner = atomic_cas_ptr(&children_dnodes[idx], NULL, dn); 609 if (winner != NULL) { 610 dnode_destroy(dn); 611 dn = winner; 612 } 613 } 614 615 mutex_enter(&dn->dn_mtx); --- 23 unchanged lines hidden (view full) --- 639 * Return held dnode if the object is allocated, NULL if not. 640 */ 641int 642dnode_hold(objset_impl_t *os, uint64_t object, void *tag, dnode_t **dnp) 643{ 644 return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp)); 645} 646 | 617 winner = atomic_cas_ptr(&children_dnodes[idx], NULL, dn); 618 if (winner != NULL) { 619 dnode_destroy(dn); 620 dn = winner; 621 } 622 } 623 624 mutex_enter(&dn->dn_mtx); --- 23 unchanged lines hidden (view full) --- 648 * Return held dnode if the object is allocated, NULL if not. 649 */ 650int 651dnode_hold(objset_impl_t *os, uint64_t object, void *tag, dnode_t **dnp) 652{ 653 return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp)); 654} 655 |
647void | 656/* 657 * Can only add a reference if there is already at least one 658 * reference on the dnode. Returns FALSE if unable to add a 659 * new reference. 660 */ 661boolean_t |
648dnode_add_ref(dnode_t *dn, void *tag) 649{ | 662dnode_add_ref(dnode_t *dn, void *tag) 663{ |
650 ASSERT(refcount_count(&dn->dn_holds) > 0); 651 (void) refcount_add(&dn->dn_holds, tag); | 664 mutex_enter(&dn->dn_mtx); 665 if (refcount_is_zero(&dn->dn_holds)) { 666 mutex_exit(&dn->dn_mtx); 667 return (FALSE); 668 } 669 VERIFY(1 < refcount_add(&dn->dn_holds, tag)); 670 mutex_exit(&dn->dn_mtx); 671 return (TRUE); |
652} 653 654void 655dnode_rele(dnode_t *dn, void *tag) 656{ 657 uint64_t refs; 658 | 672} 673 674void 675dnode_rele(dnode_t *dn, void *tag) 676{ 677 uint64_t refs; 678 |
679 mutex_enter(&dn->dn_mtx); |
|
659 refs = refcount_remove(&dn->dn_holds, tag); | 680 refs = refcount_remove(&dn->dn_holds, tag); |
681 mutex_exit(&dn->dn_mtx); |
|
660 /* NOTE: the DNODE_DNODE does not have a dn_dbuf */ 661 if (refs == 0 && dn->dn_dbuf) 662 dbuf_rele(dn->dn_dbuf, dn); 663} 664 665void 666dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) 667{ --- 19 unchanged lines hidden (view full) --- 687 */ 688 if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) { 689 mutex_exit(&os->os_lock); 690 return; 691 } 692 693 ASSERT(!refcount_is_zero(&dn->dn_holds) || list_head(&dn->dn_dbufs)); 694 ASSERT(dn->dn_datablksz != 0); | 682 /* NOTE: the DNODE_DNODE does not have a dn_dbuf */ 683 if (refs == 0 && dn->dn_dbuf) 684 dbuf_rele(dn->dn_dbuf, dn); 685} 686 687void 688dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) 689{ --- 19 unchanged lines hidden (view full) --- 709 */ 710 if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) { 711 mutex_exit(&os->os_lock); 712 return; 713 } 714 715 ASSERT(!refcount_is_zero(&dn->dn_holds) || list_head(&dn->dn_dbufs)); 716 ASSERT(dn->dn_datablksz != 0); |
717 ASSERT3U(dn->dn_next_bonuslen[txg&TXG_MASK], ==, 0); |
|
695 ASSERT3U(dn->dn_next_blksz[txg&TXG_MASK], ==, 0); 696 697 dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n", 698 dn->dn_object, txg); 699 700 if (dn->dn_free_txg > 0 && dn->dn_free_txg <= txg) { 701 list_insert_tail(&os->os_free_dnodes[txg&TXG_MASK], dn); 702 } else { --- 6 unchanged lines hidden (view full) --- 709 * The dnode maintains a hold on its containing dbuf as 710 * long as there are holds on it. Each instantiated child 711 * dbuf maintains a hold on the dnode. When the last child 712 * drops its hold, the dnode will drop its hold on the 713 * containing dbuf. We add a "dirty hold" here so that the 714 * dnode will hang around after we finish processing its 715 * children. 716 */ | 718 ASSERT3U(dn->dn_next_blksz[txg&TXG_MASK], ==, 0); 719 720 dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n", 721 dn->dn_object, txg); 722 723 if (dn->dn_free_txg > 0 && dn->dn_free_txg <= txg) { 724 list_insert_tail(&os->os_free_dnodes[txg&TXG_MASK], dn); 725 } else { --- 6 unchanged lines hidden (view full) --- 732 * The dnode maintains a hold on its containing dbuf as 733 * long as there are holds on it. Each instantiated child 734 * dbuf maintains a hold on the dnode. When the last child 735 * drops its hold, the dnode will drop its hold on the 736 * containing dbuf. We add a "dirty hold" here so that the 737 * dnode will hang around after we finish processing its 738 * children. 739 */ |
717 dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg); | 740 VERIFY(dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg)); |
718 719 (void) dbuf_dirty(dn->dn_dbuf, tx); 720 721 dsl_dataset_dirty(os->os_dsl_dataset, tx); 722} 723 724void 725dnode_free(dnode_t *dn, dmu_tx_t *tx) --- 31 unchanged lines hidden (view full) --- 757/* 758 * Try to change the block size for the indicated dnode. This can only 759 * succeed if there are no blocks allocated or dirty beyond first block 760 */ 761int 762dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx) 763{ 764 dmu_buf_impl_t *db, *db_next; | 741 742 (void) dbuf_dirty(dn->dn_dbuf, tx); 743 744 dsl_dataset_dirty(os->os_dsl_dataset, tx); 745} 746 747void 748dnode_free(dnode_t *dn, dmu_tx_t *tx) --- 31 unchanged lines hidden (view full) --- 780/* 781 * Try to change the block size for the indicated dnode. This can only 782 * succeed if there are no blocks allocated or dirty beyond first block 783 */ 784int 785dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx) 786{ 787 dmu_buf_impl_t *db, *db_next; |
765 int have_db0 = FALSE; | 788 int err; |
766 767 if (size == 0) 768 size = SPA_MINBLOCKSIZE; 769 if (size > SPA_MAXBLOCKSIZE) 770 size = SPA_MAXBLOCKSIZE; 771 else 772 size = P2ROUNDUP(size, SPA_MINBLOCKSIZE); 773 --- 8 unchanged lines hidden (view full) --- 782 /* Check for any allocated blocks beyond the first */ 783 if (dn->dn_phys->dn_maxblkid != 0) 784 goto fail; 785 786 mutex_enter(&dn->dn_dbufs_mtx); 787 for (db = list_head(&dn->dn_dbufs); db; db = db_next) { 788 db_next = list_next(&dn->dn_dbufs, db); 789 | 789 790 if (size == 0) 791 size = SPA_MINBLOCKSIZE; 792 if (size > SPA_MAXBLOCKSIZE) 793 size = SPA_MAXBLOCKSIZE; 794 else 795 size = P2ROUNDUP(size, SPA_MINBLOCKSIZE); 796 --- 8 unchanged lines hidden (view full) --- 805 /* Check for any allocated blocks beyond the first */ 806 if (dn->dn_phys->dn_maxblkid != 0) 807 goto fail; 808 809 mutex_enter(&dn->dn_dbufs_mtx); 810 for (db = list_head(&dn->dn_dbufs); db; db = db_next) { 811 db_next = list_next(&dn->dn_dbufs, db); 812 |
790 if (db->db_blkid == 0) { 791 have_db0 = TRUE; 792 } else if (db->db_blkid != DB_BONUS_BLKID) { | 813 if (db->db_blkid != 0 && db->db_blkid != DB_BONUS_BLKID) { |
793 mutex_exit(&dn->dn_dbufs_mtx); 794 goto fail; 795 } 796 } 797 mutex_exit(&dn->dn_dbufs_mtx); 798 799 if (ibs && dn->dn_nlevels != 1) 800 goto fail; 801 | 814 mutex_exit(&dn->dn_dbufs_mtx); 815 goto fail; 816 } 817 } 818 mutex_exit(&dn->dn_dbufs_mtx); 819 820 if (ibs && dn->dn_nlevels != 1) 821 goto fail; 822 |
802 db = NULL; 803 if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) || have_db0) { 804 /* obtain the old block */ 805 db = dbuf_hold(dn, 0, FTAG); | 823 /* resize the old block */ 824 err = dbuf_hold_impl(dn, 0, 0, TRUE, FTAG, &db); 825 if (err == 0) |
806 dbuf_new_size(db, size, tx); | 826 dbuf_new_size(db, size, tx); |
807 } | 827 else if (err != ENOENT) 828 goto fail; |
808 809 dnode_setdblksz(dn, size); 810 dnode_setdirty(dn, tx); 811 dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = size; 812 if (ibs) { 813 dn->dn_indblkshift = ibs; 814 dn->dn_next_indblkshift[tx->tx_txg&TXG_MASK] = ibs; 815 } | 829 830 dnode_setdblksz(dn, size); 831 dnode_setdirty(dn, tx); 832 dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = size; 833 if (ibs) { 834 dn->dn_indblkshift = ibs; 835 dn->dn_next_indblkshift[tx->tx_txg&TXG_MASK] = ibs; 836 } |
816 | 837 /* rele after we have fixed the blocksize in the dnode */ |
817 if (db) 818 dbuf_rele(db, FTAG); 819 820 rw_exit(&dn->dn_struct_rwlock); 821 return (0); 822 823fail: 824 rw_exit(&dn->dn_struct_rwlock); 825 return (ENOTSUP); 826} 827 | 838 if (db) 839 dbuf_rele(db, FTAG); 840 841 rw_exit(&dn->dn_struct_rwlock); 842 return (0); 843 844fail: 845 rw_exit(&dn->dn_struct_rwlock); 846 return (ENOTSUP); 847} 848 |
849/* read-holding callers must not rely on the lock being continuously held */ |
|
828void | 850void |
829dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx) | 851dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read) |
830{ 831 uint64_t txgoff = tx->tx_txg & TXG_MASK; | 852{ 853 uint64_t txgoff = tx->tx_txg & TXG_MASK; |
832 int drop_struct_lock = FALSE; | |
833 int epbs, new_nlevels; 834 uint64_t sz; 835 836 ASSERT(blkid != DB_BONUS_BLKID); 837 | 854 int epbs, new_nlevels; 855 uint64_t sz; 856 857 ASSERT(blkid != DB_BONUS_BLKID); 858 |
838 if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) { 839 rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 840 drop_struct_lock = TRUE; | 859 ASSERT(have_read ? 860 RW_READ_HELD(&dn->dn_struct_rwlock) : 861 RW_WRITE_HELD(&dn->dn_struct_rwlock)); 862 863 /* 864 * if we have a read-lock, check to see if we need to do any work 865 * before upgrading to a write-lock. 866 */ 867 if (have_read) { 868 if (blkid <= dn->dn_maxblkid) 869 return; 870 871 if (!rw_tryupgrade(&dn->dn_struct_rwlock)) { 872 rw_exit(&dn->dn_struct_rwlock); 873 rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 874 } |
841 } 842 843 if (blkid <= dn->dn_maxblkid) 844 goto out; 845 846 dn->dn_maxblkid = blkid; 847 848 /* --- 35 unchanged lines hidden (view full) --- 884 dr->dr_parent = new; 885 } 886 } 887 mutex_exit(&new->dt.di.dr_mtx); 888 mutex_exit(&dn->dn_mtx); 889 } 890 891out: | 875 } 876 877 if (blkid <= dn->dn_maxblkid) 878 goto out; 879 880 dn->dn_maxblkid = blkid; 881 882 /* --- 35 unchanged lines hidden (view full) --- 918 dr->dr_parent = new; 919 } 920 } 921 mutex_exit(&new->dt.di.dr_mtx); 922 mutex_exit(&dn->dn_mtx); 923 } 924 925out: |
892 if (drop_struct_lock) 893 rw_exit(&dn->dn_struct_rwlock); | 926 if (have_read) 927 rw_downgrade(&dn->dn_struct_rwlock); |
894} 895 896void 897dnode_clear_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) 898{ 899 avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK]; 900 avl_index_t where; 901 free_range_t *rp; --- 44 unchanged lines hidden (view full) --- 946 } 947} 948 949void 950dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) 951{ 952 dmu_buf_impl_t *db; 953 uint64_t blkoff, blkid, nblks; | 928} 929 930void 931dnode_clear_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) 932{ 933 avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK]; 934 avl_index_t where; 935 free_range_t *rp; --- 44 unchanged lines hidden (view full) --- 980 } 981} 982 983void 984dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) 985{ 986 dmu_buf_impl_t *db; 987 uint64_t blkoff, blkid, nblks; |
954 int blksz, head; | 988 int blksz, blkshift, head, tail; |
955 int trunc = FALSE; | 989 int trunc = FALSE; |
990 int epbs; |
|
956 957 rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 958 blksz = dn->dn_datablksz; | 991 992 rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 993 blksz = dn->dn_datablksz; |
994 blkshift = dn->dn_datablkshift; 995 epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; |
|
959 | 996 |
960 /* If the range is past the end of the file, this is a no-op */ 961 if (off >= blksz * (dn->dn_maxblkid+1)) 962 goto out; | |
963 if (len == -1ULL) { 964 len = UINT64_MAX - off; 965 trunc = TRUE; 966 } 967 968 /* 969 * First, block align the region to free: 970 */ 971 if (ISP2(blksz)) { 972 head = P2NPHASE(off, blksz); 973 blkoff = P2PHASE(off, blksz); | 997 if (len == -1ULL) { 998 len = UINT64_MAX - off; 999 trunc = TRUE; 1000 } 1001 1002 /* 1003 * First, block align the region to free: 1004 */ 1005 if (ISP2(blksz)) { 1006 head = P2NPHASE(off, blksz); 1007 blkoff = P2PHASE(off, blksz); |
1008 if ((off >> blkshift) > dn->dn_maxblkid) 1009 goto out; |
|
974 } else { 975 ASSERT(dn->dn_maxblkid == 0); 976 if (off == 0 && len >= blksz) { | 1010 } else { 1011 ASSERT(dn->dn_maxblkid == 0); 1012 if (off == 0 && len >= blksz) { |
977 /* Freeing the whole block; don't do any head. */ 978 head = 0; | 1013 /* Freeing the whole block; fast-track this request */ 1014 blkid = 0; 1015 nblks = 1; 1016 goto done; 1017 } else if (off >= blksz) { 1018 /* Freeing past end-of-data */ 1019 goto out; |
979 } else { 980 /* Freeing part of the block. */ 981 head = blksz - off; 982 ASSERT3U(head, >, 0); 983 } 984 blkoff = off; 985 } 986 /* zero out any partial block data at the start of the range */ --- 16 unchanged lines hidden (view full) --- 1003 } 1004 dbuf_rele(db, FTAG); 1005 } 1006 off += head; 1007 len -= head; 1008 } 1009 1010 /* If the range was less than one block, we're done */ | 1020 } else { 1021 /* Freeing part of the block. */ 1022 head = blksz - off; 1023 ASSERT3U(head, >, 0); 1024 } 1025 blkoff = off; 1026 } 1027 /* zero out any partial block data at the start of the range */ --- 16 unchanged lines hidden (view full) --- 1044 } 1045 dbuf_rele(db, FTAG); 1046 } 1047 off += head; 1048 len -= head; 1049 } 1050 1051 /* If the range was less than one block, we're done */ |
1011 if (len == 0 || off >= blksz * (dn->dn_maxblkid+1)) | 1052 if (len == 0) |
1012 goto out; 1013 | 1053 goto out; 1054 |
1014 if (!ISP2(blksz)) { 1015 /* 1016 * They are freeing the whole block of a 1017 * non-power-of-two blocksize file. Skip all the messy 1018 * math. 1019 */ 1020 ASSERT3U(off, ==, 0); 1021 ASSERT3U(len, >=, blksz); 1022 blkid = 0; 1023 nblks = 1; 1024 } else { 1025 int tail; 1026 int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; 1027 int blkshift = dn->dn_datablkshift; | 1055 /* If the remaining range is past end of file, we're done */ 1056 if ((off >> blkshift) > dn->dn_maxblkid) 1057 goto out; |
1028 | 1058 |
1029 /* If the remaining range is past end of file, we're done */ 1030 if (off > dn->dn_maxblkid << blkshift) 1031 goto out; | 1059 ASSERT(ISP2(blksz)); 1060 if (trunc) 1061 tail = 0; 1062 else 1063 tail = P2PHASE(len, blksz); |
1032 | 1064 |
1033 if (off + len == UINT64_MAX) 1034 tail = 0; 1035 else 1036 tail = P2PHASE(len, blksz); 1037 1038 ASSERT3U(P2PHASE(off, blksz), ==, 0); 1039 /* zero out any partial block data at the end of the range */ 1040 if (tail) { 1041 if (len < tail) 1042 tail = len; 1043 if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, off+len), 1044 TRUE, FTAG, &db) == 0) { 1045 /* don't dirty if not on disk and not dirty */ 1046 if (db->db_last_dirty || 1047 (db->db_blkptr && 1048 !BP_IS_HOLE(db->db_blkptr))) { 1049 rw_exit(&dn->dn_struct_rwlock); 1050 dbuf_will_dirty(db, tx); 1051 rw_enter(&dn->dn_struct_rwlock, 1052 RW_WRITER); 1053 bzero(db->db.db_data, tail); 1054 } 1055 dbuf_rele(db, FTAG); | 1065 ASSERT3U(P2PHASE(off, blksz), ==, 0); 1066 /* zero out any partial block data at the end of the range */ 1067 if (tail) { 1068 if (len < tail) 1069 tail = len; 1070 if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, off+len), 1071 TRUE, FTAG, &db) == 0) { 1072 /* don't dirty if not on disk and not dirty */ 1073 if (db->db_last_dirty || 1074 (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) { 1075 rw_exit(&dn->dn_struct_rwlock); 1076 dbuf_will_dirty(db, tx); 1077 rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 1078 bzero(db->db.db_data, tail); |
1056 } | 1079 } |
1057 len -= tail; | 1080 dbuf_rele(db, FTAG); |
1058 } | 1081 } |
1059 /* If the range did not include a full block, we are done */ 1060 if (len == 0) 1061 goto out; | 1082 len -= tail; 1083 } |
1062 | 1084 |
1063 /* dirty the left indirects */ 1064 if (dn->dn_nlevels > 1 && off != 0) { 1065 db = dbuf_hold_level(dn, 1, 1066 (off - head) >> (blkshift + epbs), FTAG); | 1085 /* If the range did not include a full block, we are done */ 1086 if (len == 0) 1087 goto out; 1088 1089 ASSERT(IS_P2ALIGNED(off, blksz)); 1090 ASSERT(trunc || IS_P2ALIGNED(len, blksz)); 1091 blkid = off >> blkshift; 1092 nblks = len >> blkshift; 1093 if (trunc) 1094 nblks += 1; 1095 1096 /* 1097 * Read in and mark all the level-1 indirects dirty, 1098 * so that they will stay in memory until syncing phase. 1099 * Always dirty the first and last indirect to make sure 1100 * we dirty all the partial indirects. 1101 */ 1102 if (dn->dn_nlevels > 1) { 1103 uint64_t i, first, last; 1104 int shift = epbs + dn->dn_datablkshift; 1105 1106 first = blkid >> epbs; 1107 if (db = dbuf_hold_level(dn, 1, first, FTAG)) { |
1067 dbuf_will_dirty(db, tx); 1068 dbuf_rele(db, FTAG); 1069 } | 1108 dbuf_will_dirty(db, tx); 1109 dbuf_rele(db, FTAG); 1110 } |
1070 1071 /* dirty the right indirects */ 1072 if (dn->dn_nlevels > 1 && !trunc) { 1073 db = dbuf_hold_level(dn, 1, 1074 (off + len + tail - 1) >> (blkshift + epbs), FTAG); | 1111 if (trunc) 1112 last = dn->dn_maxblkid >> epbs; 1113 else 1114 last = (blkid + nblks - 1) >> epbs; 1115 if (last > first && (db = dbuf_hold_level(dn, 1, last, FTAG))) { |
1075 dbuf_will_dirty(db, tx); 1076 dbuf_rele(db, FTAG); 1077 } | 1116 dbuf_will_dirty(db, tx); 1117 dbuf_rele(db, FTAG); 1118 } |
1119 for (i = first + 1; i < last; i++) { 1120 uint64_t ibyte = i << shift; 1121 int err; |
|
1078 | 1122 |
1079 /* 1080 * Finally, add this range to the dnode range list, we 1081 * will finish up this free operation in the syncing phase. 1082 */ 1083 ASSERT(IS_P2ALIGNED(off, 1<<blkshift)); 1084 ASSERT(off + len == UINT64_MAX || 1085 IS_P2ALIGNED(len, 1<<blkshift)); 1086 blkid = off >> blkshift; 1087 nblks = len >> blkshift; 1088 1089 if (trunc) 1090 dn->dn_maxblkid = (blkid ? blkid - 1 : 0); | 1123 err = dnode_next_offset(dn, 1124 DNODE_FIND_HAVELOCK, &ibyte, 1, 1, 0); 1125 i = ibyte >> shift; 1126 if (err == ESRCH || i >= last) 1127 break; 1128 ASSERT(err == 0); 1129 db = dbuf_hold_level(dn, 1, i, FTAG); 1130 if (db) { 1131 dbuf_will_dirty(db, tx); 1132 dbuf_rele(db, FTAG); 1133 } 1134 } |
1091 } | 1135 } |
1092 | 1136done: 1137 /* 1138 * Add this range to the dnode range list. 1139 * We will finish up this free operation in the syncing phase. 1140 */ |
1093 mutex_enter(&dn->dn_mtx); 1094 dnode_clear_range(dn, blkid, nblks, tx); 1095 { 1096 free_range_t *rp, *found; 1097 avl_index_t where; 1098 avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK]; 1099 1100 /* Add new range to dn_ranges */ 1101 rp = kmem_alloc(sizeof (free_range_t), KM_SLEEP); 1102 rp->fr_blkid = blkid; 1103 rp->fr_nblks = nblks; 1104 found = avl_find(tree, rp, &where); 1105 ASSERT(found == NULL); 1106 avl_insert(tree, rp, where); 1107 dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n", 1108 blkid, nblks, tx->tx_txg); 1109 } 1110 mutex_exit(&dn->dn_mtx); 1111 | 1141 mutex_enter(&dn->dn_mtx); 1142 dnode_clear_range(dn, blkid, nblks, tx); 1143 { 1144 free_range_t *rp, *found; 1145 avl_index_t where; 1146 avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK]; 1147 1148 /* Add new range to dn_ranges */ 1149 rp = kmem_alloc(sizeof (free_range_t), KM_SLEEP); 1150 rp->fr_blkid = blkid; 1151 rp->fr_nblks = nblks; 1152 found = avl_find(tree, rp, &where); 1153 ASSERT(found == NULL); 1154 avl_insert(tree, rp, where); 1155 dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n", 1156 blkid, nblks, tx->tx_txg); 1157 } 1158 mutex_exit(&dn->dn_mtx); 1159 |
1112 dbuf_free_range(dn, blkid, nblks, tx); | 1160 dbuf_free_range(dn, blkid, blkid + nblks - 1, tx); |
1113 dnode_setdirty(dn, tx); 1114out: | 1161 dnode_setdirty(dn, tx); 1162out: |
1163 if (trunc && dn->dn_maxblkid >= (off >> blkshift)) 1164 dn->dn_maxblkid = (off >> blkshift ? (off >> blkshift) - 1 : 0); 1165 |
|
1115 rw_exit(&dn->dn_struct_rwlock); 1116} 1117 1118/* return TRUE if this blkid was freed in a recent txg, or FALSE if it wasn't */ 1119uint64_t 1120dnode_block_freed(dnode_t *dn, uint64_t blkid) 1121{ 1122 free_range_t range_tofind; --- 51 unchanged lines hidden (view full) --- 1174 mutex_enter(&dn->dn_mtx); 1175 space = DN_USED_BYTES(dn->dn_phys); 1176 if (delta > 0) { 1177 ASSERT3U(space + delta, >=, space); /* no overflow */ 1178 } else { 1179 ASSERT3U(space, >=, -delta); /* no underflow */ 1180 } 1181 space += delta; | 1166 rw_exit(&dn->dn_struct_rwlock); 1167} 1168 1169/* return TRUE if this blkid was freed in a recent txg, or FALSE if it wasn't */ 1170uint64_t 1171dnode_block_freed(dnode_t *dn, uint64_t blkid) 1172{ 1173 free_range_t range_tofind; --- 51 unchanged lines hidden (view full) --- 1225 mutex_enter(&dn->dn_mtx); 1226 space = DN_USED_BYTES(dn->dn_phys); 1227 if (delta > 0) { 1228 ASSERT3U(space + delta, >=, space); /* no overflow */ 1229 } else { 1230 ASSERT3U(space, >=, -delta); /* no underflow */ 1231 } 1232 space += delta; |
1182 if (spa_version(dn->dn_objset->os_spa) < ZFS_VERSION_DNODE_BYTES) { | 1233 if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_DNODE_BYTES) { |
1183 ASSERT((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) == 0); 1184 ASSERT3U(P2PHASE(space, 1<<DEV_BSHIFT), ==, 0); 1185 dn->dn_phys->dn_used = space >> DEV_BSHIFT; 1186 } else { 1187 dn->dn_phys->dn_used = space; 1188 dn->dn_phys->dn_flags |= DNODE_FLAG_USED_BYTES; 1189 } 1190 mutex_exit(&dn->dn_mtx); --- 15 unchanged lines hidden (view full) --- 1206 1207 if (ds) 1208 dsl_dir_willuse_space(ds->ds_dir, space, tx); 1209 1210 dmu_tx_willuse_space(tx, space); 1211} 1212 1213static int | 1234 ASSERT((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) == 0); 1235 ASSERT3U(P2PHASE(space, 1<<DEV_BSHIFT), ==, 0); 1236 dn->dn_phys->dn_used = space >> DEV_BSHIFT; 1237 } else { 1238 dn->dn_phys->dn_used = space; 1239 dn->dn_phys->dn_flags |= DNODE_FLAG_USED_BYTES; 1240 } 1241 mutex_exit(&dn->dn_mtx); --- 15 unchanged lines hidden (view full) --- 1257 1258 if (ds) 1259 dsl_dir_willuse_space(ds->ds_dir, space, tx); 1260 1261 dmu_tx_willuse_space(tx, space); 1262} 1263 1264static int |
1214dnode_next_offset_level(dnode_t *dn, boolean_t hole, uint64_t *offset, | 1265dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, |
1215 int lvl, uint64_t blkfill, uint64_t txg) 1216{ 1217 dmu_buf_impl_t *db = NULL; 1218 void *data = NULL; 1219 uint64_t epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; 1220 uint64_t epb = 1ULL << epbs; 1221 uint64_t minfill, maxfill; | 1266 int lvl, uint64_t blkfill, uint64_t txg) 1267{ 1268 dmu_buf_impl_t *db = NULL; 1269 void *data = NULL; 1270 uint64_t epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; 1271 uint64_t epb = 1ULL << epbs; 1272 uint64_t minfill, maxfill; |
1222 int i, error, span; | 1273 boolean_t hole; 1274 int i, inc, error, span; |
1223 1224 dprintf("probing object %llu offset %llx level %d of %u\n", 1225 dn->dn_object, *offset, lvl, dn->dn_phys->dn_nlevels); 1226 | 1275 1276 dprintf("probing object %llu offset %llx level %d of %u\n", 1277 dn->dn_object, *offset, lvl, dn->dn_phys->dn_nlevels); 1278 |
1279 hole = flags & DNODE_FIND_HOLE; 1280 inc = (flags & DNODE_FIND_BACKWARDS) ? -1 : 1; 1281 ASSERT(txg == 0 || !hole); 1282 |
|
1227 if (lvl == dn->dn_phys->dn_nlevels) { 1228 error = 0; 1229 epb = dn->dn_phys->dn_nblkptr; 1230 data = dn->dn_phys->dn_blkptr; 1231 } else { 1232 uint64_t blkid = dbuf_whichblock(dn, *offset) >> (epbs * lvl); 1233 error = dbuf_hold_impl(dn, lvl, blkid, TRUE, FTAG, &db); 1234 if (error) { | 1283 if (lvl == dn->dn_phys->dn_nlevels) { 1284 error = 0; 1285 epb = dn->dn_phys->dn_nblkptr; 1286 data = dn->dn_phys->dn_blkptr; 1287 } else { 1288 uint64_t blkid = dbuf_whichblock(dn, *offset) >> (epbs * lvl); 1289 error = dbuf_hold_impl(dn, lvl, blkid, TRUE, FTAG, &db); 1290 if (error) { |
1235 if (error == ENOENT) 1236 return (hole ? 0 : ESRCH); 1237 return (error); | 1291 if (error != ENOENT) 1292 return (error); 1293 if (hole) 1294 return (0); 1295 /* 1296 * This can only happen when we are searching up 1297 * the block tree for data. We don't really need to 1298 * adjust the offset, as we will just end up looking 1299 * at the pointer to this block in its parent, and its 1300 * going to be unallocated, so we will skip over it. 1301 */ 1302 return (ESRCH); |
1238 } 1239 error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT); 1240 if (error) { 1241 dbuf_rele(db, FTAG); 1242 return (error); 1243 } 1244 data = db->db.db_data; 1245 } 1246 1247 if (db && txg && 1248 (db->db_blkptr == NULL || db->db_blkptr->blk_birth <= txg)) { | 1303 } 1304 error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT); 1305 if (error) { 1306 dbuf_rele(db, FTAG); 1307 return (error); 1308 } 1309 data = db->db.db_data; 1310 } 1311 1312 if (db && txg && 1313 (db->db_blkptr == NULL || db->db_blkptr->blk_birth <= txg)) { |
1314 /* 1315 * This can only happen when we are searching up the tree 1316 * and these conditions mean that we need to keep climbing. 1317 */ |
|
1249 error = ESRCH; 1250 } else if (lvl == 0) { 1251 dnode_phys_t *dnp = data; 1252 span = DNODE_SHIFT; 1253 ASSERT(dn->dn_type == DMU_OT_DNODE); 1254 | 1318 error = ESRCH; 1319 } else if (lvl == 0) { 1320 dnode_phys_t *dnp = data; 1321 span = DNODE_SHIFT; 1322 ASSERT(dn->dn_type == DMU_OT_DNODE); 1323 |
1255 for (i = (*offset >> span) & (blkfill - 1); i < blkfill; i++) { | 1324 for (i = (*offset >> span) & (blkfill - 1); 1325 i >= 0 && i < blkfill; i += inc) { |
1256 boolean_t newcontents = B_TRUE; 1257 if (txg) { 1258 int j; 1259 newcontents = B_FALSE; 1260 for (j = 0; j < dnp[i].dn_nblkptr; j++) { 1261 if (dnp[i].dn_blkptr[j].blk_birth > txg) 1262 newcontents = B_TRUE; 1263 } 1264 } 1265 if (!dnp[i].dn_type == hole && newcontents) 1266 break; | 1326 boolean_t newcontents = B_TRUE; 1327 if (txg) { 1328 int j; 1329 newcontents = B_FALSE; 1330 for (j = 0; j < dnp[i].dn_nblkptr; j++) { 1331 if (dnp[i].dn_blkptr[j].blk_birth > txg) 1332 newcontents = B_TRUE; 1333 } 1334 } 1335 if (!dnp[i].dn_type == hole && newcontents) 1336 break; |
1267 *offset += 1ULL << span; | 1337 *offset += (1ULL << span) * inc; |
1268 } | 1338 } |
1269 if (i == blkfill) | 1339 if (i < 0 || i == blkfill) |
1270 error = ESRCH; 1271 } else { 1272 blkptr_t *bp = data; 1273 span = (lvl - 1) * epbs + dn->dn_datablkshift; 1274 minfill = 0; 1275 maxfill = blkfill << ((lvl - 1) * epbs); 1276 1277 if (hole) 1278 maxfill--; 1279 else 1280 minfill++; 1281 1282 for (i = (*offset >> span) & ((1ULL << epbs) - 1); | 1340 error = ESRCH; 1341 } else { 1342 blkptr_t *bp = data; 1343 span = (lvl - 1) * epbs + dn->dn_datablkshift; 1344 minfill = 0; 1345 maxfill = blkfill << ((lvl - 1) * epbs); 1346 1347 if (hole) 1348 maxfill--; 1349 else 1350 minfill++; 1351 1352 for (i = (*offset >> span) & ((1ULL << epbs) - 1); |
1283 i < epb; i++) { | 1353 i >= 0 && i < epb; i += inc) { |
1284 if (bp[i].blk_fill >= minfill && 1285 bp[i].blk_fill <= maxfill && | 1354 if (bp[i].blk_fill >= minfill && 1355 bp[i].blk_fill <= maxfill && |
1286 bp[i].blk_birth > txg) | 1356 (hole || bp[i].blk_birth > txg)) |
1287 break; | 1357 break; |
1288 *offset += 1ULL << span; | 1358 if (inc < 0 && *offset < (1ULL << span)) 1359 *offset = 0; 1360 else 1361 *offset += (1ULL << span) * inc; |
1289 } | 1362 } |
1290 if (i >= epb) | 1363 if (i < 0 || i == epb) |
1291 error = ESRCH; 1292 } 1293 1294 if (db) 1295 dbuf_rele(db, FTAG); 1296 1297 return (error); 1298} 1299 1300/* 1301 * Find the next hole, data, or sparse region at or after *offset. 1302 * The value 'blkfill' tells us how many items we expect to find 1303 * in an L0 data block; this value is 1 for normal objects, 1304 * DNODES_PER_BLOCK for the meta dnode, and some fraction of 1305 * DNODES_PER_BLOCK when searching for sparse regions thereof. 1306 * 1307 * Examples: 1308 * | 1364 error = ESRCH; 1365 } 1366 1367 if (db) 1368 dbuf_rele(db, FTAG); 1369 1370 return (error); 1371} 1372 1373/* 1374 * Find the next hole, data, or sparse region at or after *offset. 1375 * The value 'blkfill' tells us how many items we expect to find 1376 * in an L0 data block; this value is 1 for normal objects, 1377 * DNODES_PER_BLOCK for the meta dnode, and some fraction of 1378 * DNODES_PER_BLOCK when searching for sparse regions thereof. 1379 * 1380 * Examples: 1381 * |
1309 * dnode_next_offset(dn, hole, offset, 1, 1, 0); 1310 * Finds the next hole/data in a file. | 1382 * dnode_next_offset(dn, flags, offset, 1, 1, 0); 1383 * Finds the next/previous hole/data in a file. |
1311 * Used in dmu_offset_next(). 1312 * | 1384 * Used in dmu_offset_next(). 1385 * |
1313 * dnode_next_offset(mdn, hole, offset, 0, DNODES_PER_BLOCK, txg); | 1386 * dnode_next_offset(mdn, flags, offset, 0, DNODES_PER_BLOCK, txg); |
1314 * Finds the next free/allocated dnode an objset's meta-dnode. 1315 * Only finds objects that have new contents since txg (ie. 1316 * bonus buffer changes and content removal are ignored). 1317 * Used in dmu_object_next(). 1318 * | 1387 * Finds the next free/allocated dnode an objset's meta-dnode. 1388 * Only finds objects that have new contents since txg (ie. 1389 * bonus buffer changes and content removal are ignored). 1390 * Used in dmu_object_next(). 1391 * |
1319 * dnode_next_offset(mdn, TRUE, offset, 2, DNODES_PER_BLOCK >> 2, 0); | 1392 * dnode_next_offset(mdn, DNODE_FIND_HOLE, offset, 2, DNODES_PER_BLOCK >> 2, 0); |
1320 * Finds the next L2 meta-dnode bp that's at most 1/4 full. 1321 * Used in dmu_object_alloc(). 1322 */ 1323int | 1393 * Finds the next L2 meta-dnode bp that's at most 1/4 full. 1394 * Used in dmu_object_alloc(). 1395 */ 1396int |
1324dnode_next_offset(dnode_t *dn, boolean_t hole, uint64_t *offset, | 1397dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, |
1325 int minlvl, uint64_t blkfill, uint64_t txg) 1326{ | 1398 int minlvl, uint64_t blkfill, uint64_t txg) 1399{ |
1400 uint64_t initial_offset = *offset; |
|
1327 int lvl, maxlvl; 1328 int error = 0; | 1401 int lvl, maxlvl; 1402 int error = 0; |
1329 uint64_t initial_offset = *offset; | |
1330 | 1403 |
1331 rw_enter(&dn->dn_struct_rwlock, RW_READER); | 1404 if (!(flags & DNODE_FIND_HAVELOCK)) 1405 rw_enter(&dn->dn_struct_rwlock, RW_READER); |
1332 1333 if (dn->dn_phys->dn_nlevels == 0) { | 1406 1407 if (dn->dn_phys->dn_nlevels == 0) { |
1334 rw_exit(&dn->dn_struct_rwlock); 1335 return (ESRCH); | 1408 error = ESRCH; 1409 goto out; |
1336 } 1337 1338 if (dn->dn_datablkshift == 0) { 1339 if (*offset < dn->dn_datablksz) { | 1410 } 1411 1412 if (dn->dn_datablkshift == 0) { 1413 if (*offset < dn->dn_datablksz) { |
1340 if (hole) | 1414 if (flags & DNODE_FIND_HOLE) |
1341 *offset = dn->dn_datablksz; 1342 } else { 1343 error = ESRCH; 1344 } | 1415 *offset = dn->dn_datablksz; 1416 } else { 1417 error = ESRCH; 1418 } |
1345 rw_exit(&dn->dn_struct_rwlock); 1346 return (error); | 1419 goto out; |
1347 } 1348 1349 maxlvl = dn->dn_phys->dn_nlevels; 1350 1351 for (lvl = minlvl; lvl <= maxlvl; lvl++) { 1352 error = dnode_next_offset_level(dn, | 1420 } 1421 1422 maxlvl = dn->dn_phys->dn_nlevels; 1423 1424 for (lvl = minlvl; lvl <= maxlvl; lvl++) { 1425 error = dnode_next_offset_level(dn, |
1353 hole, offset, lvl, blkfill, txg); | 1426 flags, offset, lvl, blkfill, txg); |
1354 if (error != ESRCH) 1355 break; 1356 } 1357 | 1427 if (error != ESRCH) 1428 break; 1429 } 1430 |
1358 while (--lvl >= minlvl && error == 0) { | 1431 while (error == 0 && --lvl >= minlvl) { |
1359 error = dnode_next_offset_level(dn, | 1432 error = dnode_next_offset_level(dn, |
1360 hole, offset, lvl, blkfill, txg); | 1433 flags, offset, lvl, blkfill, txg); |
1361 } 1362 | 1434 } 1435 |
1363 rw_exit(&dn->dn_struct_rwlock); 1364 1365 if (error == 0 && initial_offset > *offset) | 1436 if (error == 0 && (flags & DNODE_FIND_BACKWARDS ? 1437 initial_offset < *offset : initial_offset > *offset)) |
1366 error = ESRCH; | 1438 error = ESRCH; |
1439out: 1440 if (!(flags & DNODE_FIND_HAVELOCK)) 1441 rw_exit(&dn->dn_struct_rwlock); |
|
1367 1368 return (error); 1369} | 1442 1443 return (error); 1444} |