dbuf.c (177698) | dbuf.c (185029) |
---|---|
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 5 unchanged lines hidden (view full) --- 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* | 1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 5 unchanged lines hidden (view full) --- 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* |
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. | 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. |
23 * Use is subject to license terms. 24 */ 25 | 23 * Use is subject to license terms. 24 */ 25 |
26#pragma ident "%Z%%M% %I% %E% SMI" 27 | |
28#include <sys/zfs_context.h> 29#include <sys/dmu.h> 30#include <sys/dmu_impl.h> 31#include <sys/dbuf.h> 32#include <sys/dmu_objset.h> 33#include <sys/dsl_dataset.h> 34#include <sys/dsl_dir.h> 35#include <sys/dmu_tx.h> 36#include <sys/spa.h> 37#include <sys/zio.h> 38#include <sys/dmu_zfetch.h> 39 40static void dbuf_destroy(dmu_buf_impl_t *db); 41static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); | 26#include <sys/zfs_context.h> 27#include <sys/dmu.h> 28#include <sys/dmu_impl.h> 29#include <sys/dbuf.h> 30#include <sys/dmu_objset.h> 31#include <sys/dsl_dataset.h> 32#include <sys/dsl_dir.h> 33#include <sys/dmu_tx.h> 34#include <sys/spa.h> 35#include <sys/zio.h> 36#include <sys/dmu_zfetch.h> 37 38static void dbuf_destroy(dmu_buf_impl_t *db); 39static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); |
42static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, int checksum, 43 int compress, dmu_tx_t *tx); | 40static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx); |
44static arc_done_func_t dbuf_write_ready; 45static arc_done_func_t dbuf_write_done; 46 | 41static arc_done_func_t dbuf_write_ready; 42static arc_done_func_t dbuf_write_done; 43 |
47int zfs_mdcomp_disable = 0; 48SYSCTL_DECL(_vfs_zfs); 49TUNABLE_INT("vfs.zfs.mdcomp_disable", &zfs_mdcomp_disable); 50SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN, 51 &zfs_mdcomp_disable, 0, "Disable metadata compression"); 52 | |
53/* 54 * Global data structures and functions for the dbuf cache. 55 */ 56static kmem_cache_t *dbuf_cache; 57 58/* ARGSUSED */ 59static int 60dbuf_cons(void *vdb, void *unused, int kmflag) --- 245 unchanged lines hidden (view full) --- 306 ASSERT3U(db->db.db_object, ==, dn->dn_object); 307 ASSERT3P(db->db_objset, ==, dn->dn_objset); 308 ASSERT3U(db->db_level, <, dn->dn_nlevels); 309 ASSERT(db->db_blkid == DB_BONUS_BLKID || 310 list_head(&dn->dn_dbufs)); 311 } 312 if (db->db_blkid == DB_BONUS_BLKID) { 313 ASSERT(dn != NULL); | 44/* 45 * Global data structures and functions for the dbuf cache. 46 */ 47static kmem_cache_t *dbuf_cache; 48 49/* ARGSUSED */ 50static int 51dbuf_cons(void *vdb, void *unused, int kmflag) --- 245 unchanged lines hidden (view full) --- 297 ASSERT3U(db->db.db_object, ==, dn->dn_object); 298 ASSERT3P(db->db_objset, ==, dn->dn_objset); 299 ASSERT3U(db->db_level, <, dn->dn_nlevels); 300 ASSERT(db->db_blkid == DB_BONUS_BLKID || 301 list_head(&dn->dn_dbufs)); 302 } 303 if (db->db_blkid == DB_BONUS_BLKID) { 304 ASSERT(dn != NULL); |
314 ASSERT3U(db->db.db_size, ==, dn->dn_bonuslen); | 305 ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen); |
315 ASSERT3U(db->db.db_offset, ==, DB_BONUS_BLKID); 316 } else { 317 ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size); 318 } 319 320 if (db->db_level == 0) { 321 /* we can be momentarily larger in dnode_set_blksz() */ 322 if (db->db_blkid != DB_BONUS_BLKID && dn) { --- 132 unchanged lines hidden (view full) --- 455 cv_broadcast(&db->db_changed); 456 mutex_exit(&db->db_mtx); 457 dbuf_rele(db, NULL); 458} 459 460static void 461dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags) 462{ | 306 ASSERT3U(db->db.db_offset, ==, DB_BONUS_BLKID); 307 } else { 308 ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size); 309 } 310 311 if (db->db_level == 0) { 312 /* we can be momentarily larger in dnode_set_blksz() */ 313 if (db->db_blkid != DB_BONUS_BLKID && dn) { --- 132 unchanged lines hidden (view full) --- 446 cv_broadcast(&db->db_changed); 447 mutex_exit(&db->db_mtx); 448 dbuf_rele(db, NULL); 449} 450 451static void 452dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags) 453{ |
463 blkptr_t *bp; | 454 dnode_t *dn = db->db_dnode; |
464 zbookmark_t zb; 465 uint32_t aflags = ARC_NOWAIT; | 455 zbookmark_t zb; 456 uint32_t aflags = ARC_NOWAIT; |
457 arc_buf_t *pbuf; |
|
466 467 ASSERT(!refcount_is_zero(&db->db_holds)); 468 /* We need the struct_rwlock to prevent db_blkptr from changing. */ | 458 459 ASSERT(!refcount_is_zero(&db->db_holds)); 460 /* We need the struct_rwlock to prevent db_blkptr from changing. */ |
469 ASSERT(RW_LOCK_HELD(&db->db_dnode->dn_struct_rwlock)); | 461 ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); |
470 ASSERT(MUTEX_HELD(&db->db_mtx)); 471 ASSERT(db->db_state == DB_UNCACHED); 472 ASSERT(db->db_buf == NULL); 473 474 if (db->db_blkid == DB_BONUS_BLKID) { | 462 ASSERT(MUTEX_HELD(&db->db_mtx)); 463 ASSERT(db->db_state == DB_UNCACHED); 464 ASSERT(db->db_buf == NULL); 465 466 if (db->db_blkid == DB_BONUS_BLKID) { |
475 ASSERT3U(db->db_dnode->dn_bonuslen, ==, db->db.db_size); | 467 int bonuslen = dn->dn_bonuslen; 468 469 ASSERT3U(bonuslen, <=, db->db.db_size); |
476 db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN); | 470 db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN); |
477 if (db->db.db_size < DN_MAX_BONUSLEN) | 471 arc_space_consume(DN_MAX_BONUSLEN); 472 if (bonuslen < DN_MAX_BONUSLEN) |
478 bzero(db->db.db_data, DN_MAX_BONUSLEN); | 473 bzero(db->db.db_data, DN_MAX_BONUSLEN); |
479 bcopy(DN_BONUS(db->db_dnode->dn_phys), db->db.db_data, 480 db->db.db_size); | 474 bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, 475 bonuslen); |
481 dbuf_update_data(db); 482 db->db_state = DB_CACHED; 483 mutex_exit(&db->db_mtx); 484 return; 485 } 486 | 476 dbuf_update_data(db); 477 db->db_state = DB_CACHED; 478 mutex_exit(&db->db_mtx); 479 return; 480 } 481 |
487 if (db->db_level == 0 && dnode_block_freed(db->db_dnode, db->db_blkid)) 488 bp = NULL; 489 else 490 bp = db->db_blkptr; 491 492 if (bp == NULL) 493 dprintf_dbuf(db, "blkptr: %s\n", "NULL"); 494 else 495 dprintf_dbuf_bp(db, bp, "%s", "blkptr:"); 496 497 if (bp == NULL || BP_IS_HOLE(bp)) { | 482 /* 483 * Recheck BP_IS_HOLE() after dnode_block_freed() in case dnode_sync() 484 * processes the delete record and clears the bp while we are waiting 485 * for the dn_mtx (resulting in a "no" from block_freed). 486 */ 487 if (db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr) || 488 (db->db_level == 0 && (dnode_block_freed(dn, db->db_blkid) || 489 BP_IS_HOLE(db->db_blkptr)))) { |
498 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 499 | 490 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 491 |
500 ASSERT(bp == NULL || BP_IS_HOLE(bp)); 501 dbuf_set_data(db, arc_buf_alloc(db->db_dnode->dn_objset->os_spa, | 492 dbuf_set_data(db, arc_buf_alloc(dn->dn_objset->os_spa, |
502 db->db.db_size, db, type)); 503 bzero(db->db.db_data, db->db.db_size); 504 db->db_state = DB_CACHED; 505 *flags |= DB_RF_CACHED; 506 mutex_exit(&db->db_mtx); 507 return; 508 } 509 510 db->db_state = DB_READ; 511 mutex_exit(&db->db_mtx); 512 | 493 db->db.db_size, db, type)); 494 bzero(db->db.db_data, db->db.db_size); 495 db->db_state = DB_CACHED; 496 *flags |= DB_RF_CACHED; 497 mutex_exit(&db->db_mtx); 498 return; 499 } 500 501 db->db_state = DB_READ; 502 mutex_exit(&db->db_mtx); 503 |
504 if (DBUF_IS_L2CACHEABLE(db)) 505 aflags |= ARC_L2CACHE; 506 |
|
513 zb.zb_objset = db->db_objset->os_dsl_dataset ? 514 db->db_objset->os_dsl_dataset->ds_object : 0; 515 zb.zb_object = db->db.db_object; 516 zb.zb_level = db->db_level; 517 zb.zb_blkid = db->db_blkid; 518 519 dbuf_add_ref(db, NULL); 520 /* ZIO_FLAG_CANFAIL callers have to check the parent zio's error */ | 507 zb.zb_objset = db->db_objset->os_dsl_dataset ? 508 db->db_objset->os_dsl_dataset->ds_object : 0; 509 zb.zb_object = db->db.db_object; 510 zb.zb_level = db->db_level; 511 zb.zb_blkid = db->db_blkid; 512 513 dbuf_add_ref(db, NULL); 514 /* ZIO_FLAG_CANFAIL callers have to check the parent zio's error */ |
521 ASSERT3U(db->db_dnode->dn_type, <, DMU_OT_NUMTYPES); 522 (void) arc_read(zio, db->db_dnode->dn_objset->os_spa, bp, 523 db->db_level > 0 ? byteswap_uint64_array : 524 dmu_ot[db->db_dnode->dn_type].ot_byteswap, | 515 516 if (db->db_parent) 517 pbuf = db->db_parent->db_buf; 518 else 519 pbuf = db->db_objset->os_phys_buf; 520 521 (void) arc_read(zio, dn->dn_objset->os_spa, db->db_blkptr, pbuf, |
525 dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, 526 (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED, 527 &aflags, &zb); 528 if (aflags & ARC_CACHED) 529 *flags |= DB_RF_CACHED; 530} 531 532int --- 8 unchanged lines hidden (view full) --- 541 * can't be freed while we have a hold on the buffer. 542 */ 543 ASSERT(!refcount_is_zero(&db->db_holds)); 544 545 if ((flags & DB_RF_HAVESTRUCT) == 0) 546 rw_enter(&db->db_dnode->dn_struct_rwlock, RW_READER); 547 548 prefetch = db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID && | 522 dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, 523 (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED, 524 &aflags, &zb); 525 if (aflags & ARC_CACHED) 526 *flags |= DB_RF_CACHED; 527} 528 529int --- 8 unchanged lines hidden (view full) --- 538 * can't be freed while we have a hold on the buffer. 539 */ 540 ASSERT(!refcount_is_zero(&db->db_holds)); 541 542 if ((flags & DB_RF_HAVESTRUCT) == 0) 543 rw_enter(&db->db_dnode->dn_struct_rwlock, RW_READER); 544 545 prefetch = db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID && |
549 (flags & DB_RF_NOPREFETCH) == 0 && db->db_dnode != NULL; | 546 (flags & DB_RF_NOPREFETCH) == 0 && db->db_dnode != NULL && 547 DBUF_IS_CACHEABLE(db); |
550 551 mutex_enter(&db->db_mtx); 552 if (db->db_state == DB_CACHED) { 553 mutex_exit(&db->db_mtx); 554 if (prefetch) 555 dmu_zfetch(&db->db_dnode->dn_zfetch, db->db.db_offset, 556 db->db.db_size, TRUE); 557 if ((flags & DB_RF_HAVESTRUCT) == 0) --- 98 unchanged lines hidden (view full) --- 656 * reset the reference to point to a new copy, 657 * or (if there a no active holders) 658 * just null out the current db_data pointer. 659 */ 660 ASSERT(dr->dr_txg >= txg - 2); 661 if (db->db_blkid == DB_BONUS_BLKID) { 662 /* Note that the data bufs here are zio_bufs */ 663 dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN); | 548 549 mutex_enter(&db->db_mtx); 550 if (db->db_state == DB_CACHED) { 551 mutex_exit(&db->db_mtx); 552 if (prefetch) 553 dmu_zfetch(&db->db_dnode->dn_zfetch, db->db.db_offset, 554 db->db.db_size, TRUE); 555 if ((flags & DB_RF_HAVESTRUCT) == 0) --- 98 unchanged lines hidden (view full) --- 654 * reset the reference to point to a new copy, 655 * or (if there a no active holders) 656 * just null out the current db_data pointer. 657 */ 658 ASSERT(dr->dr_txg >= txg - 2); 659 if (db->db_blkid == DB_BONUS_BLKID) { 660 /* Note that the data bufs here are zio_bufs */ 661 dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN); |
662 arc_space_consume(DN_MAX_BONUSLEN); |
|
664 bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN); 665 } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) { 666 int size = db->db.db_size; 667 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 668 dr->dt.dl.dr_data = arc_buf_alloc( 669 db->db_dnode->dn_objset->os_spa, size, db, type); 670 bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size); 671 } else { --- 13 unchanged lines hidden (view full) --- 685 686 if (db->db_blkid == DB_BONUS_BLKID || 687 dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN) 688 return; 689 690 /* free this block */ 691 if (!BP_IS_HOLE(&dr->dt.dl.dr_overridden_by)) { 692 /* XXX can get silent EIO here */ | 663 bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN); 664 } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) { 665 int size = db->db.db_size; 666 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 667 dr->dt.dl.dr_data = arc_buf_alloc( 668 db->db_dnode->dn_objset->os_spa, size, db, type); 669 bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size); 670 } else { --- 13 unchanged lines hidden (view full) --- 684 685 if (db->db_blkid == DB_BONUS_BLKID || 686 dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN) 687 return; 688 689 /* free this block */ 690 if (!BP_IS_HOLE(&dr->dt.dl.dr_overridden_by)) { 691 /* XXX can get silent EIO here */ |
693 (void) arc_free(NULL, db->db_dnode->dn_objset->os_spa, | 692 (void) dsl_free(NULL, 693 spa_get_dsl(db->db_dnode->dn_objset->os_spa), |
694 txg, &dr->dt.dl.dr_overridden_by, NULL, NULL, ARC_WAIT); 695 } 696 dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; 697 /* 698 * Release the already-written buffer, so we leave it in 699 * a consistent dirty state. Note that all callers are 700 * modifying the buffer, so they will immediately do 701 * another (redundant) arc_release(). Therefore, leave 702 * the buf thawed to save the effort of freezing & 703 * immediately re-thawing it. 704 */ 705 arc_release(dr->dt.dl.dr_data, db); 706} 707 | 694 txg, &dr->dt.dl.dr_overridden_by, NULL, NULL, ARC_WAIT); 695 } 696 dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; 697 /* 698 * Release the already-written buffer, so we leave it in 699 * a consistent dirty state. Note that all callers are 700 * modifying the buffer, so they will immediately do 701 * another (redundant) arc_release(). Therefore, leave 702 * the buf thawed to save the effort of freezing & 703 * immediately re-thawing it. 704 */ 705 arc_release(dr->dt.dl.dr_data, db); 706} 707 |
708/* 709 * Evict (if its unreferenced) or clear (if its referenced) any level-0 710 * data blocks in the free range, so that any future readers will find 711 * empty blocks. Also, if we happen accross any level-1 dbufs in the 712 * range that have not already been marked dirty, mark them dirty so 713 * they stay in memory. 714 */ |
|
708void | 715void |
709dbuf_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) | 716dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx) |
710{ 711 dmu_buf_impl_t *db, *db_next; 712 uint64_t txg = tx->tx_txg; | 717{ 718 dmu_buf_impl_t *db, *db_next; 719 uint64_t txg = tx->tx_txg; |
720 int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; 721 uint64_t first_l1 = start >> epbs; 722 uint64_t last_l1 = end >> epbs; |
|
713 | 723 |
714 dprintf_dnode(dn, "blkid=%llu nblks=%llu\n", blkid, nblks); | 724 if (end > dn->dn_maxblkid) { 725 end = dn->dn_maxblkid; 726 last_l1 = end >> epbs; 727 } 728 dprintf_dnode(dn, "start=%llu end=%llu\n", start, end); |
715 mutex_enter(&dn->dn_dbufs_mtx); 716 for (db = list_head(&dn->dn_dbufs); db; db = db_next) { 717 db_next = list_next(&dn->dn_dbufs, db); 718 ASSERT(db->db_blkid != DB_BONUS_BLKID); | 729 mutex_enter(&dn->dn_dbufs_mtx); 730 for (db = list_head(&dn->dn_dbufs); db; db = db_next) { 731 db_next = list_next(&dn->dn_dbufs, db); 732 ASSERT(db->db_blkid != DB_BONUS_BLKID); |
733 734 if (db->db_level == 1 && 735 db->db_blkid >= first_l1 && db->db_blkid <= last_l1) { 736 mutex_enter(&db->db_mtx); 737 if (db->db_last_dirty && 738 db->db_last_dirty->dr_txg < txg) { 739 dbuf_add_ref(db, FTAG); 740 mutex_exit(&db->db_mtx); 741 dbuf_will_dirty(db, tx); 742 dbuf_rele(db, FTAG); 743 } else { 744 mutex_exit(&db->db_mtx); 745 } 746 } 747 |
|
719 if (db->db_level != 0) 720 continue; 721 dprintf_dbuf(db, "found buf %s\n", ""); | 748 if (db->db_level != 0) 749 continue; 750 dprintf_dbuf(db, "found buf %s\n", ""); |
722 if (db->db_blkid < blkid || 723 db->db_blkid >= blkid+nblks) | 751 if (db->db_blkid < start || db->db_blkid > end) |
724 continue; 725 726 /* found a level 0 buffer in the range */ 727 if (dbuf_undirty(db, tx)) 728 continue; 729 730 mutex_enter(&db->db_mtx); 731 if (db->db_state == DB_UNCACHED || --- 46 unchanged lines hidden (view full) --- 778 } 779 780 mutex_exit(&db->db_mtx); 781 } 782 mutex_exit(&dn->dn_dbufs_mtx); 783} 784 785static int | 752 continue; 753 754 /* found a level 0 buffer in the range */ 755 if (dbuf_undirty(db, tx)) 756 continue; 757 758 mutex_enter(&db->db_mtx); 759 if (db->db_state == DB_UNCACHED || --- 46 unchanged lines hidden (view full) --- 806 } 807 808 mutex_exit(&db->db_mtx); 809 } 810 mutex_exit(&dn->dn_dbufs_mtx); 811} 812 813static int |
786dbuf_new_block(dmu_buf_impl_t *db) | 814dbuf_block_freeable(dmu_buf_impl_t *db) |
787{ 788 dsl_dataset_t *ds = db->db_objset->os_dsl_dataset; 789 uint64_t birth_txg = 0; 790 | 815{ 816 dsl_dataset_t *ds = db->db_objset->os_dsl_dataset; 817 uint64_t birth_txg = 0; 818 |
791 /* Don't count meta-objects */ 792 if (ds == NULL) 793 return (FALSE); 794 | |
795 /* 796 * We don't need any locking to protect db_blkptr: 797 * If it's syncing, then db_last_dirty will be set 798 * so we'll ignore db_blkptr. 799 */ 800 ASSERT(MUTEX_HELD(&db->db_mtx)); | 819 /* 820 * We don't need any locking to protect db_blkptr: 821 * If it's syncing, then db_last_dirty will be set 822 * so we'll ignore db_blkptr. 823 */ 824 ASSERT(MUTEX_HELD(&db->db_mtx)); |
801 /* If we have been dirtied since the last snapshot, its not new */ | |
802 if (db->db_last_dirty) 803 birth_txg = db->db_last_dirty->dr_txg; 804 else if (db->db_blkptr) 805 birth_txg = db->db_blkptr->blk_birth; 806 | 825 if (db->db_last_dirty) 826 birth_txg = db->db_last_dirty->dr_txg; 827 else if (db->db_blkptr) 828 birth_txg = db->db_blkptr->blk_birth; 829 |
830 /* If we don't exist or are in a snapshot, we can't be freed */ |
|
807 if (birth_txg) | 831 if (birth_txg) |
808 return (!dsl_dataset_block_freeable(ds, birth_txg)); | 832 return (ds == NULL || 833 dsl_dataset_block_freeable(ds, birth_txg)); |
809 else | 834 else |
810 return (TRUE); | 835 return (FALSE); |
811} 812 813void 814dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx) 815{ 816 arc_buf_t *buf, *obuf; 817 int osize = db->db.db_size; 818 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); --- 41 unchanged lines hidden (view full) --- 860 861dbuf_dirty_record_t * 862dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) 863{ 864 dnode_t *dn = db->db_dnode; 865 objset_impl_t *os = dn->dn_objset; 866 dbuf_dirty_record_t **drp, *dr; 867 int drop_struct_lock = FALSE; | 836} 837 838void 839dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx) 840{ 841 arc_buf_t *buf, *obuf; 842 int osize = db->db.db_size; 843 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); --- 41 unchanged lines hidden (view full) --- 885 886dbuf_dirty_record_t * 887dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) 888{ 889 dnode_t *dn = db->db_dnode; 890 objset_impl_t *os = dn->dn_objset; 891 dbuf_dirty_record_t **drp, *dr; 892 int drop_struct_lock = FALSE; |
893 boolean_t do_free_accounting = B_FALSE; |
|
868 int txgoff = tx->tx_txg & TXG_MASK; 869 870 ASSERT(tx->tx_txg != 0); 871 ASSERT(!refcount_is_zero(&db->db_holds)); 872 DMU_TX_DIRTY_BUF(tx, db); 873 874 /* 875 * Shouldn't dirty a regular buffer in syncing context. Private --- 41 unchanged lines hidden (view full) --- 917 mutex_exit(&dn->dn_mtx); 918 919 /* 920 * If this buffer is already dirty, we're done. 921 */ 922 drp = &db->db_last_dirty; 923 ASSERT(*drp == NULL || (*drp)->dr_txg <= tx->tx_txg || 924 db->db.db_object == DMU_META_DNODE_OBJECT); | 894 int txgoff = tx->tx_txg & TXG_MASK; 895 896 ASSERT(tx->tx_txg != 0); 897 ASSERT(!refcount_is_zero(&db->db_holds)); 898 DMU_TX_DIRTY_BUF(tx, db); 899 900 /* 901 * Shouldn't dirty a regular buffer in syncing context. Private --- 41 unchanged lines hidden (view full) --- 943 mutex_exit(&dn->dn_mtx); 944 945 /* 946 * If this buffer is already dirty, we're done. 947 */ 948 drp = &db->db_last_dirty; 949 ASSERT(*drp == NULL || (*drp)->dr_txg <= tx->tx_txg || 950 db->db.db_object == DMU_META_DNODE_OBJECT); |
925 while (*drp && (*drp)->dr_txg > tx->tx_txg) 926 drp = &(*drp)->dr_next; 927 if (*drp && (*drp)->dr_txg == tx->tx_txg) { | 951 while ((dr = *drp) != NULL && dr->dr_txg > tx->tx_txg) 952 drp = &dr->dr_next; 953 if (dr && dr->dr_txg == tx->tx_txg) { |
928 if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID) { 929 /* 930 * If this buffer has already been written out, 931 * we now need to reset its state. 932 */ | 954 if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID) { 955 /* 956 * If this buffer has already been written out, 957 * we now need to reset its state. 958 */ |
933 dbuf_unoverride(*drp); | 959 dbuf_unoverride(dr); |
934 if (db->db.db_object != DMU_META_DNODE_OBJECT) 935 arc_buf_thaw(db->db_buf); 936 } 937 mutex_exit(&db->db_mtx); | 960 if (db->db.db_object != DMU_META_DNODE_OBJECT) 961 arc_buf_thaw(db->db_buf); 962 } 963 mutex_exit(&db->db_mtx); |
938 return (*drp); | 964 return (dr); |
939 } 940 941 /* 942 * Only valid if not already dirty. 943 */ 944 ASSERT(dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == 945 (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN)); 946 --- 14 unchanged lines hidden (view full) --- 961 ASSERT(!dmu_tx_is_syncing(tx) || 962 os->os_dsl_dataset == NULL || 963 !dsl_dir_is_private(os->os_dsl_dataset->ds_dir) || 964 !BP_IS_HOLE(os->os_rootbp)); 965 ASSERT(db->db.db_size != 0); 966 967 dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); 968 | 965 } 966 967 /* 968 * Only valid if not already dirty. 969 */ 970 ASSERT(dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == 971 (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN)); 972 --- 14 unchanged lines hidden (view full) --- 987 ASSERT(!dmu_tx_is_syncing(tx) || 988 os->os_dsl_dataset == NULL || 989 !dsl_dir_is_private(os->os_dsl_dataset->ds_dir) || 990 !BP_IS_HOLE(os->os_rootbp)); 991 ASSERT(db->db.db_size != 0); 992 993 dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); 994 |
995 if (db->db_blkid != DB_BONUS_BLKID) { 996 /* 997 * Update the accounting. 998 * Note: we delay "free accounting" until after we drop 999 * the db_mtx. This keeps us from grabbing other locks 1000 * (and possibly deadlocking) in bp_get_dasize() while 1001 * also holding the db_mtx. 1002 */ 1003 dnode_willuse_space(dn, db->db.db_size, tx); 1004 do_free_accounting = dbuf_block_freeable(db); 1005 } 1006 |
|
969 /* 970 * If this buffer is dirty in an old transaction group we need 971 * to make a copy of it so that the changes we make in this 972 * transaction group won't leak out when we sync the older txg. 973 */ 974 dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP); 975 if (db->db_level == 0) { 976 void *data_old = db->db_buf; --- 33 unchanged lines hidden (view full) --- 1010 */ 1011 if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID) { 1012 mutex_enter(&dn->dn_mtx); 1013 dnode_clear_range(dn, db->db_blkid, 1, tx); 1014 mutex_exit(&dn->dn_mtx); 1015 db->db_freed_in_flight = FALSE; 1016 } 1017 | 1007 /* 1008 * If this buffer is dirty in an old transaction group we need 1009 * to make a copy of it so that the changes we make in this 1010 * transaction group won't leak out when we sync the older txg. 1011 */ 1012 dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP); 1013 if (db->db_level == 0) { 1014 void *data_old = db->db_buf; --- 33 unchanged lines hidden (view full) --- 1048 */ 1049 if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID) { 1050 mutex_enter(&dn->dn_mtx); 1051 dnode_clear_range(dn, db->db_blkid, 1, tx); 1052 mutex_exit(&dn->dn_mtx); 1053 db->db_freed_in_flight = FALSE; 1054 } 1055 |
1018 if (db->db_blkid != DB_BONUS_BLKID) { 1019 /* 1020 * Update the accounting. 1021 */ 1022 if (!dbuf_new_block(db) && db->db_blkptr) { 1023 /* 1024 * This is only a guess -- if the dbuf is dirty 1025 * in a previous txg, we don't know how much 1026 * space it will use on disk yet. We should 1027 * really have the struct_rwlock to access 1028 * db_blkptr, but since this is just a guess, 1029 * it's OK if we get an odd answer. 1030 */ 1031 dnode_willuse_space(dn, 1032 -bp_get_dasize(os->os_spa, db->db_blkptr), tx); 1033 } 1034 dnode_willuse_space(dn, db->db.db_size, tx); 1035 } 1036 | |
1037 /* 1038 * This buffer is now part of this txg 1039 */ 1040 dbuf_add_ref(db, (void *)(uintptr_t)tx->tx_txg); 1041 db->db_dirtycnt += 1; 1042 ASSERT3U(db->db_dirtycnt, <=, 3); 1043 1044 mutex_exit(&db->db_mtx); 1045 1046 if (db->db_blkid == DB_BONUS_BLKID) { 1047 mutex_enter(&dn->dn_mtx); 1048 ASSERT(!list_link_active(&dr->dr_dirty_node)); 1049 list_insert_tail(&dn->dn_dirty_records[txgoff], dr); 1050 mutex_exit(&dn->dn_mtx); 1051 dnode_setdirty(dn, tx); 1052 return (dr); | 1056 /* 1057 * This buffer is now part of this txg 1058 */ 1059 dbuf_add_ref(db, (void *)(uintptr_t)tx->tx_txg); 1060 db->db_dirtycnt += 1; 1061 ASSERT3U(db->db_dirtycnt, <=, 3); 1062 1063 mutex_exit(&db->db_mtx); 1064 1065 if (db->db_blkid == DB_BONUS_BLKID) { 1066 mutex_enter(&dn->dn_mtx); 1067 ASSERT(!list_link_active(&dr->dr_dirty_node)); 1068 list_insert_tail(&dn->dn_dirty_records[txgoff], dr); 1069 mutex_exit(&dn->dn_mtx); 1070 dnode_setdirty(dn, tx); 1071 return (dr); |
1072 } else if (do_free_accounting) { 1073 blkptr_t *bp = db->db_blkptr; 1074 int64_t willfree = (bp && !BP_IS_HOLE(bp)) ? 1075 bp_get_dasize(os->os_spa, bp) : db->db.db_size; 1076 /* 1077 * This is only a guess -- if the dbuf is dirty 1078 * in a previous txg, we don't know how much 1079 * space it will use on disk yet. We should 1080 * really have the struct_rwlock to access 1081 * db_blkptr, but since this is just a guess, 1082 * it's OK if we get an odd answer. 1083 */ 1084 dnode_willuse_space(dn, -willfree, tx); |
|
1053 } 1054 | 1085 } 1086 |
1055 if (db->db_level == 0) { 1056 dnode_new_blkid(dn, db->db_blkid, tx); 1057 ASSERT(dn->dn_maxblkid >= db->db_blkid); 1058 } 1059 | |
1060 if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) { 1061 rw_enter(&dn->dn_struct_rwlock, RW_READER); 1062 drop_struct_lock = TRUE; 1063 } 1064 | 1087 if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) { 1088 rw_enter(&dn->dn_struct_rwlock, RW_READER); 1089 drop_struct_lock = TRUE; 1090 } 1091 |
1092 if (db->db_level == 0) { 1093 dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock); 1094 ASSERT(dn->dn_maxblkid >= db->db_blkid); 1095 } 1096 |
|
1065 if (db->db_level+1 < dn->dn_nlevels) { 1066 dmu_buf_impl_t *parent = db->db_parent; 1067 dbuf_dirty_record_t *di; 1068 int parent_held = FALSE; 1069 1070 if (db->db_parent == NULL || db->db_parent == dn->dn_dbuf) { 1071 int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; 1072 --- 37 unchanged lines hidden (view full) --- 1110 return (dr); 1111} 1112 1113static int 1114dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) 1115{ 1116 dnode_t *dn = db->db_dnode; 1117 uint64_t txg = tx->tx_txg; | 1097 if (db->db_level+1 < dn->dn_nlevels) { 1098 dmu_buf_impl_t *parent = db->db_parent; 1099 dbuf_dirty_record_t *di; 1100 int parent_held = FALSE; 1101 1102 if (db->db_parent == NULL || db->db_parent == dn->dn_dbuf) { 1103 int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; 1104 --- 37 unchanged lines hidden (view full) --- 1142 return (dr); 1143} 1144 1145static int 1146dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) 1147{ 1148 dnode_t *dn = db->db_dnode; 1149 uint64_t txg = tx->tx_txg; |
1118 dbuf_dirty_record_t *dr; | 1150 dbuf_dirty_record_t *dr, **drp; |
1119 1120 ASSERT(txg != 0); 1121 ASSERT(db->db_blkid != DB_BONUS_BLKID); 1122 1123 mutex_enter(&db->db_mtx); 1124 1125 /* 1126 * If this buffer is not dirty, we're done. 1127 */ | 1151 1152 ASSERT(txg != 0); 1153 ASSERT(db->db_blkid != DB_BONUS_BLKID); 1154 1155 mutex_enter(&db->db_mtx); 1156 1157 /* 1158 * If this buffer is not dirty, we're done. 1159 */ |
1128 for (dr = db->db_last_dirty; dr; dr = dr->dr_next) | 1160 for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next) |
1129 if (dr->dr_txg <= txg) 1130 break; 1131 if (dr == NULL || dr->dr_txg < txg) { 1132 mutex_exit(&db->db_mtx); 1133 return (0); 1134 } 1135 ASSERT(dr->dr_txg == txg); 1136 --- 13 unchanged lines hidden (view full) --- 1150 } 1151 1152 dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); 1153 1154 ASSERT(db->db.db_size != 0); 1155 1156 /* XXX would be nice to fix up dn_towrite_space[] */ 1157 | 1161 if (dr->dr_txg <= txg) 1162 break; 1163 if (dr == NULL || dr->dr_txg < txg) { 1164 mutex_exit(&db->db_mtx); 1165 return (0); 1166 } 1167 ASSERT(dr->dr_txg == txg); 1168 --- 13 unchanged lines hidden (view full) --- 1182 } 1183 1184 dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); 1185 1186 ASSERT(db->db.db_size != 0); 1187 1188 /* XXX would be nice to fix up dn_towrite_space[] */ 1189 |
1158 db->db_last_dirty = dr->dr_next; | 1190 *drp = dr->dr_next; |
1159 1160 if (dr->dr_parent) { 1161 mutex_enter(&dr->dr_parent->dt.di.dr_mtx); 1162 list_remove(&dr->dr_parent->dt.di.dr_children, dr); 1163 mutex_exit(&dr->dr_parent->dt.di.dr_mtx); 1164 } else if (db->db_level+1 == dn->dn_nlevels) { | 1191 1192 if (dr->dr_parent) { 1193 mutex_enter(&dr->dr_parent->dt.di.dr_mtx); 1194 list_remove(&dr->dr_parent->dt.di.dr_children, dr); 1195 mutex_exit(&dr->dr_parent->dt.di.dr_mtx); 1196 } else if (db->db_level+1 == dn->dn_nlevels) { |
1165 ASSERT3P(db->db_parent, ==, dn->dn_dbuf); | 1197 ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf); |
1166 mutex_enter(&dn->dn_mtx); 1167 list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr); 1168 mutex_exit(&dn->dn_mtx); 1169 } 1170 1171 if (db->db_level == 0) { 1172 dbuf_unoverride(dr); 1173 1174 ASSERT(db->db_buf != NULL); 1175 ASSERT(dr->dt.dl.dr_data != NULL); 1176 if (dr->dt.dl.dr_data != db->db_buf) 1177 VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db) == 1); 1178 } else { 1179 ASSERT(db->db_buf != NULL); 1180 ASSERT(list_head(&dr->dt.di.dr_children) == NULL); | 1198 mutex_enter(&dn->dn_mtx); 1199 list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr); 1200 mutex_exit(&dn->dn_mtx); 1201 } 1202 1203 if (db->db_level == 0) { 1204 dbuf_unoverride(dr); 1205 1206 ASSERT(db->db_buf != NULL); 1207 ASSERT(dr->dt.dl.dr_data != NULL); 1208 if (dr->dt.dl.dr_data != db->db_buf) 1209 VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db) == 1); 1210 } else { 1211 ASSERT(db->db_buf != NULL); 1212 ASSERT(list_head(&dr->dt.di.dr_children) == NULL); |
1181 list_destroy(&dr->dt.di.dr_children); | |
1182 mutex_destroy(&dr->dt.di.dr_mtx); | 1213 mutex_destroy(&dr->dt.di.dr_mtx); |
1214 list_destroy(&dr->dt.di.dr_children); |
|
1183 } 1184 kmem_free(dr, sizeof (dbuf_dirty_record_t)); 1185 1186 ASSERT(db->db_dirtycnt > 0); 1187 db->db_dirtycnt -= 1; 1188 1189 if (refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) { 1190 arc_buf_t *buf = db->db_buf; --- 8 unchanged lines hidden (view full) --- 1199 mutex_exit(&db->db_mtx); 1200 return (0); 1201} 1202 1203#pragma weak dmu_buf_will_dirty = dbuf_will_dirty 1204void 1205dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) 1206{ | 1215 } 1216 kmem_free(dr, sizeof (dbuf_dirty_record_t)); 1217 1218 ASSERT(db->db_dirtycnt > 0); 1219 db->db_dirtycnt -= 1; 1220 1221 if (refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) { 1222 arc_buf_t *buf = db->db_buf; --- 8 unchanged lines hidden (view full) --- 1231 mutex_exit(&db->db_mtx); 1232 return (0); 1233} 1234 1235#pragma weak dmu_buf_will_dirty = dbuf_will_dirty 1236void 1237dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) 1238{ |
1207 int rf = DB_RF_MUST_SUCCEED; | 1239 int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH; |
1208 1209 ASSERT(tx->tx_txg != 0); 1210 ASSERT(!refcount_is_zero(&db->db_holds)); 1211 1212 if (RW_WRITE_HELD(&db->db_dnode->dn_struct_rwlock)) 1213 rf |= DB_RF_HAVESTRUCT; 1214 (void) dbuf_read(db, NULL, rf); 1215 (void) dbuf_dirty(db, tx); --- 61 unchanged lines hidden (view full) --- 1277 1278 ASSERT(MUTEX_HELD(&db->db_mtx)); 1279 ASSERT(refcount_is_zero(&db->db_holds)); 1280 1281 dbuf_evict_user(db); 1282 1283 if (db->db_state == DB_CACHED) { 1284 ASSERT(db->db.db_data != NULL); | 1240 1241 ASSERT(tx->tx_txg != 0); 1242 ASSERT(!refcount_is_zero(&db->db_holds)); 1243 1244 if (RW_WRITE_HELD(&db->db_dnode->dn_struct_rwlock)) 1245 rf |= DB_RF_HAVESTRUCT; 1246 (void) dbuf_read(db, NULL, rf); 1247 (void) dbuf_dirty(db, tx); --- 61 unchanged lines hidden (view full) --- 1309 1310 ASSERT(MUTEX_HELD(&db->db_mtx)); 1311 ASSERT(refcount_is_zero(&db->db_holds)); 1312 1313 dbuf_evict_user(db); 1314 1315 if (db->db_state == DB_CACHED) { 1316 ASSERT(db->db.db_data != NULL); |
1285 if (db->db_blkid == DB_BONUS_BLKID) | 1317 if (db->db_blkid == DB_BONUS_BLKID) { |
1286 zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN); | 1318 zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN); |
1319 arc_space_return(DN_MAX_BONUSLEN); 1320 } |
|
1287 db->db.db_data = NULL; 1288 db->db_state = DB_UNCACHED; 1289 } 1290 1291 ASSERT3U(db->db_state, ==, DB_UNCACHED); 1292 ASSERT(db->db_data_pending == NULL); 1293 1294 db->db_state = DB_EVICTING; 1295 db->db_blkptr = NULL; 1296 1297 if (db->db_blkid != DB_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) { 1298 list_remove(&dn->dn_dbufs, db); 1299 dnode_rele(dn, db); | 1321 db->db.db_data = NULL; 1322 db->db_state = DB_UNCACHED; 1323 } 1324 1325 ASSERT3U(db->db_state, ==, DB_UNCACHED); 1326 ASSERT(db->db_data_pending == NULL); 1327 1328 db->db_state = DB_EVICTING; 1329 db->db_blkptr = NULL; 1330 1331 if (db->db_blkid != DB_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) { 1332 list_remove(&dn->dn_dbufs, db); 1333 dnode_rele(dn, db); |
1334 db->db_dnode = NULL; |
|
1300 } 1301 1302 if (db->db_buf) 1303 dbuf_gone = arc_buf_evict(db->db_buf); 1304 1305 if (!dbuf_gone) 1306 mutex_exit(&db->db_mtx); 1307 --- 84 unchanged lines hidden (view full) --- 1392 db->db_user_ptr = NULL; 1393 db->db_user_data_ptr_ptr = NULL; 1394 db->db_evict_func = NULL; 1395 db->db_immediate_evict = 0; 1396 db->db_freed_in_flight = 0; 1397 1398 if (blkid == DB_BONUS_BLKID) { 1399 ASSERT3P(parent, ==, dn->dn_dbuf); | 1335 } 1336 1337 if (db->db_buf) 1338 dbuf_gone = arc_buf_evict(db->db_buf); 1339 1340 if (!dbuf_gone) 1341 mutex_exit(&db->db_mtx); 1342 --- 84 unchanged lines hidden (view full) --- 1427 db->db_user_ptr = NULL; 1428 db->db_user_data_ptr_ptr = NULL; 1429 db->db_evict_func = NULL; 1430 db->db_immediate_evict = 0; 1431 db->db_freed_in_flight = 0; 1432 1433 if (blkid == DB_BONUS_BLKID) { 1434 ASSERT3P(parent, ==, dn->dn_dbuf); |
1400 db->db.db_size = dn->dn_bonuslen; | 1435 db->db.db_size = DN_MAX_BONUSLEN - 1436 (dn->dn_nblkptr-1) * sizeof (blkptr_t); 1437 ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen); |
1401 db->db.db_offset = DB_BONUS_BLKID; 1402 db->db_state = DB_UNCACHED; 1403 /* the bonus dbuf is not placed in the hash table */ | 1438 db->db.db_offset = DB_BONUS_BLKID; 1439 db->db_state = DB_UNCACHED; 1440 /* the bonus dbuf is not placed in the hash table */ |
1441 arc_space_consume(sizeof (dmu_buf_impl_t)); |
|
1404 return (db); 1405 } else { 1406 int blocksize = 1407 db->db_level ? 1<<dn->dn_indblkshift : dn->dn_datablksz; 1408 db->db.db_size = blocksize; 1409 db->db.db_offset = db->db_blkid * blocksize; 1410 } 1411 --- 10 unchanged lines hidden (view full) --- 1422 /* someone else inserted it first */ 1423 kmem_cache_free(dbuf_cache, db); 1424 mutex_exit(&dn->dn_dbufs_mtx); 1425 return (odb); 1426 } 1427 list_insert_head(&dn->dn_dbufs, db); 1428 db->db_state = DB_UNCACHED; 1429 mutex_exit(&dn->dn_dbufs_mtx); | 1442 return (db); 1443 } else { 1444 int blocksize = 1445 db->db_level ? 1<<dn->dn_indblkshift : dn->dn_datablksz; 1446 db->db.db_size = blocksize; 1447 db->db.db_offset = db->db_blkid * blocksize; 1448 } 1449 --- 10 unchanged lines hidden (view full) --- 1460 /* someone else inserted it first */ 1461 kmem_cache_free(dbuf_cache, db); 1462 mutex_exit(&dn->dn_dbufs_mtx); 1463 return (odb); 1464 } 1465 list_insert_head(&dn->dn_dbufs, db); 1466 db->db_state = DB_UNCACHED; 1467 mutex_exit(&dn->dn_dbufs_mtx); |
1468 arc_space_consume(sizeof (dmu_buf_impl_t)); |
|
1430 1431 if (parent && parent != dn->dn_dbuf) 1432 dbuf_add_ref(parent, db); 1433 1434 ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || 1435 refcount_count(&dn->dn_holds) > 0); 1436 (void) refcount_add(&dn->dn_holds, db); 1437 --- 26 unchanged lines hidden (view full) --- 1464} 1465 1466static void 1467dbuf_destroy(dmu_buf_impl_t *db) 1468{ 1469 ASSERT(refcount_is_zero(&db->db_holds)); 1470 1471 if (db->db_blkid != DB_BONUS_BLKID) { | 1469 1470 if (parent && parent != dn->dn_dbuf) 1471 dbuf_add_ref(parent, db); 1472 1473 ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || 1474 refcount_count(&dn->dn_holds) > 0); 1475 (void) refcount_add(&dn->dn_holds, db); 1476 --- 26 unchanged lines hidden (view full) --- 1503} 1504 1505static void 1506dbuf_destroy(dmu_buf_impl_t *db) 1507{ 1508 ASSERT(refcount_is_zero(&db->db_holds)); 1509 1510 if (db->db_blkid != DB_BONUS_BLKID) { |
1472 dnode_t *dn = db->db_dnode; 1473 | |
1474 /* 1475 * If this dbuf is still on the dn_dbufs list, 1476 * remove it from that list. 1477 */ | 1511 /* 1512 * If this dbuf is still on the dn_dbufs list, 1513 * remove it from that list. 1514 */ |
1478 if (list_link_active(&db->db_link)) { | 1515 if (db->db_dnode) { 1516 dnode_t *dn = db->db_dnode; 1517 |
1479 mutex_enter(&dn->dn_dbufs_mtx); 1480 list_remove(&dn->dn_dbufs, db); 1481 mutex_exit(&dn->dn_dbufs_mtx); 1482 1483 dnode_rele(dn, db); | 1518 mutex_enter(&dn->dn_dbufs_mtx); 1519 list_remove(&dn->dn_dbufs, db); 1520 mutex_exit(&dn->dn_dbufs_mtx); 1521 1522 dnode_rele(dn, db); |
1523 db->db_dnode = NULL; |
|
1484 } 1485 dbuf_hash_remove(db); 1486 } 1487 db->db_parent = NULL; | 1524 } 1525 dbuf_hash_remove(db); 1526 } 1527 db->db_parent = NULL; |
1488 db->db_dnode = NULL; | |
1489 db->db_buf = NULL; 1490 | 1528 db->db_buf = NULL; 1529 |
1530 ASSERT(!list_link_active(&db->db_link)); |
|
1491 ASSERT(db->db.db_data == NULL); 1492 ASSERT(db->db_hash_next == NULL); 1493 ASSERT(db->db_blkptr == NULL); 1494 ASSERT(db->db_data_pending == NULL); 1495 1496 kmem_cache_free(dbuf_cache, db); | 1531 ASSERT(db->db.db_data == NULL); 1532 ASSERT(db->db_hash_next == NULL); 1533 ASSERT(db->db_blkptr == NULL); 1534 ASSERT(db->db_data_pending == NULL); 1535 1536 kmem_cache_free(dbuf_cache, db); |
1537 arc_space_return(sizeof (dmu_buf_impl_t)); |
|
1497} 1498 1499void 1500dbuf_prefetch(dnode_t *dn, uint64_t blkid) 1501{ 1502 dmu_buf_impl_t *db = NULL; 1503 blkptr_t *bp = NULL; 1504 --- 15 unchanged lines hidden (view full) --- 1520 return; 1521 } 1522 mutex_exit(&db->db_mtx); 1523 db = NULL; 1524 } 1525 1526 if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) { 1527 if (bp && !BP_IS_HOLE(bp)) { | 1538} 1539 1540void 1541dbuf_prefetch(dnode_t *dn, uint64_t blkid) 1542{ 1543 dmu_buf_impl_t *db = NULL; 1544 blkptr_t *bp = NULL; 1545 --- 15 unchanged lines hidden (view full) --- 1561 return; 1562 } 1563 mutex_exit(&db->db_mtx); 1564 db = NULL; 1565 } 1566 1567 if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) { 1568 if (bp && !BP_IS_HOLE(bp)) { |
1569 arc_buf_t *pbuf; |
|
1528 uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; 1529 zbookmark_t zb; 1530 zb.zb_objset = dn->dn_objset->os_dsl_dataset ? 1531 dn->dn_objset->os_dsl_dataset->ds_object : 0; 1532 zb.zb_object = dn->dn_object; 1533 zb.zb_level = 0; 1534 zb.zb_blkid = blkid; 1535 | 1570 uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; 1571 zbookmark_t zb; 1572 zb.zb_objset = dn->dn_objset->os_dsl_dataset ? 1573 dn->dn_objset->os_dsl_dataset->ds_object : 0; 1574 zb.zb_object = dn->dn_object; 1575 zb.zb_level = 0; 1576 zb.zb_blkid = blkid; 1577 |
1536 (void) arc_read(NULL, dn->dn_objset->os_spa, bp, 1537 dmu_ot[dn->dn_type].ot_byteswap, 1538 NULL, NULL, ZIO_PRIORITY_ASYNC_READ, | 1578 if (db) 1579 pbuf = db->db_buf; 1580 else 1581 pbuf = dn->dn_objset->os_phys_buf; 1582 1583 (void) arc_read(NULL, dn->dn_objset->os_spa, 1584 bp, pbuf, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, |
1539 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, 1540 &aflags, &zb); 1541 } 1542 if (db) 1543 dbuf_rele(db, NULL); 1544 } 1545} 1546 --- 100 unchanged lines hidden (view full) --- 1647dmu_buf_impl_t * 1648dbuf_hold_level(dnode_t *dn, int level, uint64_t blkid, void *tag) 1649{ 1650 dmu_buf_impl_t *db; 1651 int err = dbuf_hold_impl(dn, level, blkid, FALSE, tag, &db); 1652 return (err ? NULL : db); 1653} 1654 | 1585 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, 1586 &aflags, &zb); 1587 } 1588 if (db) 1589 dbuf_rele(db, NULL); 1590 } 1591} 1592 --- 100 unchanged lines hidden (view full) --- 1693dmu_buf_impl_t * 1694dbuf_hold_level(dnode_t *dn, int level, uint64_t blkid, void *tag) 1695{ 1696 dmu_buf_impl_t *db; 1697 int err = dbuf_hold_impl(dn, level, blkid, FALSE, tag, &db); 1698 return (err ? NULL : db); 1699} 1700 |
1655dmu_buf_impl_t * | 1701void |
1656dbuf_create_bonus(dnode_t *dn) 1657{ | 1702dbuf_create_bonus(dnode_t *dn) 1703{ |
1658 dmu_buf_impl_t *db = dn->dn_bonus; 1659 | |
1660 ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); 1661 1662 ASSERT(dn->dn_bonus == NULL); | 1704 ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); 1705 1706 ASSERT(dn->dn_bonus == NULL); |
1663 db = dbuf_create(dn, 0, DB_BONUS_BLKID, dn->dn_dbuf, NULL); 1664 return (db); | 1707 dn->dn_bonus = dbuf_create(dn, 0, DB_BONUS_BLKID, dn->dn_dbuf, NULL); |
1665} 1666 1667#pragma weak dmu_buf_add_ref = dbuf_add_ref 1668void 1669dbuf_add_ref(dmu_buf_impl_t *db, void *tag) 1670{ 1671 int64_t holds = refcount_add(&db->db_holds, tag); 1672 ASSERT(holds > 1); --- 38 unchanged lines hidden (view full) --- 1711 /* 1712 * This dbuf has anonymous data associated with it. 1713 */ 1714 dbuf_set_data(db, NULL); 1715 VERIFY(arc_buf_remove_ref(buf, db) == 1); 1716 dbuf_evict(db); 1717 } else { 1718 VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0); | 1708} 1709 1710#pragma weak dmu_buf_add_ref = dbuf_add_ref 1711void 1712dbuf_add_ref(dmu_buf_impl_t *db, void *tag) 1713{ 1714 int64_t holds = refcount_add(&db->db_holds, tag); 1715 ASSERT(holds > 1); --- 38 unchanged lines hidden (view full) --- 1754 /* 1755 * This dbuf has anonymous data associated with it. 1756 */ 1757 dbuf_set_data(db, NULL); 1758 VERIFY(arc_buf_remove_ref(buf, db) == 1); 1759 dbuf_evict(db); 1760 } else { 1761 VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0); |
1719 mutex_exit(&db->db_mtx); | 1762 if (!DBUF_IS_CACHEABLE(db)) 1763 dbuf_clear(db); 1764 else 1765 mutex_exit(&db->db_mtx); |
1720 } 1721 } else { 1722 mutex_exit(&db->db_mtx); 1723 } 1724} 1725 1726#pragma weak dmu_buf_refcount = dbuf_refcount 1727uint64_t --- 119 unchanged lines hidden (view full) --- 1847 ASSERT3U(db->db_state, ==, DB_CACHED); 1848 ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift); 1849 ASSERT(db->db_buf != NULL); 1850 1851 dbuf_check_blkptr(dn, db); 1852 1853 db->db_data_pending = dr; 1854 | 1766 } 1767 } else { 1768 mutex_exit(&db->db_mtx); 1769 } 1770} 1771 1772#pragma weak dmu_buf_refcount = dbuf_refcount 1773uint64_t --- 119 unchanged lines hidden (view full) --- 1893 ASSERT3U(db->db_state, ==, DB_CACHED); 1894 ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift); 1895 ASSERT(db->db_buf != NULL); 1896 1897 dbuf_check_blkptr(dn, db); 1898 1899 db->db_data_pending = dr; 1900 |
1855 arc_release(db->db_buf, db); | |
1856 mutex_exit(&db->db_mtx); | 1901 mutex_exit(&db->db_mtx); |
1902 dbuf_write(dr, db->db_buf, tx); |
|
1857 | 1903 |
1858 /* 1859 * XXX -- we should design a compression algorithm 1860 * that specializes in arrays of bps. 1861 */ 1862 dbuf_write(dr, db->db_buf, ZIO_CHECKSUM_FLETCHER_4, 1863 zfs_mdcomp_disable ? ZIO_COMPRESS_EMPTY : ZIO_COMPRESS_LZJB, tx); 1864 | |
1865 zio = dr->dr_zio; 1866 mutex_enter(&dr->dt.di.dr_mtx); 1867 dbuf_sync_list(&dr->dt.di.dr_children, tx); 1868 ASSERT(list_head(&dr->dt.di.dr_children) == NULL); 1869 mutex_exit(&dr->dt.di.dr_mtx); 1870 zio_nowait(zio); 1871} 1872 1873static void 1874dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) 1875{ 1876 arc_buf_t **datap = &dr->dt.dl.dr_data; 1877 dmu_buf_impl_t *db = dr->dr_dbuf; 1878 dnode_t *dn = db->db_dnode; 1879 objset_impl_t *os = dn->dn_objset; 1880 uint64_t txg = tx->tx_txg; | 1904 zio = dr->dr_zio; 1905 mutex_enter(&dr->dt.di.dr_mtx); 1906 dbuf_sync_list(&dr->dt.di.dr_children, tx); 1907 ASSERT(list_head(&dr->dt.di.dr_children) == NULL); 1908 mutex_exit(&dr->dt.di.dr_mtx); 1909 zio_nowait(zio); 1910} 1911 1912static void 1913dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) 1914{ 1915 arc_buf_t **datap = &dr->dt.dl.dr_data; 1916 dmu_buf_impl_t *db = dr->dr_dbuf; 1917 dnode_t *dn = db->db_dnode; 1918 objset_impl_t *os = dn->dn_objset; 1919 uint64_t txg = tx->tx_txg; |
1881 int checksum, compress; | |
1882 int blksz; 1883 1884 ASSERT(dmu_tx_is_syncing(tx)); 1885 1886 dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr); 1887 1888 mutex_enter(&db->db_mtx); 1889 /* --- 14 unchanged lines hidden (view full) --- 1904 /* 1905 * If this is a bonus buffer, simply copy the bonus data into the 1906 * dnode. It will be written out when the dnode is synced (and it 1907 * will be synced, since it must have been dirty for dbuf_sync to 1908 * be called). 1909 */ 1910 if (db->db_blkid == DB_BONUS_BLKID) { 1911 dbuf_dirty_record_t **drp; | 1920 int blksz; 1921 1922 ASSERT(dmu_tx_is_syncing(tx)); 1923 1924 dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr); 1925 1926 mutex_enter(&db->db_mtx); 1927 /* --- 14 unchanged lines hidden (view full) --- 1942 /* 1943 * If this is a bonus buffer, simply copy the bonus data into the 1944 * dnode. It will be written out when the dnode is synced (and it 1945 * will be synced, since it must have been dirty for dbuf_sync to 1946 * be called). 1947 */ 1948 if (db->db_blkid == DB_BONUS_BLKID) { 1949 dbuf_dirty_record_t **drp; |
1912 /* 1913 * Use dn_phys->dn_bonuslen since db.db_size is the length 1914 * of the bonus buffer in the open transaction rather than 1915 * the syncing transaction. 1916 */ | 1950 |
1917 ASSERT(*datap != NULL); 1918 ASSERT3U(db->db_level, ==, 0); 1919 ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN); 1920 bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen); | 1951 ASSERT(*datap != NULL); 1952 ASSERT3U(db->db_level, ==, 0); 1953 ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN); 1954 bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen); |
1921 if (*datap != db->db.db_data) | 1955 if (*datap != db->db.db_data) { |
1922 zio_buf_free(*datap, DN_MAX_BONUSLEN); | 1956 zio_buf_free(*datap, DN_MAX_BONUSLEN); |
1957 arc_space_return(DN_MAX_BONUSLEN); 1958 } |
|
1923 db->db_data_pending = NULL; 1924 drp = &db->db_last_dirty; 1925 while (*drp != dr) 1926 drp = &(*drp)->dr_next; | 1959 db->db_data_pending = NULL; 1960 drp = &db->db_last_dirty; 1961 while (*drp != dr) 1962 drp = &(*drp)->dr_next; |
1927 ASSERT((*drp)->dr_next == NULL); 1928 *drp = NULL; | 1963 ASSERT(dr->dr_next == NULL); 1964 *drp = dr->dr_next; |
1929 if (dr->dr_dbuf->db_level != 0) { 1930 list_destroy(&dr->dt.di.dr_children); 1931 mutex_destroy(&dr->dt.di.dr_mtx); 1932 } 1933 kmem_free(dr, sizeof (dbuf_dirty_record_t)); 1934 ASSERT(db->db_dirtycnt > 0); 1935 db->db_dirtycnt -= 1; 1936 mutex_exit(&db->db_mtx); 1937 dbuf_rele(db, (void *)(uintptr_t)txg); 1938 return; 1939 } 1940 1941 /* | 1965 if (dr->dr_dbuf->db_level != 0) { 1966 list_destroy(&dr->dt.di.dr_children); 1967 mutex_destroy(&dr->dt.di.dr_mtx); 1968 } 1969 kmem_free(dr, sizeof (dbuf_dirty_record_t)); 1970 ASSERT(db->db_dirtycnt > 0); 1971 db->db_dirtycnt -= 1; 1972 mutex_exit(&db->db_mtx); 1973 dbuf_rele(db, (void *)(uintptr_t)txg); 1974 return; 1975 } 1976 1977 /* |
1978 * This function may have dropped the db_mtx lock allowing a dmu_sync 1979 * operation to sneak in. As a result, we need to ensure that we 1980 * don't check the dr_override_state until we have returned from 1981 * dbuf_check_blkptr. 1982 */ 1983 dbuf_check_blkptr(dn, db); 1984 1985 /* |
|
1942 * If this buffer is in the middle of an immdiate write, 1943 * wait for the synchronous IO to complete. 1944 */ 1945 while (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC) { 1946 ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); 1947 cv_wait(&db->db_changed, &db->db_mtx); 1948 ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN); 1949 } 1950 | 1986 * If this buffer is in the middle of an immdiate write, 1987 * wait for the synchronous IO to complete. 1988 */ 1989 while (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC) { 1990 ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); 1991 cv_wait(&db->db_changed, &db->db_mtx); 1992 ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN); 1993 } 1994 |
1951 dbuf_check_blkptr(dn, db); 1952 | |
1953 /* 1954 * If this dbuf has already been written out via an immediate write, 1955 * just complete the write by copying over the new block pointer and 1956 * updating the accounting via the write-completion functions. 1957 */ 1958 if (dr->dt.dl.dr_override_state == DR_OVERRIDDEN) { 1959 zio_t zio_fake; 1960 1961 zio_fake.io_private = &db; 1962 zio_fake.io_error = 0; 1963 zio_fake.io_bp = db->db_blkptr; 1964 zio_fake.io_bp_orig = *db->db_blkptr; 1965 zio_fake.io_txg = txg; | 1995 /* 1996 * If this dbuf has already been written out via an immediate write, 1997 * just complete the write by copying over the new block pointer and 1998 * updating the accounting via the write-completion functions. 1999 */ 2000 if (dr->dt.dl.dr_override_state == DR_OVERRIDDEN) { 2001 zio_t zio_fake; 2002 2003 zio_fake.io_private = &db; 2004 zio_fake.io_error = 0; 2005 zio_fake.io_bp = db->db_blkptr; 2006 zio_fake.io_bp_orig = *db->db_blkptr; 2007 zio_fake.io_txg = txg; |
2008 zio_fake.io_flags = 0; |
|
1966 1967 *db->db_blkptr = dr->dt.dl.dr_overridden_by; 1968 dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; 1969 db->db_data_pending = dr; 1970 dr->dr_zio = &zio_fake; 1971 mutex_exit(&db->db_mtx); 1972 | 2009 2010 *db->db_blkptr = dr->dt.dl.dr_overridden_by; 2011 dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; 2012 db->db_data_pending = dr; 2013 dr->dr_zio = &zio_fake; 2014 mutex_exit(&db->db_mtx); 2015 |
2016 ASSERT(!DVA_EQUAL(BP_IDENTITY(zio_fake.io_bp), 2017 BP_IDENTITY(&zio_fake.io_bp_orig)) || 2018 BP_IS_HOLE(zio_fake.io_bp)); 2019 |
|
1973 if (BP_IS_OLDER(&zio_fake.io_bp_orig, txg)) | 2020 if (BP_IS_OLDER(&zio_fake.io_bp_orig, txg)) |
1974 dsl_dataset_block_kill(os->os_dsl_dataset, | 2021 (void) dsl_dataset_block_kill(os->os_dsl_dataset, |
1975 &zio_fake.io_bp_orig, dn->dn_zio, tx); 1976 1977 dbuf_write_ready(&zio_fake, db->db_buf, db); 1978 dbuf_write_done(&zio_fake, db->db_buf, db); 1979 1980 return; 1981 } 1982 --- 9 unchanged lines hidden (view full) --- 1992 * NOTE: this copy does not need to be made for objects only 1993 * modified in the syncing context (e.g. DNONE_DNODE blocks). 1994 */ 1995 if (refcount_count(&db->db_holds) > 1 && *datap == db->db_buf) { 1996 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 1997 *datap = arc_buf_alloc(os->os_spa, blksz, db, type); 1998 bcopy(db->db.db_data, (*datap)->b_data, blksz); 1999 } | 2022 &zio_fake.io_bp_orig, dn->dn_zio, tx); 2023 2024 dbuf_write_ready(&zio_fake, db->db_buf, db); 2025 dbuf_write_done(&zio_fake, db->db_buf, db); 2026 2027 return; 2028 } 2029 --- 9 unchanged lines hidden (view full) --- 2039 * NOTE: this copy does not need to be made for objects only 2040 * modified in the syncing context (e.g. DNONE_DNODE blocks). 2041 */ 2042 if (refcount_count(&db->db_holds) > 1 && *datap == db->db_buf) { 2043 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 2044 *datap = arc_buf_alloc(os->os_spa, blksz, db, type); 2045 bcopy(db->db.db_data, (*datap)->b_data, blksz); 2046 } |
2000 } else { 2001 /* 2002 * Private object buffers are released here rather 2003 * than in dbuf_dirty() since they are only modified 2004 * in the syncing context and we don't want the 2005 * overhead of making multiple copies of the data. 2006 */ 2007 arc_release(db->db_buf, db); | |
2008 } 2009 2010 ASSERT(*datap != NULL); 2011 db->db_data_pending = dr; 2012 2013 mutex_exit(&db->db_mtx); 2014 | 2047 } 2048 2049 ASSERT(*datap != NULL); 2050 db->db_data_pending = dr; 2051 2052 mutex_exit(&db->db_mtx); 2053 |
2015 /* 2016 * Allow dnode settings to override objset settings, 2017 * except for metadata checksums. 2018 */ 2019 if (dmu_ot[dn->dn_type].ot_metadata) { 2020 checksum = os->os_md_checksum; 2021 compress = zio_compress_select(dn->dn_compress, 2022 os->os_md_compress); 2023 } else { 2024 checksum = zio_checksum_select(dn->dn_checksum, 2025 os->os_checksum); 2026 compress = zio_compress_select(dn->dn_compress, 2027 os->os_compress); 2028 } | 2054 dbuf_write(dr, *datap, tx); |
2029 | 2055 |
2030 dbuf_write(dr, *datap, checksum, compress, tx); 2031 | |
2032 ASSERT(!list_link_active(&dr->dr_dirty_node)); 2033 if (dn->dn_object == DMU_META_DNODE_OBJECT) 2034 list_insert_tail(&dn->dn_dirty_records[txg&TXG_MASK], dr); 2035 else 2036 zio_nowait(dr->dr_zio); 2037} 2038 2039void --- 18 unchanged lines hidden (view full) --- 2058 if (dr->dr_dbuf->db_level > 0) 2059 dbuf_sync_indirect(dr, tx); 2060 else 2061 dbuf_sync_leaf(dr, tx); 2062 } 2063} 2064 2065static void | 2056 ASSERT(!list_link_active(&dr->dr_dirty_node)); 2057 if (dn->dn_object == DMU_META_DNODE_OBJECT) 2058 list_insert_tail(&dn->dn_dirty_records[txg&TXG_MASK], dr); 2059 else 2060 zio_nowait(dr->dr_zio); 2061} 2062 2063void --- 18 unchanged lines hidden (view full) --- 2082 if (dr->dr_dbuf->db_level > 0) 2083 dbuf_sync_indirect(dr, tx); 2084 else 2085 dbuf_sync_leaf(dr, tx); 2086 } 2087} 2088 2089static void |
2066dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, int checksum, 2067 int compress, dmu_tx_t *tx) | 2090dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) |
2068{ 2069 dmu_buf_impl_t *db = dr->dr_dbuf; 2070 dnode_t *dn = db->db_dnode; 2071 objset_impl_t *os = dn->dn_objset; 2072 dmu_buf_impl_t *parent = db->db_parent; 2073 uint64_t txg = tx->tx_txg; 2074 zbookmark_t zb; | 2091{ 2092 dmu_buf_impl_t *db = dr->dr_dbuf; 2093 dnode_t *dn = db->db_dnode; 2094 objset_impl_t *os = dn->dn_objset; 2095 dmu_buf_impl_t *parent = db->db_parent; 2096 uint64_t txg = tx->tx_txg; 2097 zbookmark_t zb; |
2098 writeprops_t wp = { 0 }; |
|
2075 zio_t *zio; | 2099 zio_t *zio; |
2076 int zio_flags; | |
2077 | 2100 |
2101 if (!BP_IS_HOLE(db->db_blkptr) && 2102 (db->db_level > 0 || dn->dn_type == DMU_OT_DNODE)) { 2103 /* 2104 * Private object buffers are released here rather 2105 * than in dbuf_dirty() since they are only modified 2106 * in the syncing context and we don't want the 2107 * overhead of making multiple copies of the data. 2108 */ 2109 arc_release(data, db); 2110 } else { 2111 ASSERT(arc_released(data)); 2112 /* XXX why do we need to thaw here? */ 2113 arc_buf_thaw(data); 2114 } 2115 |
|
2078 if (parent != dn->dn_dbuf) { 2079 ASSERT(parent && parent->db_data_pending); 2080 ASSERT(db->db_level == parent->db_level-1); 2081 ASSERT(arc_released(parent->db_buf)); 2082 zio = parent->db_data_pending->dr_zio; 2083 } else { 2084 ASSERT(db->db_level == dn->dn_phys->dn_nlevels-1); 2085 ASSERT3P(db->db_blkptr, ==, --- 5 unchanged lines hidden (view full) --- 2091 ASSERT3U(db->db_blkptr->blk_birth, <=, txg); 2092 ASSERT(zio); 2093 2094 zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0; 2095 zb.zb_object = db->db.db_object; 2096 zb.zb_level = db->db_level; 2097 zb.zb_blkid = db->db_blkid; 2098 | 2116 if (parent != dn->dn_dbuf) { 2117 ASSERT(parent && parent->db_data_pending); 2118 ASSERT(db->db_level == parent->db_level-1); 2119 ASSERT(arc_released(parent->db_buf)); 2120 zio = parent->db_data_pending->dr_zio; 2121 } else { 2122 ASSERT(db->db_level == dn->dn_phys->dn_nlevels-1); 2123 ASSERT3P(db->db_blkptr, ==, --- 5 unchanged lines hidden (view full) --- 2129 ASSERT3U(db->db_blkptr->blk_birth, <=, txg); 2130 ASSERT(zio); 2131 2132 zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0; 2133 zb.zb_object = db->db.db_object; 2134 zb.zb_level = db->db_level; 2135 zb.zb_blkid = db->db_blkid; 2136 |
2099 zio_flags = ZIO_FLAG_MUSTSUCCEED; 2100 if (dmu_ot[dn->dn_type].ot_metadata || zb.zb_level != 0) 2101 zio_flags |= ZIO_FLAG_METADATA; | 2137 wp.wp_type = dn->dn_type; 2138 wp.wp_level = db->db_level; 2139 wp.wp_copies = os->os_copies; 2140 wp.wp_dncompress = dn->dn_compress; 2141 wp.wp_oscompress = os->os_compress; 2142 wp.wp_dnchecksum = dn->dn_checksum; 2143 wp.wp_oschecksum = os->os_checksum; 2144 |
2102 if (BP_IS_OLDER(db->db_blkptr, txg)) | 2145 if (BP_IS_OLDER(db->db_blkptr, txg)) |
2103 dsl_dataset_block_kill( | 2146 (void) dsl_dataset_block_kill( |
2104 os->os_dsl_dataset, db->db_blkptr, zio, tx); 2105 | 2147 os->os_dsl_dataset, db->db_blkptr, zio, tx); 2148 |
2106 dr->dr_zio = arc_write(zio, os->os_spa, checksum, compress, 2107 dmu_get_replication_level(os, &zb, dn->dn_type), txg, 2108 db->db_blkptr, data, dbuf_write_ready, dbuf_write_done, db, 2109 ZIO_PRIORITY_ASYNC_WRITE, zio_flags, &zb); | 2149 dr->dr_zio = arc_write(zio, os->os_spa, &wp, 2150 DBUF_IS_L2CACHEABLE(db), txg, db->db_blkptr, 2151 data, dbuf_write_ready, dbuf_write_done, db, 2152 ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); |
2110} 2111 2112/* ARGSUSED */ 2113static void 2114dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) 2115{ 2116 dmu_buf_impl_t *db = vdb; 2117 dnode_t *dn = db->db_dnode; 2118 objset_impl_t *os = dn->dn_objset; | 2153} 2154 2155/* ARGSUSED */ 2156static void 2157dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) 2158{ 2159 dmu_buf_impl_t *db = vdb; 2160 dnode_t *dn = db->db_dnode; 2161 objset_impl_t *os = dn->dn_objset; |
2162 blkptr_t *bp = zio->io_bp; |
|
2119 blkptr_t *bp_orig = &zio->io_bp_orig; 2120 uint64_t fill = 0; 2121 int old_size, new_size, i; 2122 | 2163 blkptr_t *bp_orig = &zio->io_bp_orig; 2164 uint64_t fill = 0; 2165 int old_size, new_size, i; 2166 |
2167 ASSERT(db->db_blkptr == bp); 2168 |
|
2123 dprintf_dbuf_bp(db, bp_orig, "bp_orig: %s", ""); 2124 2125 old_size = bp_get_dasize(os->os_spa, bp_orig); | 2169 dprintf_dbuf_bp(db, bp_orig, "bp_orig: %s", ""); 2170 2171 old_size = bp_get_dasize(os->os_spa, bp_orig); |
2126 new_size = bp_get_dasize(os->os_spa, zio->io_bp); | 2172 new_size = bp_get_dasize(os->os_spa, bp); |
2127 | 2173 |
2128 dnode_diduse_space(dn, new_size-old_size); | 2174 dnode_diduse_space(dn, new_size - old_size); |
2129 | 2175 |
2130 if (BP_IS_HOLE(zio->io_bp)) { | 2176 if (BP_IS_HOLE(bp)) { |
2131 dsl_dataset_t *ds = os->os_dsl_dataset; 2132 dmu_tx_t *tx = os->os_synctx; 2133 2134 if (bp_orig->blk_birth == tx->tx_txg) | 2177 dsl_dataset_t *ds = os->os_dsl_dataset; 2178 dmu_tx_t *tx = os->os_synctx; 2179 2180 if (bp_orig->blk_birth == tx->tx_txg) |
2135 dsl_dataset_block_kill(ds, bp_orig, NULL, tx); 2136 ASSERT3U(db->db_blkptr->blk_fill, ==, 0); | 2181 (void) dsl_dataset_block_kill(ds, bp_orig, zio, tx); 2182 ASSERT3U(bp->blk_fill, ==, 0); |
2137 return; 2138 } 2139 | 2183 return; 2184 } 2185 |
2186 ASSERT(BP_GET_TYPE(bp) == dn->dn_type); 2187 ASSERT(BP_GET_LEVEL(bp) == db->db_level); 2188 |
|
2140 mutex_enter(&db->db_mtx); 2141 2142 if (db->db_level == 0) { 2143 mutex_enter(&dn->dn_mtx); 2144 if (db->db_blkid > dn->dn_phys->dn_maxblkid) 2145 dn->dn_phys->dn_maxblkid = db->db_blkid; 2146 mutex_exit(&dn->dn_mtx); 2147 2148 if (dn->dn_type == DMU_OT_DNODE) { 2149 dnode_phys_t *dnp = db->db.db_data; 2150 for (i = db->db.db_size >> DNODE_SHIFT; i > 0; 2151 i--, dnp++) { 2152 if (dnp->dn_type != DMU_OT_NONE) 2153 fill++; 2154 } 2155 } else { 2156 fill = 1; 2157 } 2158 } else { | 2189 mutex_enter(&db->db_mtx); 2190 2191 if (db->db_level == 0) { 2192 mutex_enter(&dn->dn_mtx); 2193 if (db->db_blkid > dn->dn_phys->dn_maxblkid) 2194 dn->dn_phys->dn_maxblkid = db->db_blkid; 2195 mutex_exit(&dn->dn_mtx); 2196 2197 if (dn->dn_type == DMU_OT_DNODE) { 2198 dnode_phys_t *dnp = db->db.db_data; 2199 for (i = db->db.db_size >> DNODE_SHIFT; i > 0; 2200 i--, dnp++) { 2201 if (dnp->dn_type != DMU_OT_NONE) 2202 fill++; 2203 } 2204 } else { 2205 fill = 1; 2206 } 2207 } else { |
2159 blkptr_t *bp = db->db.db_data; | 2208 blkptr_t *ibp = db->db.db_data; |
2160 ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift); | 2209 ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift); |
2161 for (i = db->db.db_size >> SPA_BLKPTRSHIFT; i > 0; i--, bp++) { 2162 if (BP_IS_HOLE(bp)) | 2210 for (i = db->db.db_size >> SPA_BLKPTRSHIFT; i > 0; i--, ibp++) { 2211 if (BP_IS_HOLE(ibp)) |
2163 continue; | 2212 continue; |
2164 ASSERT3U(BP_GET_LSIZE(bp), ==, | 2213 ASSERT3U(BP_GET_LSIZE(ibp), ==, |
2165 db->db_level == 1 ? dn->dn_datablksz : 2166 (1<<dn->dn_phys->dn_indblkshift)); | 2214 db->db_level == 1 ? dn->dn_datablksz : 2215 (1<<dn->dn_phys->dn_indblkshift)); |
2167 fill += bp->blk_fill; | 2216 fill += ibp->blk_fill; |
2168 } 2169 } 2170 | 2217 } 2218 } 2219 |
2171 db->db_blkptr->blk_fill = fill; 2172 BP_SET_TYPE(db->db_blkptr, dn->dn_type); 2173 BP_SET_LEVEL(db->db_blkptr, db->db_level); | 2220 bp->blk_fill = fill; |
2174 2175 mutex_exit(&db->db_mtx); 2176 | 2221 2222 mutex_exit(&db->db_mtx); 2223 |
2177 /* We must do this after we've set the bp's type and level */ 2178 if (!DVA_EQUAL(BP_IDENTITY(zio->io_bp), BP_IDENTITY(bp_orig))) { | 2224 if (zio->io_flags & ZIO_FLAG_IO_REWRITE) { 2225 ASSERT(DVA_EQUAL(BP_IDENTITY(bp), BP_IDENTITY(bp_orig))); 2226 } else { |
2179 dsl_dataset_t *ds = os->os_dsl_dataset; 2180 dmu_tx_t *tx = os->os_synctx; 2181 2182 if (bp_orig->blk_birth == tx->tx_txg) | 2227 dsl_dataset_t *ds = os->os_dsl_dataset; 2228 dmu_tx_t *tx = os->os_synctx; 2229 2230 if (bp_orig->blk_birth == tx->tx_txg) |
2183 dsl_dataset_block_kill(ds, bp_orig, NULL, tx); 2184 dsl_dataset_block_born(ds, zio->io_bp, tx); | 2231 (void) dsl_dataset_block_kill(ds, bp_orig, zio, tx); 2232 dsl_dataset_block_born(ds, bp, tx); |
2185 } 2186} 2187 2188/* ARGSUSED */ 2189static void 2190dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) 2191{ 2192 dmu_buf_impl_t *db = vdb; 2193 uint64_t txg = zio->io_txg; 2194 dbuf_dirty_record_t **drp, *dr; 2195 2196 ASSERT3U(zio->io_error, ==, 0); 2197 2198 mutex_enter(&db->db_mtx); 2199 2200 drp = &db->db_last_dirty; | 2233 } 2234} 2235 2236/* ARGSUSED */ 2237static void 2238dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) 2239{ 2240 dmu_buf_impl_t *db = vdb; 2241 uint64_t txg = zio->io_txg; 2242 dbuf_dirty_record_t **drp, *dr; 2243 2244 ASSERT3U(zio->io_error, ==, 0); 2245 2246 mutex_enter(&db->db_mtx); 2247 2248 drp = &db->db_last_dirty; |
2201 while (*drp != db->db_data_pending) 2202 drp = &(*drp)->dr_next; 2203 ASSERT(!list_link_active(&(*drp)->dr_dirty_node)); 2204 ASSERT((*drp)->dr_txg == txg); 2205 ASSERT((*drp)->dr_next == NULL); 2206 dr = *drp; 2207 *drp = NULL; | 2249 while ((dr = *drp) != db->db_data_pending) 2250 drp = &dr->dr_next; 2251 ASSERT(!list_link_active(&dr->dr_dirty_node)); 2252 ASSERT(dr->dr_txg == txg); 2253 ASSERT(dr->dr_next == NULL); 2254 *drp = dr->dr_next; |
2208 2209 if (db->db_level == 0) { 2210 ASSERT(db->db_blkid != DB_BONUS_BLKID); 2211 ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN); 2212 2213 if (dr->dt.dl.dr_data != db->db_buf) 2214 VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db) == 1); 2215 else if (!BP_IS_HOLE(db->db_blkptr)) --- 9 unchanged lines hidden (view full) --- 2225 int epbs = 2226 dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; 2227 ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==, 2228 db->db.db_size); 2229 ASSERT3U(dn->dn_phys->dn_maxblkid 2230 >> (db->db_level * epbs), >=, db->db_blkid); 2231 arc_set_callback(db->db_buf, dbuf_do_evict, db); 2232 } | 2255 2256 if (db->db_level == 0) { 2257 ASSERT(db->db_blkid != DB_BONUS_BLKID); 2258 ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN); 2259 2260 if (dr->dt.dl.dr_data != db->db_buf) 2261 VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db) == 1); 2262 else if (!BP_IS_HOLE(db->db_blkptr)) --- 9 unchanged lines hidden (view full) --- 2272 int epbs = 2273 dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; 2274 ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==, 2275 db->db.db_size); 2276 ASSERT3U(dn->dn_phys->dn_maxblkid 2277 >> (db->db_level * epbs), >=, db->db_blkid); 2278 arc_set_callback(db->db_buf, dbuf_do_evict, db); 2279 } |
2233 list_destroy(&dr->dt.di.dr_children); | |
2234 mutex_destroy(&dr->dt.di.dr_mtx); | 2280 mutex_destroy(&dr->dt.di.dr_mtx); |
2281 list_destroy(&dr->dt.di.dr_children); |
|
2235 } 2236 kmem_free(dr, sizeof (dbuf_dirty_record_t)); 2237 2238 cv_broadcast(&db->db_changed); 2239 ASSERT(db->db_dirtycnt > 0); 2240 db->db_dirtycnt -= 1; 2241 db->db_data_pending = NULL; 2242 mutex_exit(&db->db_mtx); 2243 2244 dprintf_dbuf_bp(db, zio->io_bp, "bp: %s", ""); 2245 2246 dbuf_rele(db, (void *)(uintptr_t)txg); 2247} | 2282 } 2283 kmem_free(dr, sizeof (dbuf_dirty_record_t)); 2284 2285 cv_broadcast(&db->db_changed); 2286 ASSERT(db->db_dirtycnt > 0); 2287 db->db_dirtycnt -= 1; 2288 db->db_data_pending = NULL; 2289 mutex_exit(&db->db_mtx); 2290 2291 dprintf_dbuf_bp(db, zio->io_bp, "bp: %s", ""); 2292 2293 dbuf_rele(db, (void *)(uintptr_t)txg); 2294} |