Deleted Added
full compact
dbuf.c (177698) dbuf.c (185029)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 5 unchanged lines hidden (view full) ---

14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 5 unchanged lines hidden (view full) ---

14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
23 * Use is subject to license terms.
24 */
25
26#pragma ident "%Z%%M% %I% %E% SMI"
27
28#include <sys/zfs_context.h>
29#include <sys/dmu.h>
30#include <sys/dmu_impl.h>
31#include <sys/dbuf.h>
32#include <sys/dmu_objset.h>
33#include <sys/dsl_dataset.h>
34#include <sys/dsl_dir.h>
35#include <sys/dmu_tx.h>
36#include <sys/spa.h>
37#include <sys/zio.h>
38#include <sys/dmu_zfetch.h>
39
40static void dbuf_destroy(dmu_buf_impl_t *db);
41static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
26#include <sys/zfs_context.h>
27#include <sys/dmu.h>
28#include <sys/dmu_impl.h>
29#include <sys/dbuf.h>
30#include <sys/dmu_objset.h>
31#include <sys/dsl_dataset.h>
32#include <sys/dsl_dir.h>
33#include <sys/dmu_tx.h>
34#include <sys/spa.h>
35#include <sys/zio.h>
36#include <sys/dmu_zfetch.h>
37
38static void dbuf_destroy(dmu_buf_impl_t *db);
39static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
42static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, int checksum,
43 int compress, dmu_tx_t *tx);
40static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
44static arc_done_func_t dbuf_write_ready;
45static arc_done_func_t dbuf_write_done;
46
41static arc_done_func_t dbuf_write_ready;
42static arc_done_func_t dbuf_write_done;
43
47int zfs_mdcomp_disable = 0;
48SYSCTL_DECL(_vfs_zfs);
49TUNABLE_INT("vfs.zfs.mdcomp_disable", &zfs_mdcomp_disable);
50SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN,
51 &zfs_mdcomp_disable, 0, "Disable metadata compression");
52
53/*
54 * Global data structures and functions for the dbuf cache.
55 */
56static kmem_cache_t *dbuf_cache;
57
58/* ARGSUSED */
59static int
60dbuf_cons(void *vdb, void *unused, int kmflag)

--- 245 unchanged lines hidden (view full) ---

306 ASSERT3U(db->db.db_object, ==, dn->dn_object);
307 ASSERT3P(db->db_objset, ==, dn->dn_objset);
308 ASSERT3U(db->db_level, <, dn->dn_nlevels);
309 ASSERT(db->db_blkid == DB_BONUS_BLKID ||
310 list_head(&dn->dn_dbufs));
311 }
312 if (db->db_blkid == DB_BONUS_BLKID) {
313 ASSERT(dn != NULL);
44/*
45 * Global data structures and functions for the dbuf cache.
46 */
47static kmem_cache_t *dbuf_cache;
48
49/* ARGSUSED */
50static int
51dbuf_cons(void *vdb, void *unused, int kmflag)

--- 245 unchanged lines hidden (view full) ---

297 ASSERT3U(db->db.db_object, ==, dn->dn_object);
298 ASSERT3P(db->db_objset, ==, dn->dn_objset);
299 ASSERT3U(db->db_level, <, dn->dn_nlevels);
300 ASSERT(db->db_blkid == DB_BONUS_BLKID ||
301 list_head(&dn->dn_dbufs));
302 }
303 if (db->db_blkid == DB_BONUS_BLKID) {
304 ASSERT(dn != NULL);
314 ASSERT3U(db->db.db_size, ==, dn->dn_bonuslen);
305 ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
315 ASSERT3U(db->db.db_offset, ==, DB_BONUS_BLKID);
316 } else {
317 ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size);
318 }
319
320 if (db->db_level == 0) {
321 /* we can be momentarily larger in dnode_set_blksz() */
322 if (db->db_blkid != DB_BONUS_BLKID && dn) {

--- 132 unchanged lines hidden (view full) ---

455 cv_broadcast(&db->db_changed);
456 mutex_exit(&db->db_mtx);
457 dbuf_rele(db, NULL);
458}
459
460static void
461dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
462{
306 ASSERT3U(db->db.db_offset, ==, DB_BONUS_BLKID);
307 } else {
308 ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size);
309 }
310
311 if (db->db_level == 0) {
312 /* we can be momentarily larger in dnode_set_blksz() */
313 if (db->db_blkid != DB_BONUS_BLKID && dn) {

--- 132 unchanged lines hidden (view full) ---

446 cv_broadcast(&db->db_changed);
447 mutex_exit(&db->db_mtx);
448 dbuf_rele(db, NULL);
449}
450
451static void
452dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
453{
463 blkptr_t *bp;
454 dnode_t *dn = db->db_dnode;
464 zbookmark_t zb;
465 uint32_t aflags = ARC_NOWAIT;
455 zbookmark_t zb;
456 uint32_t aflags = ARC_NOWAIT;
457 arc_buf_t *pbuf;
466
467 ASSERT(!refcount_is_zero(&db->db_holds));
468 /* We need the struct_rwlock to prevent db_blkptr from changing. */
458
459 ASSERT(!refcount_is_zero(&db->db_holds));
460 /* We need the struct_rwlock to prevent db_blkptr from changing. */
469 ASSERT(RW_LOCK_HELD(&db->db_dnode->dn_struct_rwlock));
461 ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
470 ASSERT(MUTEX_HELD(&db->db_mtx));
471 ASSERT(db->db_state == DB_UNCACHED);
472 ASSERT(db->db_buf == NULL);
473
474 if (db->db_blkid == DB_BONUS_BLKID) {
462 ASSERT(MUTEX_HELD(&db->db_mtx));
463 ASSERT(db->db_state == DB_UNCACHED);
464 ASSERT(db->db_buf == NULL);
465
466 if (db->db_blkid == DB_BONUS_BLKID) {
475 ASSERT3U(db->db_dnode->dn_bonuslen, ==, db->db.db_size);
467 int bonuslen = dn->dn_bonuslen;
468
469 ASSERT3U(bonuslen, <=, db->db.db_size);
476 db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN);
470 db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN);
477 if (db->db.db_size < DN_MAX_BONUSLEN)
471 arc_space_consume(DN_MAX_BONUSLEN);
472 if (bonuslen < DN_MAX_BONUSLEN)
478 bzero(db->db.db_data, DN_MAX_BONUSLEN);
473 bzero(db->db.db_data, DN_MAX_BONUSLEN);
479 bcopy(DN_BONUS(db->db_dnode->dn_phys), db->db.db_data,
480 db->db.db_size);
474 bcopy(DN_BONUS(dn->dn_phys), db->db.db_data,
475 bonuslen);
481 dbuf_update_data(db);
482 db->db_state = DB_CACHED;
483 mutex_exit(&db->db_mtx);
484 return;
485 }
486
476 dbuf_update_data(db);
477 db->db_state = DB_CACHED;
478 mutex_exit(&db->db_mtx);
479 return;
480 }
481
487 if (db->db_level == 0 && dnode_block_freed(db->db_dnode, db->db_blkid))
488 bp = NULL;
489 else
490 bp = db->db_blkptr;
491
492 if (bp == NULL)
493 dprintf_dbuf(db, "blkptr: %s\n", "NULL");
494 else
495 dprintf_dbuf_bp(db, bp, "%s", "blkptr:");
496
497 if (bp == NULL || BP_IS_HOLE(bp)) {
482 /*
483 * Recheck BP_IS_HOLE() after dnode_block_freed() in case dnode_sync()
484 * processes the delete record and clears the bp while we are waiting
485 * for the dn_mtx (resulting in a "no" from block_freed).
486 */
487 if (db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr) ||
488 (db->db_level == 0 && (dnode_block_freed(dn, db->db_blkid) ||
489 BP_IS_HOLE(db->db_blkptr)))) {
498 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
499
490 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
491
500 ASSERT(bp == NULL || BP_IS_HOLE(bp));
501 dbuf_set_data(db, arc_buf_alloc(db->db_dnode->dn_objset->os_spa,
492 dbuf_set_data(db, arc_buf_alloc(dn->dn_objset->os_spa,
502 db->db.db_size, db, type));
503 bzero(db->db.db_data, db->db.db_size);
504 db->db_state = DB_CACHED;
505 *flags |= DB_RF_CACHED;
506 mutex_exit(&db->db_mtx);
507 return;
508 }
509
510 db->db_state = DB_READ;
511 mutex_exit(&db->db_mtx);
512
493 db->db.db_size, db, type));
494 bzero(db->db.db_data, db->db.db_size);
495 db->db_state = DB_CACHED;
496 *flags |= DB_RF_CACHED;
497 mutex_exit(&db->db_mtx);
498 return;
499 }
500
501 db->db_state = DB_READ;
502 mutex_exit(&db->db_mtx);
503
504 if (DBUF_IS_L2CACHEABLE(db))
505 aflags |= ARC_L2CACHE;
506
513 zb.zb_objset = db->db_objset->os_dsl_dataset ?
514 db->db_objset->os_dsl_dataset->ds_object : 0;
515 zb.zb_object = db->db.db_object;
516 zb.zb_level = db->db_level;
517 zb.zb_blkid = db->db_blkid;
518
519 dbuf_add_ref(db, NULL);
520 /* ZIO_FLAG_CANFAIL callers have to check the parent zio's error */
507 zb.zb_objset = db->db_objset->os_dsl_dataset ?
508 db->db_objset->os_dsl_dataset->ds_object : 0;
509 zb.zb_object = db->db.db_object;
510 zb.zb_level = db->db_level;
511 zb.zb_blkid = db->db_blkid;
512
513 dbuf_add_ref(db, NULL);
514 /* ZIO_FLAG_CANFAIL callers have to check the parent zio's error */
521 ASSERT3U(db->db_dnode->dn_type, <, DMU_OT_NUMTYPES);
522 (void) arc_read(zio, db->db_dnode->dn_objset->os_spa, bp,
523 db->db_level > 0 ? byteswap_uint64_array :
524 dmu_ot[db->db_dnode->dn_type].ot_byteswap,
515
516 if (db->db_parent)
517 pbuf = db->db_parent->db_buf;
518 else
519 pbuf = db->db_objset->os_phys_buf;
520
521 (void) arc_read(zio, dn->dn_objset->os_spa, db->db_blkptr, pbuf,
525 dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
526 (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
527 &aflags, &zb);
528 if (aflags & ARC_CACHED)
529 *flags |= DB_RF_CACHED;
530}
531
532int

--- 8 unchanged lines hidden (view full) ---

541 * can't be freed while we have a hold on the buffer.
542 */
543 ASSERT(!refcount_is_zero(&db->db_holds));
544
545 if ((flags & DB_RF_HAVESTRUCT) == 0)
546 rw_enter(&db->db_dnode->dn_struct_rwlock, RW_READER);
547
548 prefetch = db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID &&
522 dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
523 (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
524 &aflags, &zb);
525 if (aflags & ARC_CACHED)
526 *flags |= DB_RF_CACHED;
527}
528
529int

--- 8 unchanged lines hidden (view full) ---

538 * can't be freed while we have a hold on the buffer.
539 */
540 ASSERT(!refcount_is_zero(&db->db_holds));
541
542 if ((flags & DB_RF_HAVESTRUCT) == 0)
543 rw_enter(&db->db_dnode->dn_struct_rwlock, RW_READER);
544
545 prefetch = db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID &&
549 (flags & DB_RF_NOPREFETCH) == 0 && db->db_dnode != NULL;
546 (flags & DB_RF_NOPREFETCH) == 0 && db->db_dnode != NULL &&
547 DBUF_IS_CACHEABLE(db);
550
551 mutex_enter(&db->db_mtx);
552 if (db->db_state == DB_CACHED) {
553 mutex_exit(&db->db_mtx);
554 if (prefetch)
555 dmu_zfetch(&db->db_dnode->dn_zfetch, db->db.db_offset,
556 db->db.db_size, TRUE);
557 if ((flags & DB_RF_HAVESTRUCT) == 0)

--- 98 unchanged lines hidden (view full) ---

656 * reset the reference to point to a new copy,
657 * or (if there are no active holders)
658 * just null out the current db_data pointer.
659 */
660 ASSERT(dr->dr_txg >= txg - 2);
661 if (db->db_blkid == DB_BONUS_BLKID) {
662 /* Note that the data bufs here are zio_bufs */
663 dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN);
548
549 mutex_enter(&db->db_mtx);
550 if (db->db_state == DB_CACHED) {
551 mutex_exit(&db->db_mtx);
552 if (prefetch)
553 dmu_zfetch(&db->db_dnode->dn_zfetch, db->db.db_offset,
554 db->db.db_size, TRUE);
555 if ((flags & DB_RF_HAVESTRUCT) == 0)

--- 98 unchanged lines hidden (view full) ---

654 * reset the reference to point to a new copy,
655 * or (if there are no active holders)
656 * just null out the current db_data pointer.
657 */
658 ASSERT(dr->dr_txg >= txg - 2);
659 if (db->db_blkid == DB_BONUS_BLKID) {
660 /* Note that the data bufs here are zio_bufs */
661 dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN);
662 arc_space_consume(DN_MAX_BONUSLEN);
664 bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN);
665 } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
666 int size = db->db.db_size;
667 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
668 dr->dt.dl.dr_data = arc_buf_alloc(
669 db->db_dnode->dn_objset->os_spa, size, db, type);
670 bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
671 } else {

--- 13 unchanged lines hidden (view full) ---

685
686 if (db->db_blkid == DB_BONUS_BLKID ||
687 dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN)
688 return;
689
690 /* free this block */
691 if (!BP_IS_HOLE(&dr->dt.dl.dr_overridden_by)) {
692 /* XXX can get silent EIO here */
663 bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN);
664 } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
665 int size = db->db.db_size;
666 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
667 dr->dt.dl.dr_data = arc_buf_alloc(
668 db->db_dnode->dn_objset->os_spa, size, db, type);
669 bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
670 } else {

--- 13 unchanged lines hidden (view full) ---

684
685 if (db->db_blkid == DB_BONUS_BLKID ||
686 dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN)
687 return;
688
689 /* free this block */
690 if (!BP_IS_HOLE(&dr->dt.dl.dr_overridden_by)) {
691 /* XXX can get silent EIO here */
693 (void) arc_free(NULL, db->db_dnode->dn_objset->os_spa,
692 (void) dsl_free(NULL,
693 spa_get_dsl(db->db_dnode->dn_objset->os_spa),
694 txg, &dr->dt.dl.dr_overridden_by, NULL, NULL, ARC_WAIT);
695 }
696 dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
697 /*
698 * Release the already-written buffer, so we leave it in
699 * a consistent dirty state. Note that all callers are
700 * modifying the buffer, so they will immediately do
701 * another (redundant) arc_release(). Therefore, leave
702 * the buf thawed to save the effort of freezing &
703 * immediately re-thawing it.
704 */
705 arc_release(dr->dt.dl.dr_data, db);
706}
707
694 txg, &dr->dt.dl.dr_overridden_by, NULL, NULL, ARC_WAIT);
695 }
696 dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
697 /*
698 * Release the already-written buffer, so we leave it in
699 * a consistent dirty state. Note that all callers are
700 * modifying the buffer, so they will immediately do
701 * another (redundant) arc_release(). Therefore, leave
702 * the buf thawed to save the effort of freezing &
703 * immediately re-thawing it.
704 */
705 arc_release(dr->dt.dl.dr_data, db);
706}
707
708/*
709 * Evict (if it's unreferenced) or clear (if it's referenced) any level-0
710 * data blocks in the free range, so that any future readers will find
711 * empty blocks. Also, if we happen across any level-1 dbufs in the
712 * range that have not already been marked dirty, mark them dirty so
713 * they stay in memory.
714 */
708void
715void
709dbuf_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
716dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
710{
711 dmu_buf_impl_t *db, *db_next;
712 uint64_t txg = tx->tx_txg;
717{
718 dmu_buf_impl_t *db, *db_next;
719 uint64_t txg = tx->tx_txg;
720 int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
721 uint64_t first_l1 = start >> epbs;
722 uint64_t last_l1 = end >> epbs;
713
723
714 dprintf_dnode(dn, "blkid=%llu nblks=%llu\n", blkid, nblks);
724 if (end > dn->dn_maxblkid) {
725 end = dn->dn_maxblkid;
726 last_l1 = end >> epbs;
727 }
728 dprintf_dnode(dn, "start=%llu end=%llu\n", start, end);
715 mutex_enter(&dn->dn_dbufs_mtx);
716 for (db = list_head(&dn->dn_dbufs); db; db = db_next) {
717 db_next = list_next(&dn->dn_dbufs, db);
718 ASSERT(db->db_blkid != DB_BONUS_BLKID);
729 mutex_enter(&dn->dn_dbufs_mtx);
730 for (db = list_head(&dn->dn_dbufs); db; db = db_next) {
731 db_next = list_next(&dn->dn_dbufs, db);
732 ASSERT(db->db_blkid != DB_BONUS_BLKID);
733
734 if (db->db_level == 1 &&
735 db->db_blkid >= first_l1 && db->db_blkid <= last_l1) {
736 mutex_enter(&db->db_mtx);
737 if (db->db_last_dirty &&
738 db->db_last_dirty->dr_txg < txg) {
739 dbuf_add_ref(db, FTAG);
740 mutex_exit(&db->db_mtx);
741 dbuf_will_dirty(db, tx);
742 dbuf_rele(db, FTAG);
743 } else {
744 mutex_exit(&db->db_mtx);
745 }
746 }
747
719 if (db->db_level != 0)
720 continue;
721 dprintf_dbuf(db, "found buf %s\n", "");
748 if (db->db_level != 0)
749 continue;
750 dprintf_dbuf(db, "found buf %s\n", "");
722 if (db->db_blkid < blkid ||
723 db->db_blkid >= blkid+nblks)
751 if (db->db_blkid < start || db->db_blkid > end)
724 continue;
725
726 /* found a level 0 buffer in the range */
727 if (dbuf_undirty(db, tx))
728 continue;
729
730 mutex_enter(&db->db_mtx);
731 if (db->db_state == DB_UNCACHED ||

--- 46 unchanged lines hidden (view full) ---

778 }
779
780 mutex_exit(&db->db_mtx);
781 }
782 mutex_exit(&dn->dn_dbufs_mtx);
783}
784
785static int
752 continue;
753
754 /* found a level 0 buffer in the range */
755 if (dbuf_undirty(db, tx))
756 continue;
757
758 mutex_enter(&db->db_mtx);
759 if (db->db_state == DB_UNCACHED ||

--- 46 unchanged lines hidden (view full) ---

806 }
807
808 mutex_exit(&db->db_mtx);
809 }
810 mutex_exit(&dn->dn_dbufs_mtx);
811}
812
813static int
786dbuf_new_block(dmu_buf_impl_t *db)
814dbuf_block_freeable(dmu_buf_impl_t *db)
787{
788 dsl_dataset_t *ds = db->db_objset->os_dsl_dataset;
789 uint64_t birth_txg = 0;
790
815{
816 dsl_dataset_t *ds = db->db_objset->os_dsl_dataset;
817 uint64_t birth_txg = 0;
818
791 /* Don't count meta-objects */
792 if (ds == NULL)
793 return (FALSE);
794
795 /*
796 * We don't need any locking to protect db_blkptr:
797 * If it's syncing, then db_last_dirty will be set
798 * so we'll ignore db_blkptr.
799 */
800 ASSERT(MUTEX_HELD(&db->db_mtx));
819 /*
820 * We don't need any locking to protect db_blkptr:
821 * If it's syncing, then db_last_dirty will be set
822 * so we'll ignore db_blkptr.
823 */
824 ASSERT(MUTEX_HELD(&db->db_mtx));
801 /* If we have been dirtied since the last snapshot, its not new */
802 if (db->db_last_dirty)
803 birth_txg = db->db_last_dirty->dr_txg;
804 else if (db->db_blkptr)
805 birth_txg = db->db_blkptr->blk_birth;
806
825 if (db->db_last_dirty)
826 birth_txg = db->db_last_dirty->dr_txg;
827 else if (db->db_blkptr)
828 birth_txg = db->db_blkptr->blk_birth;
829
830 /* If we don't exist or are in a snapshot, we can't be freed */
807 if (birth_txg)
831 if (birth_txg)
808 return (!dsl_dataset_block_freeable(ds, birth_txg));
832 return (ds == NULL ||
833 dsl_dataset_block_freeable(ds, birth_txg));
809 else
834 else
810 return (TRUE);
835 return (FALSE);
811}
812
813void
814dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
815{
816 arc_buf_t *buf, *obuf;
817 int osize = db->db.db_size;
818 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);

--- 41 unchanged lines hidden (view full) ---

860
861dbuf_dirty_record_t *
862dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
863{
864 dnode_t *dn = db->db_dnode;
865 objset_impl_t *os = dn->dn_objset;
866 dbuf_dirty_record_t **drp, *dr;
867 int drop_struct_lock = FALSE;
836}
837
838void
839dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
840{
841 arc_buf_t *buf, *obuf;
842 int osize = db->db.db_size;
843 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);

--- 41 unchanged lines hidden (view full) ---

885
886dbuf_dirty_record_t *
887dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
888{
889 dnode_t *dn = db->db_dnode;
890 objset_impl_t *os = dn->dn_objset;
891 dbuf_dirty_record_t **drp, *dr;
892 int drop_struct_lock = FALSE;
893 boolean_t do_free_accounting = B_FALSE;
868 int txgoff = tx->tx_txg & TXG_MASK;
869
870 ASSERT(tx->tx_txg != 0);
871 ASSERT(!refcount_is_zero(&db->db_holds));
872 DMU_TX_DIRTY_BUF(tx, db);
873
874 /*
875 * Shouldn't dirty a regular buffer in syncing context. Private

--- 41 unchanged lines hidden (view full) ---

917 mutex_exit(&dn->dn_mtx);
918
919 /*
920 * If this buffer is already dirty, we're done.
921 */
922 drp = &db->db_last_dirty;
923 ASSERT(*drp == NULL || (*drp)->dr_txg <= tx->tx_txg ||
924 db->db.db_object == DMU_META_DNODE_OBJECT);
894 int txgoff = tx->tx_txg & TXG_MASK;
895
896 ASSERT(tx->tx_txg != 0);
897 ASSERT(!refcount_is_zero(&db->db_holds));
898 DMU_TX_DIRTY_BUF(tx, db);
899
900 /*
901 * Shouldn't dirty a regular buffer in syncing context. Private

--- 41 unchanged lines hidden (view full) ---

943 mutex_exit(&dn->dn_mtx);
944
945 /*
946 * If this buffer is already dirty, we're done.
947 */
948 drp = &db->db_last_dirty;
949 ASSERT(*drp == NULL || (*drp)->dr_txg <= tx->tx_txg ||
950 db->db.db_object == DMU_META_DNODE_OBJECT);
925 while (*drp && (*drp)->dr_txg > tx->tx_txg)
926 drp = &(*drp)->dr_next;
927 if (*drp && (*drp)->dr_txg == tx->tx_txg) {
951 while ((dr = *drp) != NULL && dr->dr_txg > tx->tx_txg)
952 drp = &dr->dr_next;
953 if (dr && dr->dr_txg == tx->tx_txg) {
928 if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID) {
929 /*
930 * If this buffer has already been written out,
931 * we now need to reset its state.
932 */
954 if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID) {
955 /*
956 * If this buffer has already been written out,
957 * we now need to reset its state.
958 */
933 dbuf_unoverride(*drp);
959 dbuf_unoverride(dr);
934 if (db->db.db_object != DMU_META_DNODE_OBJECT)
935 arc_buf_thaw(db->db_buf);
936 }
937 mutex_exit(&db->db_mtx);
960 if (db->db.db_object != DMU_META_DNODE_OBJECT)
961 arc_buf_thaw(db->db_buf);
962 }
963 mutex_exit(&db->db_mtx);
938 return (*drp);
964 return (dr);
939 }
940
941 /*
942 * Only valid if not already dirty.
943 */
944 ASSERT(dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
945 (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
946

--- 14 unchanged lines hidden (view full) ---

961 ASSERT(!dmu_tx_is_syncing(tx) ||
962 os->os_dsl_dataset == NULL ||
963 !dsl_dir_is_private(os->os_dsl_dataset->ds_dir) ||
964 !BP_IS_HOLE(os->os_rootbp));
965 ASSERT(db->db.db_size != 0);
966
967 dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
968
965 }
966
967 /*
968 * Only valid if not already dirty.
969 */
970 ASSERT(dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
971 (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
972

--- 14 unchanged lines hidden (view full) ---

987 ASSERT(!dmu_tx_is_syncing(tx) ||
988 os->os_dsl_dataset == NULL ||
989 !dsl_dir_is_private(os->os_dsl_dataset->ds_dir) ||
990 !BP_IS_HOLE(os->os_rootbp));
991 ASSERT(db->db.db_size != 0);
992
993 dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
994
995 if (db->db_blkid != DB_BONUS_BLKID) {
996 /*
997 * Update the accounting.
998 * Note: we delay "free accounting" until after we drop
999 * the db_mtx. This keeps us from grabbing other locks
1000 * (and possibly deadlocking) in bp_get_dasize() while
1001 * also holding the db_mtx.
1002 */
1003 dnode_willuse_space(dn, db->db.db_size, tx);
1004 do_free_accounting = dbuf_block_freeable(db);
1005 }
1006
969 /*
970 * If this buffer is dirty in an old transaction group we need
971 * to make a copy of it so that the changes we make in this
972 * transaction group won't leak out when we sync the older txg.
973 */
974 dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP);
975 if (db->db_level == 0) {
976 void *data_old = db->db_buf;

--- 33 unchanged lines hidden (view full) ---

1010 */
1011 if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID) {
1012 mutex_enter(&dn->dn_mtx);
1013 dnode_clear_range(dn, db->db_blkid, 1, tx);
1014 mutex_exit(&dn->dn_mtx);
1015 db->db_freed_in_flight = FALSE;
1016 }
1017
1007 /*
1008 * If this buffer is dirty in an old transaction group we need
1009 * to make a copy of it so that the changes we make in this
1010 * transaction group won't leak out when we sync the older txg.
1011 */
1012 dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP);
1013 if (db->db_level == 0) {
1014 void *data_old = db->db_buf;

--- 33 unchanged lines hidden (view full) ---

1048 */
1049 if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID) {
1050 mutex_enter(&dn->dn_mtx);
1051 dnode_clear_range(dn, db->db_blkid, 1, tx);
1052 mutex_exit(&dn->dn_mtx);
1053 db->db_freed_in_flight = FALSE;
1054 }
1055
1018 if (db->db_blkid != DB_BONUS_BLKID) {
1019 /*
1020 * Update the accounting.
1021 */
1022 if (!dbuf_new_block(db) && db->db_blkptr) {
1023 /*
1024 * This is only a guess -- if the dbuf is dirty
1025 * in a previous txg, we don't know how much
1026 * space it will use on disk yet. We should
1027 * really have the struct_rwlock to access
1028 * db_blkptr, but since this is just a guess,
1029 * it's OK if we get an odd answer.
1030 */
1031 dnode_willuse_space(dn,
1032 -bp_get_dasize(os->os_spa, db->db_blkptr), tx);
1033 }
1034 dnode_willuse_space(dn, db->db.db_size, tx);
1035 }
1036
1037 /*
1038 * This buffer is now part of this txg
1039 */
1040 dbuf_add_ref(db, (void *)(uintptr_t)tx->tx_txg);
1041 db->db_dirtycnt += 1;
1042 ASSERT3U(db->db_dirtycnt, <=, 3);
1043
1044 mutex_exit(&db->db_mtx);
1045
1046 if (db->db_blkid == DB_BONUS_BLKID) {
1047 mutex_enter(&dn->dn_mtx);
1048 ASSERT(!list_link_active(&dr->dr_dirty_node));
1049 list_insert_tail(&dn->dn_dirty_records[txgoff], dr);
1050 mutex_exit(&dn->dn_mtx);
1051 dnode_setdirty(dn, tx);
1052 return (dr);
1056 /*
1057 * This buffer is now part of this txg
1058 */
1059 dbuf_add_ref(db, (void *)(uintptr_t)tx->tx_txg);
1060 db->db_dirtycnt += 1;
1061 ASSERT3U(db->db_dirtycnt, <=, 3);
1062
1063 mutex_exit(&db->db_mtx);
1064
1065 if (db->db_blkid == DB_BONUS_BLKID) {
1066 mutex_enter(&dn->dn_mtx);
1067 ASSERT(!list_link_active(&dr->dr_dirty_node));
1068 list_insert_tail(&dn->dn_dirty_records[txgoff], dr);
1069 mutex_exit(&dn->dn_mtx);
1070 dnode_setdirty(dn, tx);
1071 return (dr);
1072 } else if (do_free_accounting) {
1073 blkptr_t *bp = db->db_blkptr;
1074 int64_t willfree = (bp && !BP_IS_HOLE(bp)) ?
1075 bp_get_dasize(os->os_spa, bp) : db->db.db_size;
1076 /*
1077 * This is only a guess -- if the dbuf is dirty
1078 * in a previous txg, we don't know how much
1079 * space it will use on disk yet. We should
1080 * really have the struct_rwlock to access
1081 * db_blkptr, but since this is just a guess,
1082 * it's OK if we get an odd answer.
1083 */
1084 dnode_willuse_space(dn, -willfree, tx);
1053 }
1054
1085 }
1086
1055 if (db->db_level == 0) {
1056 dnode_new_blkid(dn, db->db_blkid, tx);
1057 ASSERT(dn->dn_maxblkid >= db->db_blkid);
1058 }
1059
1060 if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) {
1061 rw_enter(&dn->dn_struct_rwlock, RW_READER);
1062 drop_struct_lock = TRUE;
1063 }
1064
1087 if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) {
1088 rw_enter(&dn->dn_struct_rwlock, RW_READER);
1089 drop_struct_lock = TRUE;
1090 }
1091
1092 if (db->db_level == 0) {
1093 dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock);
1094 ASSERT(dn->dn_maxblkid >= db->db_blkid);
1095 }
1096
1065 if (db->db_level+1 < dn->dn_nlevels) {
1066 dmu_buf_impl_t *parent = db->db_parent;
1067 dbuf_dirty_record_t *di;
1068 int parent_held = FALSE;
1069
1070 if (db->db_parent == NULL || db->db_parent == dn->dn_dbuf) {
1071 int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
1072

--- 37 unchanged lines hidden (view full) ---

1110 return (dr);
1111}
1112
1113static int
1114dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
1115{
1116 dnode_t *dn = db->db_dnode;
1117 uint64_t txg = tx->tx_txg;
1097 if (db->db_level+1 < dn->dn_nlevels) {
1098 dmu_buf_impl_t *parent = db->db_parent;
1099 dbuf_dirty_record_t *di;
1100 int parent_held = FALSE;
1101
1102 if (db->db_parent == NULL || db->db_parent == dn->dn_dbuf) {
1103 int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
1104

--- 37 unchanged lines hidden (view full) ---

1142 return (dr);
1143}
1144
1145static int
1146dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
1147{
1148 dnode_t *dn = db->db_dnode;
1149 uint64_t txg = tx->tx_txg;
1118 dbuf_dirty_record_t *dr;
1150 dbuf_dirty_record_t *dr, **drp;
1119
1120 ASSERT(txg != 0);
1121 ASSERT(db->db_blkid != DB_BONUS_BLKID);
1122
1123 mutex_enter(&db->db_mtx);
1124
1125 /*
1126 * If this buffer is not dirty, we're done.
1127 */
1151
1152 ASSERT(txg != 0);
1153 ASSERT(db->db_blkid != DB_BONUS_BLKID);
1154
1155 mutex_enter(&db->db_mtx);
1156
1157 /*
1158 * If this buffer is not dirty, we're done.
1159 */
1128 for (dr = db->db_last_dirty; dr; dr = dr->dr_next)
1160 for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
1129 if (dr->dr_txg <= txg)
1130 break;
1131 if (dr == NULL || dr->dr_txg < txg) {
1132 mutex_exit(&db->db_mtx);
1133 return (0);
1134 }
1135 ASSERT(dr->dr_txg == txg);
1136

--- 13 unchanged lines hidden (view full) ---

1150 }
1151
1152 dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
1153
1154 ASSERT(db->db.db_size != 0);
1155
1156 /* XXX would be nice to fix up dn_towrite_space[] */
1157
1161 if (dr->dr_txg <= txg)
1162 break;
1163 if (dr == NULL || dr->dr_txg < txg) {
1164 mutex_exit(&db->db_mtx);
1165 return (0);
1166 }
1167 ASSERT(dr->dr_txg == txg);
1168

--- 13 unchanged lines hidden (view full) ---

1182 }
1183
1184 dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
1185
1186 ASSERT(db->db.db_size != 0);
1187
1188 /* XXX would be nice to fix up dn_towrite_space[] */
1189
1158 db->db_last_dirty = dr->dr_next;
1190 *drp = dr->dr_next;
1159
1160 if (dr->dr_parent) {
1161 mutex_enter(&dr->dr_parent->dt.di.dr_mtx);
1162 list_remove(&dr->dr_parent->dt.di.dr_children, dr);
1163 mutex_exit(&dr->dr_parent->dt.di.dr_mtx);
1164 } else if (db->db_level+1 == dn->dn_nlevels) {
1191
1192 if (dr->dr_parent) {
1193 mutex_enter(&dr->dr_parent->dt.di.dr_mtx);
1194 list_remove(&dr->dr_parent->dt.di.dr_children, dr);
1195 mutex_exit(&dr->dr_parent->dt.di.dr_mtx);
1196 } else if (db->db_level+1 == dn->dn_nlevels) {
1165 ASSERT3P(db->db_parent, ==, dn->dn_dbuf);
1197 ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf);
1166 mutex_enter(&dn->dn_mtx);
1167 list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr);
1168 mutex_exit(&dn->dn_mtx);
1169 }
1170
1171 if (db->db_level == 0) {
1172 dbuf_unoverride(dr);
1173
1174 ASSERT(db->db_buf != NULL);
1175 ASSERT(dr->dt.dl.dr_data != NULL);
1176 if (dr->dt.dl.dr_data != db->db_buf)
1177 VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db) == 1);
1178 } else {
1179 ASSERT(db->db_buf != NULL);
1180 ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
1198 mutex_enter(&dn->dn_mtx);
1199 list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr);
1200 mutex_exit(&dn->dn_mtx);
1201 }
1202
1203 if (db->db_level == 0) {
1204 dbuf_unoverride(dr);
1205
1206 ASSERT(db->db_buf != NULL);
1207 ASSERT(dr->dt.dl.dr_data != NULL);
1208 if (dr->dt.dl.dr_data != db->db_buf)
1209 VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db) == 1);
1210 } else {
1211 ASSERT(db->db_buf != NULL);
1212 ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
1181 list_destroy(&dr->dt.di.dr_children);
1182 mutex_destroy(&dr->dt.di.dr_mtx);
1213 mutex_destroy(&dr->dt.di.dr_mtx);
1214 list_destroy(&dr->dt.di.dr_children);
1183 }
1184 kmem_free(dr, sizeof (dbuf_dirty_record_t));
1185
1186 ASSERT(db->db_dirtycnt > 0);
1187 db->db_dirtycnt -= 1;
1188
1189 if (refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) {
1190 arc_buf_t *buf = db->db_buf;

--- 8 unchanged lines hidden (view full) ---

1199 mutex_exit(&db->db_mtx);
1200 return (0);
1201}
1202
1203#pragma weak dmu_buf_will_dirty = dbuf_will_dirty
1204void
1205dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
1206{
1215 }
1216 kmem_free(dr, sizeof (dbuf_dirty_record_t));
1217
1218 ASSERT(db->db_dirtycnt > 0);
1219 db->db_dirtycnt -= 1;
1220
1221 if (refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) {
1222 arc_buf_t *buf = db->db_buf;

--- 8 unchanged lines hidden (view full) ---

1231 mutex_exit(&db->db_mtx);
1232 return (0);
1233}
1234
1235#pragma weak dmu_buf_will_dirty = dbuf_will_dirty
1236void
1237dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
1238{
1207 int rf = DB_RF_MUST_SUCCEED;
1239 int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
1208
1209 ASSERT(tx->tx_txg != 0);
1210 ASSERT(!refcount_is_zero(&db->db_holds));
1211
1212 if (RW_WRITE_HELD(&db->db_dnode->dn_struct_rwlock))
1213 rf |= DB_RF_HAVESTRUCT;
1214 (void) dbuf_read(db, NULL, rf);
1215 (void) dbuf_dirty(db, tx);

--- 61 unchanged lines hidden (view full) ---

1277
1278 ASSERT(MUTEX_HELD(&db->db_mtx));
1279 ASSERT(refcount_is_zero(&db->db_holds));
1280
1281 dbuf_evict_user(db);
1282
1283 if (db->db_state == DB_CACHED) {
1284 ASSERT(db->db.db_data != NULL);
1240
1241 ASSERT(tx->tx_txg != 0);
1242 ASSERT(!refcount_is_zero(&db->db_holds));
1243
1244 if (RW_WRITE_HELD(&db->db_dnode->dn_struct_rwlock))
1245 rf |= DB_RF_HAVESTRUCT;
1246 (void) dbuf_read(db, NULL, rf);
1247 (void) dbuf_dirty(db, tx);

--- 61 unchanged lines hidden (view full) ---

1309
1310 ASSERT(MUTEX_HELD(&db->db_mtx));
1311 ASSERT(refcount_is_zero(&db->db_holds));
1312
1313 dbuf_evict_user(db);
1314
1315 if (db->db_state == DB_CACHED) {
1316 ASSERT(db->db.db_data != NULL);
1285 if (db->db_blkid == DB_BONUS_BLKID)
1317 if (db->db_blkid == DB_BONUS_BLKID) {
1286 zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN);
1318 zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN);
1319 arc_space_return(DN_MAX_BONUSLEN);
1320 }
1287 db->db.db_data = NULL;
1288 db->db_state = DB_UNCACHED;
1289 }
1290
1291 ASSERT3U(db->db_state, ==, DB_UNCACHED);
1292 ASSERT(db->db_data_pending == NULL);
1293
1294 db->db_state = DB_EVICTING;
1295 db->db_blkptr = NULL;
1296
1297 if (db->db_blkid != DB_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) {
1298 list_remove(&dn->dn_dbufs, db);
1299 dnode_rele(dn, db);
1321 db->db.db_data = NULL;
1322 db->db_state = DB_UNCACHED;
1323 }
1324
1325 ASSERT3U(db->db_state, ==, DB_UNCACHED);
1326 ASSERT(db->db_data_pending == NULL);
1327
1328 db->db_state = DB_EVICTING;
1329 db->db_blkptr = NULL;
1330
1331 if (db->db_blkid != DB_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) {
1332 list_remove(&dn->dn_dbufs, db);
1333 dnode_rele(dn, db);
1334 db->db_dnode = NULL;
1300 }
1301
1302 if (db->db_buf)
1303 dbuf_gone = arc_buf_evict(db->db_buf);
1304
1305 if (!dbuf_gone)
1306 mutex_exit(&db->db_mtx);
1307

--- 84 unchanged lines hidden (view full) ---

1392 db->db_user_ptr = NULL;
1393 db->db_user_data_ptr_ptr = NULL;
1394 db->db_evict_func = NULL;
1395 db->db_immediate_evict = 0;
1396 db->db_freed_in_flight = 0;
1397
1398 if (blkid == DB_BONUS_BLKID) {
1399 ASSERT3P(parent, ==, dn->dn_dbuf);
1335 }
1336
1337 if (db->db_buf)
1338 dbuf_gone = arc_buf_evict(db->db_buf);
1339
1340 if (!dbuf_gone)
1341 mutex_exit(&db->db_mtx);
1342

--- 84 unchanged lines hidden (view full) ---

1427 db->db_user_ptr = NULL;
1428 db->db_user_data_ptr_ptr = NULL;
1429 db->db_evict_func = NULL;
1430 db->db_immediate_evict = 0;
1431 db->db_freed_in_flight = 0;
1432
1433 if (blkid == DB_BONUS_BLKID) {
1434 ASSERT3P(parent, ==, dn->dn_dbuf);
1400 db->db.db_size = dn->dn_bonuslen;
1435 db->db.db_size = DN_MAX_BONUSLEN -
1436 (dn->dn_nblkptr-1) * sizeof (blkptr_t);
1437 ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
1401 db->db.db_offset = DB_BONUS_BLKID;
1402 db->db_state = DB_UNCACHED;
1403 /* the bonus dbuf is not placed in the hash table */
1438 db->db.db_offset = DB_BONUS_BLKID;
1439 db->db_state = DB_UNCACHED;
1440 /* the bonus dbuf is not placed in the hash table */
1441 arc_space_consume(sizeof (dmu_buf_impl_t));
1404 return (db);
1405 } else {
1406 int blocksize =
1407 db->db_level ? 1<<dn->dn_indblkshift : dn->dn_datablksz;
1408 db->db.db_size = blocksize;
1409 db->db.db_offset = db->db_blkid * blocksize;
1410 }
1411

--- 10 unchanged lines hidden (view full) ---

1422 /* someone else inserted it first */
1423 kmem_cache_free(dbuf_cache, db);
1424 mutex_exit(&dn->dn_dbufs_mtx);
1425 return (odb);
1426 }
1427 list_insert_head(&dn->dn_dbufs, db);
1428 db->db_state = DB_UNCACHED;
1429 mutex_exit(&dn->dn_dbufs_mtx);
1442 return (db);
1443 } else {
1444 int blocksize =
1445 db->db_level ? 1<<dn->dn_indblkshift : dn->dn_datablksz;
1446 db->db.db_size = blocksize;
1447 db->db.db_offset = db->db_blkid * blocksize;
1448 }
1449

--- 10 unchanged lines hidden (view full) ---

1460 /* someone else inserted it first */
1461 kmem_cache_free(dbuf_cache, db);
1462 mutex_exit(&dn->dn_dbufs_mtx);
1463 return (odb);
1464 }
1465 list_insert_head(&dn->dn_dbufs, db);
1466 db->db_state = DB_UNCACHED;
1467 mutex_exit(&dn->dn_dbufs_mtx);
1468 arc_space_consume(sizeof (dmu_buf_impl_t));
1430
1431 if (parent && parent != dn->dn_dbuf)
1432 dbuf_add_ref(parent, db);
1433
1434 ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT ||
1435 refcount_count(&dn->dn_holds) > 0);
1436 (void) refcount_add(&dn->dn_holds, db);
1437

--- 26 unchanged lines hidden (view full) ---

1464}
1465
1466static void
1467dbuf_destroy(dmu_buf_impl_t *db)
1468{
1469 ASSERT(refcount_is_zero(&db->db_holds));
1470
1471 if (db->db_blkid != DB_BONUS_BLKID) {
1469
1470 if (parent && parent != dn->dn_dbuf)
1471 dbuf_add_ref(parent, db);
1472
1473 ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT ||
1474 refcount_count(&dn->dn_holds) > 0);
1475 (void) refcount_add(&dn->dn_holds, db);
1476

--- 26 unchanged lines hidden (view full) ---

1503}
1504
1505static void
1506dbuf_destroy(dmu_buf_impl_t *db)
1507{
1508 ASSERT(refcount_is_zero(&db->db_holds));
1509
1510 if (db->db_blkid != DB_BONUS_BLKID) {
1472 dnode_t *dn = db->db_dnode;
1473
1474 /*
1475 * If this dbuf is still on the dn_dbufs list,
1476 * remove it from that list.
1477 */
1511 /*
1512 * If this dbuf is still on the dn_dbufs list,
1513 * remove it from that list.
1514 */
1478 if (list_link_active(&db->db_link)) {
1515 if (db->db_dnode) {
1516 dnode_t *dn = db->db_dnode;
1517
1479 mutex_enter(&dn->dn_dbufs_mtx);
1480 list_remove(&dn->dn_dbufs, db);
1481 mutex_exit(&dn->dn_dbufs_mtx);
1482
1483 dnode_rele(dn, db);
1518 mutex_enter(&dn->dn_dbufs_mtx);
1519 list_remove(&dn->dn_dbufs, db);
1520 mutex_exit(&dn->dn_dbufs_mtx);
1521
1522 dnode_rele(dn, db);
1523 db->db_dnode = NULL;
1484 }
1485 dbuf_hash_remove(db);
1486 }
1487 db->db_parent = NULL;
1524 }
1525 dbuf_hash_remove(db);
1526 }
1527 db->db_parent = NULL;
1488 db->db_dnode = NULL;
1489 db->db_buf = NULL;
1490
1528 db->db_buf = NULL;
1529
1530 ASSERT(!list_link_active(&db->db_link));
1491 ASSERT(db->db.db_data == NULL);
1492 ASSERT(db->db_hash_next == NULL);
1493 ASSERT(db->db_blkptr == NULL);
1494 ASSERT(db->db_data_pending == NULL);
1495
1496 kmem_cache_free(dbuf_cache, db);
1531 ASSERT(db->db.db_data == NULL);
1532 ASSERT(db->db_hash_next == NULL);
1533 ASSERT(db->db_blkptr == NULL);
1534 ASSERT(db->db_data_pending == NULL);
1535
1536 kmem_cache_free(dbuf_cache, db);
1537 arc_space_return(sizeof (dmu_buf_impl_t));
1497}
1498
1499void
1500dbuf_prefetch(dnode_t *dn, uint64_t blkid)
1501{
1502 dmu_buf_impl_t *db = NULL;
1503 blkptr_t *bp = NULL;
1504

--- 15 unchanged lines hidden (view full) ---

1520 return;
1521 }
1522 mutex_exit(&db->db_mtx);
1523 db = NULL;
1524 }
1525
1526 if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) {
1527 if (bp && !BP_IS_HOLE(bp)) {
1538}
1539
1540void
1541dbuf_prefetch(dnode_t *dn, uint64_t blkid)
1542{
1543 dmu_buf_impl_t *db = NULL;
1544 blkptr_t *bp = NULL;
1545

--- 15 unchanged lines hidden (view full) ---

1561 return;
1562 }
1563 mutex_exit(&db->db_mtx);
1564 db = NULL;
1565 }
1566
1567 if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) {
1568 if (bp && !BP_IS_HOLE(bp)) {
1569 arc_buf_t *pbuf;
1528 uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
1529 zbookmark_t zb;
1530 zb.zb_objset = dn->dn_objset->os_dsl_dataset ?
1531 dn->dn_objset->os_dsl_dataset->ds_object : 0;
1532 zb.zb_object = dn->dn_object;
1533 zb.zb_level = 0;
1534 zb.zb_blkid = blkid;
1535
1570 uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
1571 zbookmark_t zb;
1572 zb.zb_objset = dn->dn_objset->os_dsl_dataset ?
1573 dn->dn_objset->os_dsl_dataset->ds_object : 0;
1574 zb.zb_object = dn->dn_object;
1575 zb.zb_level = 0;
1576 zb.zb_blkid = blkid;
1577
1536 (void) arc_read(NULL, dn->dn_objset->os_spa, bp,
1537 dmu_ot[dn->dn_type].ot_byteswap,
1538 NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
1578 if (db)
1579 pbuf = db->db_buf;
1580 else
1581 pbuf = dn->dn_objset->os_phys_buf;
1582
1583 (void) arc_read(NULL, dn->dn_objset->os_spa,
1584 bp, pbuf, NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
1539 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
1540 &aflags, &zb);
1541 }
1542 if (db)
1543 dbuf_rele(db, NULL);
1544 }
1545}
1546

--- 100 unchanged lines hidden (view full) ---

1647dmu_buf_impl_t *
1648dbuf_hold_level(dnode_t *dn, int level, uint64_t blkid, void *tag)
1649{
1650 dmu_buf_impl_t *db;
1651 int err = dbuf_hold_impl(dn, level, blkid, FALSE, tag, &db);
1652 return (err ? NULL : db);
1653}
1654
1585 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
1586 &aflags, &zb);
1587 }
1588 if (db)
1589 dbuf_rele(db, NULL);
1590 }
1591}
1592

--- 100 unchanged lines hidden (view full) ---

1693dmu_buf_impl_t *
1694dbuf_hold_level(dnode_t *dn, int level, uint64_t blkid, void *tag)
1695{
1696 dmu_buf_impl_t *db;
1697 int err = dbuf_hold_impl(dn, level, blkid, FALSE, tag, &db);
1698 return (err ? NULL : db);
1699}
1700
1655dmu_buf_impl_t *
1701void
1656dbuf_create_bonus(dnode_t *dn)
1657{
1702dbuf_create_bonus(dnode_t *dn)
1703{
1658 dmu_buf_impl_t *db = dn->dn_bonus;
1659
1660 ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
1661
1662 ASSERT(dn->dn_bonus == NULL);
1704 ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
1705
1706 ASSERT(dn->dn_bonus == NULL);
1663 db = dbuf_create(dn, 0, DB_BONUS_BLKID, dn->dn_dbuf, NULL);
1664 return (db);
1707 dn->dn_bonus = dbuf_create(dn, 0, DB_BONUS_BLKID, dn->dn_dbuf, NULL);
1665}
1666
1667#pragma weak dmu_buf_add_ref = dbuf_add_ref
1668void
1669dbuf_add_ref(dmu_buf_impl_t *db, void *tag)
1670{
1671 int64_t holds = refcount_add(&db->db_holds, tag);
1672 ASSERT(holds > 1);

--- 38 unchanged lines hidden (view full) ---

1711 /*
1712 * This dbuf has anonymous data associated with it.
1713 */
1714 dbuf_set_data(db, NULL);
1715 VERIFY(arc_buf_remove_ref(buf, db) == 1);
1716 dbuf_evict(db);
1717 } else {
1718 VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
1708}
1709
1710#pragma weak dmu_buf_add_ref = dbuf_add_ref
1711void
1712dbuf_add_ref(dmu_buf_impl_t *db, void *tag)
1713{
1714 int64_t holds = refcount_add(&db->db_holds, tag);
1715 ASSERT(holds > 1);

--- 38 unchanged lines hidden (view full) ---

1754 /*
1755 * This dbuf has anonymous data associated with it.
1756 */
1757 dbuf_set_data(db, NULL);
1758 VERIFY(arc_buf_remove_ref(buf, db) == 1);
1759 dbuf_evict(db);
1760 } else {
1761 VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
1719 mutex_exit(&db->db_mtx);
1762 if (!DBUF_IS_CACHEABLE(db))
1763 dbuf_clear(db);
1764 else
1765 mutex_exit(&db->db_mtx);
1720 }
1721 } else {
1722 mutex_exit(&db->db_mtx);
1723 }
1724}
1725
1726#pragma weak dmu_buf_refcount = dbuf_refcount
1727uint64_t

--- 119 unchanged lines hidden (view full) ---

1847 ASSERT3U(db->db_state, ==, DB_CACHED);
1848 ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
1849 ASSERT(db->db_buf != NULL);
1850
1851 dbuf_check_blkptr(dn, db);
1852
1853 db->db_data_pending = dr;
1854
1766 }
1767 } else {
1768 mutex_exit(&db->db_mtx);
1769 }
1770}
1771
1772#pragma weak dmu_buf_refcount = dbuf_refcount
1773uint64_t

--- 119 unchanged lines hidden (view full) ---

1893 ASSERT3U(db->db_state, ==, DB_CACHED);
1894 ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
1895 ASSERT(db->db_buf != NULL);
1896
1897 dbuf_check_blkptr(dn, db);
1898
1899 db->db_data_pending = dr;
1900
1855 arc_release(db->db_buf, db);
1856 mutex_exit(&db->db_mtx);
1901 mutex_exit(&db->db_mtx);
1902 dbuf_write(dr, db->db_buf, tx);
1857
1903
1858 /*
1859 * XXX -- we should design a compression algorithm
1860 * that specializes in arrays of bps.
1861 */
1862 dbuf_write(dr, db->db_buf, ZIO_CHECKSUM_FLETCHER_4,
1863 zfs_mdcomp_disable ? ZIO_COMPRESS_EMPTY : ZIO_COMPRESS_LZJB, tx);
1864
1865 zio = dr->dr_zio;
1866 mutex_enter(&dr->dt.di.dr_mtx);
1867 dbuf_sync_list(&dr->dt.di.dr_children, tx);
1868 ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
1869 mutex_exit(&dr->dt.di.dr_mtx);
1870 zio_nowait(zio);
1871}
1872
1873static void
1874dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
1875{
1876 arc_buf_t **datap = &dr->dt.dl.dr_data;
1877 dmu_buf_impl_t *db = dr->dr_dbuf;
1878 dnode_t *dn = db->db_dnode;
1879 objset_impl_t *os = dn->dn_objset;
1880 uint64_t txg = tx->tx_txg;
1904 zio = dr->dr_zio;
1905 mutex_enter(&dr->dt.di.dr_mtx);
1906 dbuf_sync_list(&dr->dt.di.dr_children, tx);
1907 ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
1908 mutex_exit(&dr->dt.di.dr_mtx);
1909 zio_nowait(zio);
1910}
1911
1912static void
1913dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
1914{
1915 arc_buf_t **datap = &dr->dt.dl.dr_data;
1916 dmu_buf_impl_t *db = dr->dr_dbuf;
1917 dnode_t *dn = db->db_dnode;
1918 objset_impl_t *os = dn->dn_objset;
1919 uint64_t txg = tx->tx_txg;
1881 int checksum, compress;
1882 int blksz;
1883
1884 ASSERT(dmu_tx_is_syncing(tx));
1885
1886 dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr);
1887
1888 mutex_enter(&db->db_mtx);
1889 /*

--- 14 unchanged lines hidden (view full) ---

1904 /*
1905 * If this is a bonus buffer, simply copy the bonus data into the
1906 * dnode. It will be written out when the dnode is synced (and it
1907 * will be synced, since it must have been dirty for dbuf_sync to
1908 * be called).
1909 */
1910 if (db->db_blkid == DB_BONUS_BLKID) {
1911 dbuf_dirty_record_t **drp;
1920 int blksz;
1921
1922 ASSERT(dmu_tx_is_syncing(tx));
1923
1924 dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr);
1925
1926 mutex_enter(&db->db_mtx);
1927 /*

--- 14 unchanged lines hidden (view full) ---

1942 /*
1943 * If this is a bonus buffer, simply copy the bonus data into the
1944 * dnode. It will be written out when the dnode is synced (and it
1945 * will be synced, since it must have been dirty for dbuf_sync to
1946 * be called).
1947 */
1948 if (db->db_blkid == DB_BONUS_BLKID) {
1949 dbuf_dirty_record_t **drp;
1912 /*
1913 * Use dn_phys->dn_bonuslen since db.db_size is the length
1914 * of the bonus buffer in the open transaction rather than
1915 * the syncing transaction.
1916 */
1950
1917 ASSERT(*datap != NULL);
1918 ASSERT3U(db->db_level, ==, 0);
1919 ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN);
1920 bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen);
1951 ASSERT(*datap != NULL);
1952 ASSERT3U(db->db_level, ==, 0);
1953 ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN);
1954 bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen);
1921 if (*datap != db->db.db_data)
1955 if (*datap != db->db.db_data) {
1922 zio_buf_free(*datap, DN_MAX_BONUSLEN);
1956 zio_buf_free(*datap, DN_MAX_BONUSLEN);
1957 arc_space_return(DN_MAX_BONUSLEN);
1958 }
1923 db->db_data_pending = NULL;
1924 drp = &db->db_last_dirty;
1925 while (*drp != dr)
1926 drp = &(*drp)->dr_next;
1959 db->db_data_pending = NULL;
1960 drp = &db->db_last_dirty;
1961 while (*drp != dr)
1962 drp = &(*drp)->dr_next;
1927 ASSERT((*drp)->dr_next == NULL);
1928 *drp = NULL;
1963 ASSERT(dr->dr_next == NULL);
1964 *drp = dr->dr_next;
1929 if (dr->dr_dbuf->db_level != 0) {
1930 list_destroy(&dr->dt.di.dr_children);
1931 mutex_destroy(&dr->dt.di.dr_mtx);
1932 }
1933 kmem_free(dr, sizeof (dbuf_dirty_record_t));
1934 ASSERT(db->db_dirtycnt > 0);
1935 db->db_dirtycnt -= 1;
1936 mutex_exit(&db->db_mtx);
1937 dbuf_rele(db, (void *)(uintptr_t)txg);
1938 return;
1939 }
1940
1941 /*
1965 if (dr->dr_dbuf->db_level != 0) {
1966 list_destroy(&dr->dt.di.dr_children);
1967 mutex_destroy(&dr->dt.di.dr_mtx);
1968 }
1969 kmem_free(dr, sizeof (dbuf_dirty_record_t));
1970 ASSERT(db->db_dirtycnt > 0);
1971 db->db_dirtycnt -= 1;
1972 mutex_exit(&db->db_mtx);
1973 dbuf_rele(db, (void *)(uintptr_t)txg);
1974 return;
1975 }
1976
1977 /*
1978 * This function may have dropped the db_mtx lock allowing a dmu_sync
1979 * operation to sneak in. As a result, we need to ensure that we
1980 * don't check the dr_override_state until we have returned from
1981 * dbuf_check_blkptr.
1982 */
1983 dbuf_check_blkptr(dn, db);
1984
1985 /*
1942 * If this buffer is in the middle of an immediate write,
1943 * wait for the synchronous IO to complete.
1944 */
1945 while (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC) {
1946 ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
1947 cv_wait(&db->db_changed, &db->db_mtx);
1948 ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN);
1949 }
1950
1986 * If this buffer is in the middle of an immediate write,
1987 * wait for the synchronous IO to complete.
1988 */
1989 while (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC) {
1990 ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
1991 cv_wait(&db->db_changed, &db->db_mtx);
1992 ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN);
1993 }
1994
1951 dbuf_check_blkptr(dn, db);
1952
1953 /*
1954 * If this dbuf has already been written out via an immediate write,
1955 * just complete the write by copying over the new block pointer and
1956 * updating the accounting via the write-completion functions.
1957 */
1958 if (dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
1959 zio_t zio_fake;
1960
1961 zio_fake.io_private = &db;
1962 zio_fake.io_error = 0;
1963 zio_fake.io_bp = db->db_blkptr;
1964 zio_fake.io_bp_orig = *db->db_blkptr;
1965 zio_fake.io_txg = txg;
1995 /*
1996 * If this dbuf has already been written out via an immediate write,
1997 * just complete the write by copying over the new block pointer and
1998 * updating the accounting via the write-completion functions.
1999 */
2000 if (dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
2001 zio_t zio_fake;
2002
2003 zio_fake.io_private = &db;
2004 zio_fake.io_error = 0;
2005 zio_fake.io_bp = db->db_blkptr;
2006 zio_fake.io_bp_orig = *db->db_blkptr;
2007 zio_fake.io_txg = txg;
2008 zio_fake.io_flags = 0;
1966
1967 *db->db_blkptr = dr->dt.dl.dr_overridden_by;
1968 dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
1969 db->db_data_pending = dr;
1970 dr->dr_zio = &zio_fake;
1971 mutex_exit(&db->db_mtx);
1972
2009
2010 *db->db_blkptr = dr->dt.dl.dr_overridden_by;
2011 dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
2012 db->db_data_pending = dr;
2013 dr->dr_zio = &zio_fake;
2014 mutex_exit(&db->db_mtx);
2015
2016 ASSERT(!DVA_EQUAL(BP_IDENTITY(zio_fake.io_bp),
2017 BP_IDENTITY(&zio_fake.io_bp_orig)) ||
2018 BP_IS_HOLE(zio_fake.io_bp));
2019
1973 if (BP_IS_OLDER(&zio_fake.io_bp_orig, txg))
2020 if (BP_IS_OLDER(&zio_fake.io_bp_orig, txg))
1974 dsl_dataset_block_kill(os->os_dsl_dataset,
2021 (void) dsl_dataset_block_kill(os->os_dsl_dataset,
1975 &zio_fake.io_bp_orig, dn->dn_zio, tx);
1976
1977 dbuf_write_ready(&zio_fake, db->db_buf, db);
1978 dbuf_write_done(&zio_fake, db->db_buf, db);
1979
1980 return;
1981 }
1982

--- 9 unchanged lines hidden (view full) ---

1992 * NOTE: this copy does not need to be made for objects only
1993 * modified in the syncing context (e.g. DMU_OT_DNODE blocks).
1994 */
1995 if (refcount_count(&db->db_holds) > 1 && *datap == db->db_buf) {
1996 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
1997 *datap = arc_buf_alloc(os->os_spa, blksz, db, type);
1998 bcopy(db->db.db_data, (*datap)->b_data, blksz);
1999 }
2022 &zio_fake.io_bp_orig, dn->dn_zio, tx);
2023
2024 dbuf_write_ready(&zio_fake, db->db_buf, db);
2025 dbuf_write_done(&zio_fake, db->db_buf, db);
2026
2027 return;
2028 }
2029

--- 9 unchanged lines hidden (view full) ---

2039 * NOTE: this copy does not need to be made for objects only
2040 * modified in the syncing context (e.g. DMU_OT_DNODE blocks).
2041 */
2042 if (refcount_count(&db->db_holds) > 1 && *datap == db->db_buf) {
2043 arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
2044 *datap = arc_buf_alloc(os->os_spa, blksz, db, type);
2045 bcopy(db->db.db_data, (*datap)->b_data, blksz);
2046 }
2000 } else {
2001 /*
2002 * Private object buffers are released here rather
2003 * than in dbuf_dirty() since they are only modified
2004 * in the syncing context and we don't want the
2005 * overhead of making multiple copies of the data.
2006 */
2007 arc_release(db->db_buf, db);
2008 }
2009
2010 ASSERT(*datap != NULL);
2011 db->db_data_pending = dr;
2012
2013 mutex_exit(&db->db_mtx);
2014
2047 }
2048
2049 ASSERT(*datap != NULL);
2050 db->db_data_pending = dr;
2051
2052 mutex_exit(&db->db_mtx);
2053
2015 /*
2016 * Allow dnode settings to override objset settings,
2017 * except for metadata checksums.
2018 */
2019 if (dmu_ot[dn->dn_type].ot_metadata) {
2020 checksum = os->os_md_checksum;
2021 compress = zio_compress_select(dn->dn_compress,
2022 os->os_md_compress);
2023 } else {
2024 checksum = zio_checksum_select(dn->dn_checksum,
2025 os->os_checksum);
2026 compress = zio_compress_select(dn->dn_compress,
2027 os->os_compress);
2028 }
2054 dbuf_write(dr, *datap, tx);
2029
2055
2030 dbuf_write(dr, *datap, checksum, compress, tx);
2031
2032 ASSERT(!list_link_active(&dr->dr_dirty_node));
2033 if (dn->dn_object == DMU_META_DNODE_OBJECT)
2034 list_insert_tail(&dn->dn_dirty_records[txg&TXG_MASK], dr);
2035 else
2036 zio_nowait(dr->dr_zio);
2037}
2038
2039void

--- 18 unchanged lines hidden (view full) ---

2058 if (dr->dr_dbuf->db_level > 0)
2059 dbuf_sync_indirect(dr, tx);
2060 else
2061 dbuf_sync_leaf(dr, tx);
2062 }
2063}
2064
2065static void
2056 ASSERT(!list_link_active(&dr->dr_dirty_node));
2057 if (dn->dn_object == DMU_META_DNODE_OBJECT)
2058 list_insert_tail(&dn->dn_dirty_records[txg&TXG_MASK], dr);
2059 else
2060 zio_nowait(dr->dr_zio);
2061}
2062
2063void

--- 18 unchanged lines hidden (view full) ---

2082 if (dr->dr_dbuf->db_level > 0)
2083 dbuf_sync_indirect(dr, tx);
2084 else
2085 dbuf_sync_leaf(dr, tx);
2086 }
2087}
2088
2089static void
2066dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, int checksum,
2067 int compress, dmu_tx_t *tx)
2090dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
2068{
2069 dmu_buf_impl_t *db = dr->dr_dbuf;
2070 dnode_t *dn = db->db_dnode;
2071 objset_impl_t *os = dn->dn_objset;
2072 dmu_buf_impl_t *parent = db->db_parent;
2073 uint64_t txg = tx->tx_txg;
2074 zbookmark_t zb;
2091{
2092 dmu_buf_impl_t *db = dr->dr_dbuf;
2093 dnode_t *dn = db->db_dnode;
2094 objset_impl_t *os = dn->dn_objset;
2095 dmu_buf_impl_t *parent = db->db_parent;
2096 uint64_t txg = tx->tx_txg;
2097 zbookmark_t zb;
2098 writeprops_t wp = { 0 };
2075 zio_t *zio;
2099 zio_t *zio;
2076 int zio_flags;
2077
2100
2101 if (!BP_IS_HOLE(db->db_blkptr) &&
2102 (db->db_level > 0 || dn->dn_type == DMU_OT_DNODE)) {
2103 /*
2104 * Private object buffers are released here rather
2105 * than in dbuf_dirty() since they are only modified
2106 * in the syncing context and we don't want the
2107 * overhead of making multiple copies of the data.
2108 */
2109 arc_release(data, db);
2110 } else {
2111 ASSERT(arc_released(data));
2112 /* XXX why do we need to thaw here? */
2113 arc_buf_thaw(data);
2114 }
2115
2078 if (parent != dn->dn_dbuf) {
2079 ASSERT(parent && parent->db_data_pending);
2080 ASSERT(db->db_level == parent->db_level-1);
2081 ASSERT(arc_released(parent->db_buf));
2082 zio = parent->db_data_pending->dr_zio;
2083 } else {
2084 ASSERT(db->db_level == dn->dn_phys->dn_nlevels-1);
2085 ASSERT3P(db->db_blkptr, ==,

--- 5 unchanged lines hidden (view full) ---

2091 ASSERT3U(db->db_blkptr->blk_birth, <=, txg);
2092 ASSERT(zio);
2093
2094 zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0;
2095 zb.zb_object = db->db.db_object;
2096 zb.zb_level = db->db_level;
2097 zb.zb_blkid = db->db_blkid;
2098
2116 if (parent != dn->dn_dbuf) {
2117 ASSERT(parent && parent->db_data_pending);
2118 ASSERT(db->db_level == parent->db_level-1);
2119 ASSERT(arc_released(parent->db_buf));
2120 zio = parent->db_data_pending->dr_zio;
2121 } else {
2122 ASSERT(db->db_level == dn->dn_phys->dn_nlevels-1);
2123 ASSERT3P(db->db_blkptr, ==,

--- 5 unchanged lines hidden (view full) ---

2129 ASSERT3U(db->db_blkptr->blk_birth, <=, txg);
2130 ASSERT(zio);
2131
2132 zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0;
2133 zb.zb_object = db->db.db_object;
2134 zb.zb_level = db->db_level;
2135 zb.zb_blkid = db->db_blkid;
2136
2099 zio_flags = ZIO_FLAG_MUSTSUCCEED;
2100 if (dmu_ot[dn->dn_type].ot_metadata || zb.zb_level != 0)
2101 zio_flags |= ZIO_FLAG_METADATA;
2137 wp.wp_type = dn->dn_type;
2138 wp.wp_level = db->db_level;
2139 wp.wp_copies = os->os_copies;
2140 wp.wp_dncompress = dn->dn_compress;
2141 wp.wp_oscompress = os->os_compress;
2142 wp.wp_dnchecksum = dn->dn_checksum;
2143 wp.wp_oschecksum = os->os_checksum;
2144
2102 if (BP_IS_OLDER(db->db_blkptr, txg))
2145 if (BP_IS_OLDER(db->db_blkptr, txg))
2103 dsl_dataset_block_kill(
2146 (void) dsl_dataset_block_kill(
2104 os->os_dsl_dataset, db->db_blkptr, zio, tx);
2105
2147 os->os_dsl_dataset, db->db_blkptr, zio, tx);
2148
2106 dr->dr_zio = arc_write(zio, os->os_spa, checksum, compress,
2107 dmu_get_replication_level(os, &zb, dn->dn_type), txg,
2108 db->db_blkptr, data, dbuf_write_ready, dbuf_write_done, db,
2109 ZIO_PRIORITY_ASYNC_WRITE, zio_flags, &zb);
2149 dr->dr_zio = arc_write(zio, os->os_spa, &wp,
2150 DBUF_IS_L2CACHEABLE(db), txg, db->db_blkptr,
2151 data, dbuf_write_ready, dbuf_write_done, db,
2152 ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
2110}
2111
2112/* ARGSUSED */
2113static void
2114dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
2115{
2116 dmu_buf_impl_t *db = vdb;
2117 dnode_t *dn = db->db_dnode;
2118 objset_impl_t *os = dn->dn_objset;
2153}
2154
2155/* ARGSUSED */
2156static void
2157dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
2158{
2159 dmu_buf_impl_t *db = vdb;
2160 dnode_t *dn = db->db_dnode;
2161 objset_impl_t *os = dn->dn_objset;
2162 blkptr_t *bp = zio->io_bp;
2119 blkptr_t *bp_orig = &zio->io_bp_orig;
2120 uint64_t fill = 0;
2121 int old_size, new_size, i;
2122
2163 blkptr_t *bp_orig = &zio->io_bp_orig;
2164 uint64_t fill = 0;
2165 int old_size, new_size, i;
2166
2167 ASSERT(db->db_blkptr == bp);
2168
2123 dprintf_dbuf_bp(db, bp_orig, "bp_orig: %s", "");
2124
2125 old_size = bp_get_dasize(os->os_spa, bp_orig);
2169 dprintf_dbuf_bp(db, bp_orig, "bp_orig: %s", "");
2170
2171 old_size = bp_get_dasize(os->os_spa, bp_orig);
2126 new_size = bp_get_dasize(os->os_spa, zio->io_bp);
2172 new_size = bp_get_dasize(os->os_spa, bp);
2127
2173
2128 dnode_diduse_space(dn, new_size-old_size);
2174 dnode_diduse_space(dn, new_size - old_size);
2129
2175
2130 if (BP_IS_HOLE(zio->io_bp)) {
2176 if (BP_IS_HOLE(bp)) {
2131 dsl_dataset_t *ds = os->os_dsl_dataset;
2132 dmu_tx_t *tx = os->os_synctx;
2133
2134 if (bp_orig->blk_birth == tx->tx_txg)
2177 dsl_dataset_t *ds = os->os_dsl_dataset;
2178 dmu_tx_t *tx = os->os_synctx;
2179
2180 if (bp_orig->blk_birth == tx->tx_txg)
2135 dsl_dataset_block_kill(ds, bp_orig, NULL, tx);
2136 ASSERT3U(db->db_blkptr->blk_fill, ==, 0);
2181 (void) dsl_dataset_block_kill(ds, bp_orig, zio, tx);
2182 ASSERT3U(bp->blk_fill, ==, 0);
2137 return;
2138 }
2139
2183 return;
2184 }
2185
2186 ASSERT(BP_GET_TYPE(bp) == dn->dn_type);
2187 ASSERT(BP_GET_LEVEL(bp) == db->db_level);
2188
2140 mutex_enter(&db->db_mtx);
2141
2142 if (db->db_level == 0) {
2143 mutex_enter(&dn->dn_mtx);
2144 if (db->db_blkid > dn->dn_phys->dn_maxblkid)
2145 dn->dn_phys->dn_maxblkid = db->db_blkid;
2146 mutex_exit(&dn->dn_mtx);
2147
2148 if (dn->dn_type == DMU_OT_DNODE) {
2149 dnode_phys_t *dnp = db->db.db_data;
2150 for (i = db->db.db_size >> DNODE_SHIFT; i > 0;
2151 i--, dnp++) {
2152 if (dnp->dn_type != DMU_OT_NONE)
2153 fill++;
2154 }
2155 } else {
2156 fill = 1;
2157 }
2158 } else {
2189 mutex_enter(&db->db_mtx);
2190
2191 if (db->db_level == 0) {
2192 mutex_enter(&dn->dn_mtx);
2193 if (db->db_blkid > dn->dn_phys->dn_maxblkid)
2194 dn->dn_phys->dn_maxblkid = db->db_blkid;
2195 mutex_exit(&dn->dn_mtx);
2196
2197 if (dn->dn_type == DMU_OT_DNODE) {
2198 dnode_phys_t *dnp = db->db.db_data;
2199 for (i = db->db.db_size >> DNODE_SHIFT; i > 0;
2200 i--, dnp++) {
2201 if (dnp->dn_type != DMU_OT_NONE)
2202 fill++;
2203 }
2204 } else {
2205 fill = 1;
2206 }
2207 } else {
2159 blkptr_t *bp = db->db.db_data;
2208 blkptr_t *ibp = db->db.db_data;
2160 ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
2209 ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
2161 for (i = db->db.db_size >> SPA_BLKPTRSHIFT; i > 0; i--, bp++) {
2162 if (BP_IS_HOLE(bp))
2210 for (i = db->db.db_size >> SPA_BLKPTRSHIFT; i > 0; i--, ibp++) {
2211 if (BP_IS_HOLE(ibp))
2163 continue;
2212 continue;
2164 ASSERT3U(BP_GET_LSIZE(bp), ==,
2213 ASSERT3U(BP_GET_LSIZE(ibp), ==,
2165 db->db_level == 1 ? dn->dn_datablksz :
2166 (1<<dn->dn_phys->dn_indblkshift));
2214 db->db_level == 1 ? dn->dn_datablksz :
2215 (1<<dn->dn_phys->dn_indblkshift));
2167 fill += bp->blk_fill;
2216 fill += ibp->blk_fill;
2168 }
2169 }
2170
2217 }
2218 }
2219
2171 db->db_blkptr->blk_fill = fill;
2172 BP_SET_TYPE(db->db_blkptr, dn->dn_type);
2173 BP_SET_LEVEL(db->db_blkptr, db->db_level);
2220 bp->blk_fill = fill;
2174
2175 mutex_exit(&db->db_mtx);
2176
2221
2222 mutex_exit(&db->db_mtx);
2223
2177 /* We must do this after we've set the bp's type and level */
2178 if (!DVA_EQUAL(BP_IDENTITY(zio->io_bp), BP_IDENTITY(bp_orig))) {
2224 if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
2225 ASSERT(DVA_EQUAL(BP_IDENTITY(bp), BP_IDENTITY(bp_orig)));
2226 } else {
2179 dsl_dataset_t *ds = os->os_dsl_dataset;
2180 dmu_tx_t *tx = os->os_synctx;
2181
2182 if (bp_orig->blk_birth == tx->tx_txg)
2227 dsl_dataset_t *ds = os->os_dsl_dataset;
2228 dmu_tx_t *tx = os->os_synctx;
2229
2230 if (bp_orig->blk_birth == tx->tx_txg)
2183 dsl_dataset_block_kill(ds, bp_orig, NULL, tx);
2184 dsl_dataset_block_born(ds, zio->io_bp, tx);
2231 (void) dsl_dataset_block_kill(ds, bp_orig, zio, tx);
2232 dsl_dataset_block_born(ds, bp, tx);
2185 }
2186}
2187
2188/* ARGSUSED */
2189static void
2190dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
2191{
2192 dmu_buf_impl_t *db = vdb;
2193 uint64_t txg = zio->io_txg;
2194 dbuf_dirty_record_t **drp, *dr;
2195
2196 ASSERT3U(zio->io_error, ==, 0);
2197
2198 mutex_enter(&db->db_mtx);
2199
2200 drp = &db->db_last_dirty;
2233 }
2234}
2235
2236/* ARGSUSED */
2237static void
2238dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
2239{
2240 dmu_buf_impl_t *db = vdb;
2241 uint64_t txg = zio->io_txg;
2242 dbuf_dirty_record_t **drp, *dr;
2243
2244 ASSERT3U(zio->io_error, ==, 0);
2245
2246 mutex_enter(&db->db_mtx);
2247
2248 drp = &db->db_last_dirty;
2201 while (*drp != db->db_data_pending)
2202 drp = &(*drp)->dr_next;
2203 ASSERT(!list_link_active(&(*drp)->dr_dirty_node));
2204 ASSERT((*drp)->dr_txg == txg);
2205 ASSERT((*drp)->dr_next == NULL);
2206 dr = *drp;
2207 *drp = NULL;
2249 while ((dr = *drp) != db->db_data_pending)
2250 drp = &dr->dr_next;
2251 ASSERT(!list_link_active(&dr->dr_dirty_node));
2252 ASSERT(dr->dr_txg == txg);
2253 ASSERT(dr->dr_next == NULL);
2254 *drp = dr->dr_next;
2208
2209 if (db->db_level == 0) {
2210 ASSERT(db->db_blkid != DB_BONUS_BLKID);
2211 ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN);
2212
2213 if (dr->dt.dl.dr_data != db->db_buf)
2214 VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db) == 1);
2215 else if (!BP_IS_HOLE(db->db_blkptr))

--- 9 unchanged lines hidden (view full) ---

2225 int epbs =
2226 dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
2227 ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
2228 db->db.db_size);
2229 ASSERT3U(dn->dn_phys->dn_maxblkid
2230 >> (db->db_level * epbs), >=, db->db_blkid);
2231 arc_set_callback(db->db_buf, dbuf_do_evict, db);
2232 }
2255
2256 if (db->db_level == 0) {
2257 ASSERT(db->db_blkid != DB_BONUS_BLKID);
2258 ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN);
2259
2260 if (dr->dt.dl.dr_data != db->db_buf)
2261 VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db) == 1);
2262 else if (!BP_IS_HOLE(db->db_blkptr))

--- 9 unchanged lines hidden (view full) ---

2272 int epbs =
2273 dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
2274 ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
2275 db->db.db_size);
2276 ASSERT3U(dn->dn_phys->dn_maxblkid
2277 >> (db->db_level * epbs), >=, db->db_blkid);
2278 arc_set_callback(db->db_buf, dbuf_do_evict, db);
2279 }
2233 list_destroy(&dr->dt.di.dr_children);
2234 mutex_destroy(&dr->dt.di.dr_mtx);
2280 mutex_destroy(&dr->dt.di.dr_mtx);
2281 list_destroy(&dr->dt.di.dr_children);
2235 }
2236 kmem_free(dr, sizeof (dbuf_dirty_record_t));
2237
2238 cv_broadcast(&db->db_changed);
2239 ASSERT(db->db_dirtycnt > 0);
2240 db->db_dirtycnt -= 1;
2241 db->db_data_pending = NULL;
2242 mutex_exit(&db->db_mtx);
2243
2244 dprintf_dbuf_bp(db, zio->io_bp, "bp: %s", "");
2245
2246 dbuf_rele(db, (void *)(uintptr_t)txg);
2247}
2282 }
2283 kmem_free(dr, sizeof (dbuf_dirty_record_t));
2284
2285 cv_broadcast(&db->db_changed);
2286 ASSERT(db->db_dirtycnt > 0);
2287 db->db_dirtycnt -= 1;
2288 db->db_data_pending = NULL;
2289 mutex_exit(&db->db_mtx);
2290
2291 dprintf_dbuf_bp(db, zio->io_bp, "bp: %s", "");
2292
2293 dbuf_rele(db, (void *)(uintptr_t)txg);
2294}