/* dmu_tx.c revision 185029 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dbuf.h>
#include <sys/dmu_tx.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h> /* for dsl_dataset_block_freeable() */
#include <sys/dsl_dir.h> /* for dsl_dir_tempreserve_*() */
#include <sys/dsl_pool.h>
#include <sys/zap_impl.h> /* for fzap_default_block_shift */
#include <sys/spa.h>
#include <sys/zfs_context.h>

typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
    uint64_t arg1, uint64_t arg2);


/*
 * Allocate a new transaction bound to the given dsl_dir (may be NULL for
 * pool-level transactions).  The hold list starts empty; in debug kernels
 * the written/freed space refcounts are created for bookkeeping.
 */
dmu_tx_t *
dmu_tx_create_dd(dsl_dir_t *dd)
{
	dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP);
	tx->tx_dir = dd;
	if (dd)
		tx->tx_pool = dd->dd_pool;
	list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t),
	    offsetof(dmu_tx_hold_t, txh_node));
#ifdef ZFS_DEBUG
	refcount_create(&tx->tx_space_written);
	refcount_create(&tx->tx_space_freed);
#endif
	return (tx);
}

/*
 * Create a transaction against an objset.  tx_lastsnap_txg records the
 * most recent snapshot txg so dmu_tx_try_assign() can detect snapshots
 * taken after the space estimates were made.
 */
dmu_tx_t *
dmu_tx_create(objset_t *os)
{
	dmu_tx_t *tx = dmu_tx_create_dd(os->os->os_dsl_dataset->ds_dir);
	tx->tx_objset = os;
	tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os->os_dsl_dataset);
	return (tx);
}

/*
 * Create a transaction already assigned to the given txg (used by sync
 * context).  tx_anyobj is set, which bypasses hold tracking and the
 * dirty-buffer verification in dmu_tx_dirty_buf().
 */
dmu_tx_t *
dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg)
{
	dmu_tx_t *tx = dmu_tx_create_dd(NULL);

	ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg);
	tx->tx_pool = dp;
	tx->tx_txg = txg;
	tx->tx_anyobj = TRUE;

	return (tx);
}

/* A syncing-context tx (tx_anyobj) may touch any object. */
int
dmu_tx_is_syncing(dmu_tx_t *tx)
{
	return (tx->tx_anyobj);
}

/* Same predicate as above; separate name for call-site intent. */
int
dmu_tx_private_ok(dmu_tx_t *tx)
{
	return (tx->tx_anyobj);
}

/*
 * Create a hold on the given object and append it to the tx's hold list.
 * If the object already exists, take a dnode hold; if the tx is already
 * assigned, also take a dn_tx_holds reference for that txg.  Returns NULL
 * (and sets tx_err) if the dnode can't be held.
 */
static dmu_tx_hold_t *
dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
    enum dmu_tx_hold_type type, uint64_t arg1, uint64_t arg2)
{
	dmu_tx_hold_t *txh;
	dnode_t *dn = NULL;
	int err;

	if (object != DMU_NEW_OBJECT) {
		err = dnode_hold(os->os, object, tx, &dn);
		if (err) {
			tx->tx_err = err;
			return (NULL);
		}

		/*
		 * NOTE(review): the err == 0 test is redundant here — a
		 * nonzero err already returned above.
		 */
		if (err == 0 && tx->tx_txg != 0) {
			mutex_enter(&dn->dn_mtx);
			/*
			 * dn->dn_assigned_txg == tx->tx_txg doesn't pose a
			 * problem, but there's no way for it to happen (for
			 * now, at least).
			 */
			ASSERT(dn->dn_assigned_txg == 0);
			dn->dn_assigned_txg = tx->tx_txg;
			(void) refcount_add(&dn->dn_tx_holds, tx);
			mutex_exit(&dn->dn_mtx);
		}
	}

	txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP);
	txh->txh_tx = tx;
	txh->txh_dnode = dn;
#ifdef ZFS_DEBUG
	txh->txh_type = type;
	txh->txh_arg1 = arg1;
	txh->txh_arg2 = arg2;
#endif
	list_insert_tail(&tx->tx_holds, txh);

	return (txh);
}

/*
 * Record that this tx will create the given object (no space accounting;
 * used so dmu_tx_dirty_buf() can match dirtied buffers of new objects).
 */
void
dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object)
{
	/*
	 * If we're syncing, they can manipulate any object anyhow, and
	 * the hold on the dnode_t can cause problems.
	 */
	if (!dmu_tx_is_syncing(tx)) {
		(void) dmu_tx_hold_object_impl(tx, os,
		    object, THT_NEWOBJECT, 0, 0);
	}
}

/*
 * Read the block at (level, blkid) to detect i/o errors up front, before
 * the tx is assigned.  If zio is non-NULL the read is issued async under
 * that parent zio; the caller collects the error via zio_wait().
 */
static int
dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
{
	int err;
	dmu_buf_impl_t *db;

	rw_enter(&dn->dn_struct_rwlock, RW_READER);
	db = dbuf_hold_level(dn, level, blkid, FTAG);
	rw_exit(&dn->dn_struct_rwlock);
	if (db == NULL)
		return (EIO);
	err = dbuf_read(db, zio, DB_RF_CANFAIL | DB_RF_NOPREFETCH);
	dbuf_rele(db, FTAG);
	return (err);
}

/*
 * Estimate (worst case) the space that writing [off, off+len) will
 * consume, accumulating into txh_space_towrite, and pre-read the edge
 * level-0 blocks and interior level-1 blocks for i/o error checking.
 */
/* ARGSUSED */
static void
dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
{
	dnode_t *dn = txh->txh_dnode;
	uint64_t start, end, i;
	int min_bs, max_bs, min_ibs,
	    max_ibs, epbs, bits;
	int err = 0;

	if (len == 0)
		return;

	/* Default to the widest possible block/indirect shifts. */
	min_bs = SPA_MINBLOCKSHIFT;
	max_bs = SPA_MAXBLOCKSHIFT;
	min_ibs = DN_MIN_INDBLKSHIFT;
	max_ibs = DN_MAX_INDBLKSHIFT;


	/*
	 * For i/o error checking, read the first and last level-0
	 * blocks (if they are not aligned), and all the level-1 blocks.
	 */

	if (dn) {
		if (dn->dn_maxblkid == 0) {
			err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
			if (err)
				goto out;
		} else {
			zio_t *zio = zio_root(dn->dn_objset->os_spa,
			    NULL, NULL, ZIO_FLAG_CANFAIL);

			/* first level-0 block */
			start = off >> dn->dn_datablkshift;
			if (P2PHASE(off, dn->dn_datablksz) ||
			    len < dn->dn_datablksz) {
				err = dmu_tx_check_ioerr(zio, dn, 0, start);
				if (err)
					goto out;
			}

			/* last level-0 block */
			end = (off+len-1) >> dn->dn_datablkshift;
			if (end != start &&
			    P2PHASE(off+len, dn->dn_datablksz)) {
				err = dmu_tx_check_ioerr(zio, dn, 0, end);
				if (err)
					goto out;
			}

			/* level-1 blocks */
			if (dn->dn_nlevels > 1) {
				start >>= dn->dn_indblkshift - SPA_BLKPTRSHIFT;
				end >>= dn->dn_indblkshift - SPA_BLKPTRSHIFT;
				for (i = start+1; i < end; i++) {
					err = dmu_tx_check_ioerr(zio, dn, 1, i);
					if (err)
						goto out;
				}
			}

			err = zio_wait(zio);
			if (err)
				goto out;
		}
	}

	/*
	 * If there's more than one block, the blocksize can't change,
	 * so we can make a more precise estimate.  Alternatively,
	 * if the dnode's ibs is larger than max_ibs, always use that.
	 * This ensures that if we reduce DN_MAX_INDBLKSHIFT,
	 * the code will still work correctly on existing pools.
	 */
	if (dn && (dn->dn_maxblkid != 0 || dn->dn_indblkshift > max_ibs)) {
		min_ibs = max_ibs = dn->dn_indblkshift;
		if (dn->dn_datablkshift != 0)
			min_bs = max_bs = dn->dn_datablkshift;
	}

	/*
	 * 'end' is the last thing we will access, not one past.
	 * This way we won't overflow when accessing the last byte.
	 */
	start = P2ALIGN(off, 1ULL << max_bs);
	end = P2ROUNDUP(off + len, 1ULL << max_bs) - 1;
	txh->txh_space_towrite += end - start + 1;

	start >>= min_bs;
	end >>= min_bs;

	epbs = min_ibs - SPA_BLKPTRSHIFT;

	/*
	 * The object contains at most 2^(64 - min_bs) blocks,
	 * and each indirect level maps 2^epbs.
	 */
	for (bits = 64 - min_bs; bits >= 0; bits -= epbs) {
		start >>= epbs;
		end >>= epbs;
		/*
		 * If we increase the number of levels of indirection,
		 * we'll need new blkid=0 indirect blocks.  If start == 0,
		 * we're already accounting for those blocks; and if end == 0,
		 * we can't increase the number of levels beyond that.
		 */
		if (start != 0 && end != 0)
			txh->txh_space_towrite += 1ULL << max_ibs;
		txh->txh_space_towrite += (end - start + 1) << max_ibs;
	}

	ASSERT(txh->txh_space_towrite < 2 * DMU_MAX_ACCESS);

out:
	if (err)
		txh->txh_tx->tx_err = err;
}

/*
 * Account for the space needed to dirty this object's dnode in the
 * meta-dnode.  If the dnode's current block is freeable (born after the
 * last snapshot) it counts as an overwrite, otherwise as a fresh write.
 */
static void
dmu_tx_count_dnode(dmu_tx_hold_t *txh)
{
	dnode_t *dn = txh->txh_dnode;
	dnode_t *mdn = txh->txh_tx->tx_objset->os->os_meta_dnode;
	/* one data block plus one indirect per level of the meta-dnode */
	uint64_t space = mdn->dn_datablksz +
	    ((mdn->dn_nlevels-1) << mdn->dn_indblkshift);

	if (dn && dn->dn_dbuf->db_blkptr &&
	    dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
	    dn->dn_dbuf->db_blkptr->blk_birth)) {
		txh->txh_space_tooverwrite += space;
	} else {
		txh->txh_space_towrite += space;
		if (dn && dn->dn_dbuf->db_blkptr)
			txh->txh_space_tounref += space;
	}
}

/*
 * Declare that this tx will write [off, off+len) of the given object.
 * Must be called before dmu_tx_assign() (tx_txg == 0).
 */
void
dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg == 0);
	ASSERT(len < DMU_MAX_ACCESS);
	ASSERT(len == 0 || UINT64_MAX - off >= len - 1);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_WRITE, off, len);
	if (txh == NULL)
		return;

	dmu_tx_count_write(txh, off, len);
	dmu_tx_count_dnode(txh);
}

/*
 * Estimate the space freed (and memory held) by freeing [off, off+len)
 * of this object: walk the existing level-1 indirects, summing blocks
 * whose birth txg makes them freeable, into txh_space_tofree/_tounref.
 */
static void
dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
{
	uint64_t blkid, nblks, lastblk;
	uint64_t space = 0, unref =
	    0, skipped = 0;
	dnode_t *dn = txh->txh_dnode;
	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
	spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
	int epbs;

	if (dn->dn_nlevels == 0)
		return;

	/*
	 * The struct_rwlock protects us against dn_nlevels
	 * changing, in case (against all odds) we manage to dirty &
	 * sync out the changes after we check for being dirty.
	 * Also, dbuf_hold_level() wants us to have the struct_rwlock.
	 */
	rw_enter(&dn->dn_struct_rwlock, RW_READER);
	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
	if (dn->dn_maxblkid == 0) {
		/* single-block object: only a whole-object free counts */
		if (off == 0 && len >= dn->dn_datablksz) {
			blkid = 0;
			nblks = 1;
		} else {
			rw_exit(&dn->dn_struct_rwlock);
			return;
		}
	} else {
		blkid = off >> dn->dn_datablkshift;
		nblks = (len + dn->dn_datablksz - 1) >> dn->dn_datablkshift;

		if (blkid >= dn->dn_maxblkid) {
			rw_exit(&dn->dn_struct_rwlock);
			return;
		}
		if (blkid + nblks > dn->dn_maxblkid)
			nblks = dn->dn_maxblkid - blkid;

	}
	if (dn->dn_nlevels == 1) {
		/* blkptrs live directly in the dnode; scan them here */
		int i;
		for (i = 0; i < nblks; i++) {
			blkptr_t *bp = dn->dn_phys->dn_blkptr;
			ASSERT3U(blkid + i, <, dn->dn_nblkptr);
			bp += blkid + i;
			if (dsl_dataset_block_freeable(ds, bp->blk_birth)) {
				dprintf_bp(bp, "can free old%s", "");
				space += bp_get_dasize(spa, bp);
			}
			unref += BP_GET_ASIZE(bp);
		}
		nblks = 0;
	}

	/*
	 * Add in memory requirements of higher-level indirects.
	 * This assumes a worst-possible scenario for dn_nlevels.
	 */
	{
		uint64_t blkcnt = 1 + ((nblks >> epbs) >> epbs);
		int level = (dn->dn_nlevels > 1) ? 2 : 1;

		while (level++ < DN_MAX_LEVELS) {
			txh->txh_memory_tohold += blkcnt << dn->dn_indblkshift;
			blkcnt = 1 + (blkcnt >> epbs);
		}
		ASSERT(blkcnt <= dn->dn_nblkptr);
	}

	/* Walk the level-1 indirects covering [blkid, lastblk]. */
	lastblk = blkid + nblks - 1;
	while (nblks) {
		dmu_buf_impl_t *dbuf;
		uint64_t ibyte, new_blkid;
		int epb = 1 << epbs;
		int err, i, blkoff, tochk;
		blkptr_t *bp;

		/* skip ahead to the next allocated data block */
		ibyte = blkid << dn->dn_datablkshift;
		err = dnode_next_offset(dn,
		    DNODE_FIND_HAVELOCK, &ibyte, 2, 1, 0);
		new_blkid = ibyte >> dn->dn_datablkshift;
		if (err == ESRCH) {
			skipped += (lastblk >> epbs) - (blkid >> epbs) + 1;
			break;
		}
		if (err) {
			txh->txh_tx->tx_err = err;
			break;
		}
		if (new_blkid > lastblk) {
			skipped += (lastblk >> epbs) - (blkid >> epbs) + 1;
			break;
		}

		if (new_blkid > blkid) {
			ASSERT((new_blkid >> epbs) > (blkid >> epbs));
			skipped += (new_blkid >> epbs) - (blkid >> epbs) - 1;
			nblks -= new_blkid - blkid;
			blkid = new_blkid;
		}
		blkoff = P2PHASE(blkid, epb);
		tochk = MIN(epb - blkoff, nblks);

		dbuf = dbuf_hold_level(dn, 1, blkid >> epbs, FTAG);

		txh->txh_memory_tohold += dbuf->db.db_size;
		if (txh->txh_memory_tohold > DMU_MAX_ACCESS) {
			txh->txh_tx->tx_err = E2BIG;
			dbuf_rele(dbuf, FTAG);
			break;
		}
		err = dbuf_read(dbuf, NULL, DB_RF_HAVESTRUCT | DB_RF_CANFAIL);
		if (err != 0) {
			txh->txh_tx->tx_err = err;
			dbuf_rele(dbuf, FTAG);
			break;
		}

		bp = dbuf->db.db_data;
		bp += blkoff;

		for (i = 0; i < tochk; i++) {
			if (dsl_dataset_block_freeable(ds, bp[i].blk_birth)) {
				dprintf_bp(&bp[i], "can free old%s", "");
				space += bp_get_dasize(spa, &bp[i]);
			}
			/*
			 * NOTE(review): this uses bp (the first entry), not
			 * bp[i], unlike the freeable check above — looks
			 * like it may undercount unref; confirm upstream.
			 */
			unref += BP_GET_ASIZE(bp);
		}
		dbuf_rele(dbuf, FTAG);

		blkid += tochk;
		nblks -= tochk;
	}
	rw_exit(&dn->dn_struct_rwlock);

	/* account for new level 1 indirect blocks that might show up */
	if (skipped > 0) {
		txh->txh_fudge += skipped << dn->dn_indblkshift;
		skipped = MIN(skipped, DMU_MAX_DELETEBLKCNT >> epbs);
		txh->txh_memory_tohold += skipped << dn->dn_indblkshift;
	}
	txh->txh_space_tofree += space;
	txh->txh_space_tounref += unref;
}

/*
 * Declare that this tx will free [off, off+len) of the given object
 * (len == DMU_OBJECT_END means "to the end").  Pre-reads the edge
 * level-0 blocks and the covered level-1 blocks to catch i/o errors.
 */
void
dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
{
	dmu_tx_hold_t *txh;
	dnode_t *dn;
	uint64_t start, end, i;
	int err, shift;
	zio_t *zio;

	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_FREE, off, len);
	if (txh == NULL)
		return;
	dn = txh->txh_dnode;

	/* first block */
	if (off != 0)
		dmu_tx_count_write(txh,
		    off, 1);
	/* last block */
	if (len != DMU_OBJECT_END)
		dmu_tx_count_write(txh, off+len, 1);

	if (off >= (dn->dn_maxblkid+1) * dn->dn_datablksz)
		return;
	if (len == DMU_OBJECT_END)
		len = (dn->dn_maxblkid+1) * dn->dn_datablksz - off;

	/*
	 * For i/o error checking, read the first and last level-0
	 * blocks, and all the level-1 blocks.  The above count_write's
	 * have already taken care of the level-0 blocks.
	 */
	if (dn->dn_nlevels > 1) {
		shift = dn->dn_datablkshift + dn->dn_indblkshift -
		    SPA_BLKPTRSHIFT;
		start = off >> shift;
		end = dn->dn_datablkshift ? ((off+len) >> shift) : 0;

		zio = zio_root(tx->tx_pool->dp_spa,
		    NULL, NULL, ZIO_FLAG_CANFAIL);
		for (i = start; i <= end; i++) {
			uint64_t ibyte = i << shift;
			err = dnode_next_offset(dn, 0, &ibyte, 2, 1, 0);
			i = ibyte >> shift;
			if (err == ESRCH)
				break;
			if (err) {
				tx->tx_err = err;
				return;
			}

			err = dmu_tx_check_ioerr(zio, dn, 1, i);
			if (err) {
				tx->tx_err = err;
				return;
			}
		}
		err = zio_wait(zio);
		if (err) {
			tx->tx_err = err;
			return;
		}
	}

	dmu_tx_count_dnode(txh);
	dmu_tx_count_free(txh, off, len);
}

/*
 * Declare that this tx will add to or remove from a ZAP object.
 * 'add' is nonzero when adding an entry; 'name' (may be NULL) is the
 * entry being modified, used to pre-read fat-zap leaves for errors.
 */
void
dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
{
	dmu_tx_hold_t *txh;
	dnode_t *dn;
	uint64_t nblocks;
	int epbs, err;

	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_ZAP, add, (uintptr_t)name);
	if (txh == NULL)
		return;
	dn = txh->txh_dnode;

	dmu_tx_count_dnode(txh);

	if (dn == NULL) {
		/*
		 * We will be able to fit a new object's entries into one leaf
		 * block.  So there will be at most 2 blocks total,
		 * including the header block.
		 */
		dmu_tx_count_write(txh, 0, 2 << fzap_default_block_shift);
		return;
	}

	ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap);

	if (dn->dn_maxblkid == 0 && !add) {
		/*
		 * If there is only one block  (i.e. this is a micro-zap)
		 * and we are not adding anything, the accounting is simple.
		 */
		err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
		if (err) {
			tx->tx_err = err;
			return;
		}

		/*
		 * Use max block size here, since we don't know how much
		 * the size will change between now and the dbuf dirty call.
		 */
		if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
		    dn->dn_phys->dn_blkptr[0].blk_birth)) {
			txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
		} else {
			txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
			txh->txh_space_tounref +=
			    BP_GET_ASIZE(dn->dn_phys->dn_blkptr);
		}
		return;
	}

	if (dn->dn_maxblkid > 0 && name) {
		/*
		 * access the name in this fat-zap so that we'll check
		 * for i/o errors to the leaf blocks, etc.
		 */
		err = zap_lookup(&dn->dn_objset->os, dn->dn_object, name,
		    8, 0, NULL);
		if (err == EIO) {
			tx->tx_err = err;
			return;
		}
	}

	/*
	 * 3 blocks overwritten: target leaf, ptrtbl block, header block
	 * 3 new blocks written if adding: new split leaf, 2 grown ptrtbl blocks
	 */
	dmu_tx_count_write(txh, dn->dn_maxblkid * dn->dn_datablksz,
	    (3 + (add ? 3 : 0)) << dn->dn_datablkshift);

	/*
	 * If the modified blocks are scattered to the four winds,
	 * we'll have to modify an indirect twig for each.
	 */
	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
	for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs)
		txh->txh_space_towrite += 3 << dn->dn_indblkshift;
}

/*
 * Declare that this tx will modify the object's bonus buffer
 * (accounted as dirtying the dnode itself).
 */
void
dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    object, THT_BONUS, 0, 0);
	if (txh)
		dmu_tx_count_dnode(txh);
}

/*
 * Reserve an arbitrary amount of write space, not tied to any object.
 */
void
dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space)
{
	dmu_tx_hold_t *txh;
	ASSERT(tx->tx_txg == 0);

	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
	    DMU_NEW_OBJECT, THT_SPACE, space, 0);

	txh->txh_space_towrite += space;
}

/*
 * Return the number of holds this (assigned) tx has on the given object.
 */
int
dmu_tx_holds(dmu_tx_t *tx, uint64_t object)
{
	dmu_tx_hold_t *txh;
	int holds = 0;

	/*
	 * By asserting that the
	 * tx is assigned, we're counting the
	 * number of dn_tx_holds, which is the same as the number of
	 * dn_holds.  Otherwise, we'd be counting dn_holds, but
	 * dn_tx_holds could be 0.
	 */
	ASSERT(tx->tx_txg != 0);

	/* if (tx->tx_anyobj == TRUE) */
	/* return (0); */

	for (txh = list_head(&tx->tx_holds); txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		if (txh->txh_dnode && txh->txh_dnode->dn_object == object)
			holds++;
	}

	return (holds);
}

#ifdef ZFS_DEBUG
/*
 * Debug-only verification that a buffer being dirtied is covered by one
 * of this tx's holds; panics if the dbuf was not declared via a hold.
 */
void
dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
{
	dmu_tx_hold_t *txh;
	int match_object = FALSE, match_offset = FALSE;
	dnode_t *dn = db->db_dnode;

	ASSERT(tx->tx_txg != 0);
	ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset->os);
	ASSERT3U(dn->dn_object, ==, db->db.db_object);

	/* syncing-context txs may dirty anything */
	if (tx->tx_anyobj)
		return;

	/* XXX No checking on the meta dnode for now */
	if (db->db.db_object == DMU_META_DNODE_OBJECT)
		return;

	for (txh = list_head(&tx->tx_holds); txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		ASSERT(dn == NULL || dn->dn_assigned_txg == tx->tx_txg);
		if (txh->txh_dnode == dn && txh->txh_type != THT_NEWOBJECT)
			match_object = TRUE;
		if (txh->txh_dnode == NULL || txh->txh_dnode == dn) {
			int datablkshift = dn->dn_datablkshift ?
			    dn->dn_datablkshift : SPA_MAXBLOCKSHIFT;
			int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
			int shift = datablkshift + epbs * db->db_level;
			/* blkid range of this dbuf's level covered by the hold */
			uint64_t beginblk = shift >= 64 ? 0 :
			    (txh->txh_arg1 >> shift);
			uint64_t endblk = shift >= 64 ? 0 :
			    ((txh->txh_arg1 + txh->txh_arg2 - 1) >> shift);
			uint64_t blkid = db->db_blkid;

			/* XXX txh_arg2 better not be zero... */

			dprintf("found txh type %x beginblk=%llx endblk=%llx\n",
			    txh->txh_type, beginblk, endblk);

			switch (txh->txh_type) {
			case THT_WRITE:
				if (blkid >= beginblk && blkid <= endblk)
					match_offset = TRUE;
				/*
				 * We will let this hold work for the bonus
				 * buffer so that we don't need to hold it
				 * when creating a new object.
				 */
				if (blkid == DB_BONUS_BLKID)
					match_offset = TRUE;
				/*
				 * They might have to increase nlevels,
				 * thus dirtying the new TLIBs.  Or they
				 * might have to change the block size,
				 * thus dirtying the new lvl=0 blk=0.
				 */
				if (blkid == 0)
					match_offset = TRUE;
				break;
			case THT_FREE:
				/*
				 * We will dirty all the level 1 blocks in
				 * the free range and perhaps the first and
				 * last level 0 block.
				 */
				if (blkid >= beginblk && (blkid <= endblk ||
				    txh->txh_arg2 == DMU_OBJECT_END))
					match_offset = TRUE;
				break;
			case THT_BONUS:
				if (blkid == DB_BONUS_BLKID)
					match_offset = TRUE;
				break;
			case THT_ZAP:
				match_offset = TRUE;
				break;
			case THT_NEWOBJECT:
				match_object = TRUE;
				break;
			default:
				ASSERT(!"bad txh_type");
			}
		}
		if (match_object && match_offset)
			return;
	}
	panic("dirtying dbuf obj=%llx lvl=%u blkid=%llx but not tx_held\n",
	    (u_longlong_t)db->db.db_object, db->db_level,
	    (u_longlong_t)db->db_blkid);
}
#endif

/*
 * Attempt to assign this tx to the currently open txg: take dn_tx_holds
 * on all held dnodes, total up the space estimates, and temp-reserve the
 * required space in the dsl_dir.  Returns 0, ERESTART (caller should
 * wait/retry), EIO (pool suspended in continue mode), or a reservation
 * error.
 */
static int
dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
{
	dmu_tx_hold_t *txh;
	spa_t *spa = tx->tx_pool->dp_spa;
	uint64_t memory, asize, fsize, usize;
	uint64_t towrite, tofree, tooverwrite, tounref, tohold, fudge;

	ASSERT3U(tx->tx_txg, ==, 0);

	if (tx->tx_err)
		return (tx->tx_err);

	if (spa_suspended(spa)) {
		/*
		 * If the user has indicated a blocking failure mode
		 * then return ERESTART which will block in dmu_tx_wait().
		 * Otherwise, return EIO so that an error can get
		 * propagated back to the VOP calls.
		 *
		 * Note that we always honor the txg_how flag regardless
		 * of the failuremode setting.
		 */
		if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE &&
		    txg_how != TXG_WAIT)
			return (EIO);

		return (ERESTART);
	}

	tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh);
	tx->tx_needassign_txh = NULL;

	/*
	 * NB: No error returns are allowed after txg_hold_open, but
	 * before processing the dnode holds, due to the
	 * dmu_tx_unassign() logic.
	 */

	towrite = tofree = tooverwrite = tounref = tohold = fudge = 0;
	for (txh = list_head(&tx->tx_holds); txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		dnode_t *dn = txh->txh_dnode;
		if (dn != NULL) {
			mutex_enter(&dn->dn_mtx);
			if (dn->dn_assigned_txg == tx->tx_txg - 1) {
				/* dnode still busy in the previous txg */
				mutex_exit(&dn->dn_mtx);
				tx->tx_needassign_txh = txh;
				return (ERESTART);
			}
			if (dn->dn_assigned_txg == 0)
				dn->dn_assigned_txg = tx->tx_txg;
			ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
			(void) refcount_add(&dn->dn_tx_holds, tx);
			mutex_exit(&dn->dn_mtx);
		}
		towrite += txh->txh_space_towrite;
		tofree += txh->txh_space_tofree;
		tooverwrite += txh->txh_space_tooverwrite;
		tounref += txh->txh_space_tounref;
		tohold += txh->txh_memory_tohold;
		fudge += txh->txh_fudge;
	}

	/*
	 * NB: This check must be after we've held the dnodes, so that
	 * the dmu_tx_unassign() logic will work properly
	 */
	if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
		return (ERESTART);

	/*
	 * If a snapshot has been taken since we made our estimates,
	 * assume that we won't be able to free or overwrite anything.
	 */
	if (tx->tx_objset &&
	    dsl_dataset_prev_snap_txg(tx->tx_objset->os->os_dsl_dataset) >
	    tx->tx_lastsnap_txg) {
		towrite += tooverwrite;
		tooverwrite = tofree = 0;
	}

	/* needed allocation: worst-case estimate of write space */
	asize = spa_get_asize(tx->tx_pool->dp_spa, towrite + tooverwrite);
	/* freed space estimate: worst-case overwrite + free estimate */
	fsize = spa_get_asize(tx->tx_pool->dp_spa, tooverwrite) + tofree;
	/* convert unrefd space to worst-case estimate */
	usize = spa_get_asize(tx->tx_pool->dp_spa, tounref);
	/* calculate memory footprint estimate */
	memory = towrite + tooverwrite + tohold;

#ifdef ZFS_DEBUG
	/*
	 * Add in 'tohold' to account for our dirty holds on this memory
	 * XXX - the "fudge" factor is to account for skipped blocks that
	 * we missed because dnode_next_offset() misses in-core-only blocks.
	 */
	tx->tx_space_towrite = asize +
	    spa_get_asize(tx->tx_pool->dp_spa, tohold + fudge);
	tx->tx_space_tofree = tofree;
	tx->tx_space_tooverwrite = tooverwrite;
	tx->tx_space_tounref = tounref;
#endif

	if (tx->tx_dir && asize != 0) {
		int err = dsl_dir_tempreserve_space(tx->tx_dir, memory,
		    asize, fsize, usize, &tx->tx_tempreserve_cookie, tx);
		if (err)
			return (err);
	}

	return (0);
}

/*
 * Undo a (partially) successful dmu_tx_try_assign(): drop dn_tx_holds up
 * to the hold that caused the restart, and release the txg hold.
 */
static void
dmu_tx_unassign(dmu_tx_t *tx)
{
	dmu_tx_hold_t *txh;

	if (tx->tx_txg == 0)
		return;

	txg_rele_to_quiesce(&tx->tx_txgh);

	/* walk only the holds processed before the failure point */
	for (txh = list_head(&tx->tx_holds); txh != tx->tx_needassign_txh;
	    txh = list_next(&tx->tx_holds, txh)) {
		dnode_t *dn = txh->txh_dnode;

		if (dn == NULL)
			continue;
		mutex_enter(&dn->dn_mtx);
		ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);

		if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
			dn->dn_assigned_txg = 0;
			cv_broadcast(&dn->dn_notxholds);
		}
		mutex_exit(&dn->dn_mtx);
	}

	txg_rele_to_sync(&tx->tx_txgh);

	tx->tx_lasttried_txg = tx->tx_txg;
	tx->tx_txg = 0;
}

/*
 * Assign tx to a transaction group.  txg_how can be one of:
 *
 * (1)	TXG_WAIT.  If the current open txg is full, waits until there's
 *	a new one.  This should be used when you're not holding locks.
 *	It will only fail if we're truly out of space (or over quota).
 *
 * (2) TXG_NOWAIT.  If we can't assign into the current open txg without
 *     blocking, returns immediately with ERESTART.  This should be used
 *     whenever you're holding locks.  On an ERESTART error, the caller
 *     should drop locks, do a dmu_tx_wait(tx), and try again.
 *
 * (3) A specific txg.  Use this if you need to ensure that multiple
 *     transactions all sync in the same txg.  Like TXG_NOWAIT, it
 *     returns ERESTART if it can't assign you into the requested txg.
 */
int
dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
{
	int err;

	ASSERT(tx->tx_txg == 0);
	ASSERT(txg_how != 0);
	ASSERT(!dsl_pool_sync_context(tx->tx_pool));

	while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
		dmu_tx_unassign(tx);

		if (err != ERESTART || txg_how != TXG_WAIT)
			return (err);

		/* TXG_WAIT: block until assignment can succeed, then retry. */
		dmu_tx_wait(tx);
	}

	/*
	 * Assigned: drop the quiesce hold; the sync hold (tx_txgh) is
	 * retained until dmu_tx_commit().
	 */
	txg_rele_to_quiesce(&tx->tx_txgh);

	return (0);
}

/*
 * Block until a subsequent dmu_tx_assign() is likely to succeed: wait for
 * the pool to sync if it is suspended, for the contended dnode (if any) to
 * be released from the txg we failed to join, or simply for the next txg
 * to open.  Must be called on an unassigned tx (tx_txg == 0).
 */
void
dmu_tx_wait(dmu_tx_t *tx)
{
	spa_t *spa = tx->tx_pool->dp_spa;

	ASSERT(tx->tx_txg == 0);

	/*
	 * It's possible that the pool has become active after this thread
	 * has tried to obtain a tx.  If that's the case then its
	 * tx_lasttried_txg would not have been assigned.
	 */
	if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) {
		txg_wait_synced(tx->tx_pool, spa_last_synced_txg(spa) + 1);
	} else if (tx->tx_needassign_txh) {
		/*
		 * Wait for the dnode that made dmu_tx_try_assign() fail to
		 * drop its assignment to the earlier txg; dmu_tx_unassign()
		 * and dmu_tx_commit() broadcast dn_notxholds when the last
		 * hold is released.
		 */
		dnode_t *dn = tx->tx_needassign_txh->txh_dnode;

		mutex_enter(&dn->dn_mtx);
		while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1)
			cv_wait(&dn->dn_notxholds, &dn->dn_mtx);
		mutex_exit(&dn->dn_mtx);
		tx->tx_needassign_txh = NULL;
	} else {
		/* The txg was simply full; wait for the next one to open. */
		txg_wait_open(tx->tx_pool, tx->tx_lasttried_txg + 1);
	}
}

/*
 * ZFS_DEBUG-only bookkeeping: record that this tx will use (delta > 0) or
 * free (delta < 0) 'delta' bytes, asserting that cumulative writes stay
 * within the tx_space_towrite estimate made at assign time.  No-op in
 * non-debug builds.
 */
void
dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta)
{
#ifdef ZFS_DEBUG
	if (tx->tx_dir == NULL || delta == 0)
		return;

	if (delta > 0) {
		ASSERT3U(refcount_count(&tx->tx_space_written) + delta, <=,
		    tx->tx_space_towrite);
		(void) refcount_add_many(&tx->tx_space_written, delta, NULL);
	} else {
		(void) refcount_add_many(&tx->tx_space_freed, -delta, NULL);
	}
#endif
}

/*
 * Commit an assigned tx: release every dnode hold (waking dmu_tx_wait()
 * waiters when the last hold for this txg drops), clear the temporary
 * space reservation, release the txg sync hold, and free the tx.
 */
void
dmu_tx_commit(dmu_tx_t *tx)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg != 0);

	while (txh = list_head(&tx->tx_holds)) {
		dnode_t *dn = txh->txh_dnode;

		list_remove(&tx->tx_holds, txh);
		kmem_free(txh, sizeof (dmu_tx_hold_t));
		if (dn == NULL)
			continue;
		mutex_enter(&dn->dn_mtx);
		ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);

		if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
			/*
			 * Last tx hold on this dnode for this txg; wake any
			 * threads blocked on it in dmu_tx_wait().
			 */
			dn->dn_assigned_txg = 0;
			cv_broadcast(&dn->dn_notxholds);
		}
		mutex_exit(&dn->dn_mtx);
		dnode_rele(dn, tx);
	}

	if (tx->tx_tempreserve_cookie)
		dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx);

	/*
	 * Only normally-assigned txs hold the txg open; txs created via
	 * dmu_tx_create_assigned() (tx_anyobj == TRUE) never took the hold.
	 */
	if (tx->tx_anyobj == FALSE)
		txg_rele_to_sync(&tx->tx_txgh);
	list_destroy(&tx->tx_holds);
#ifdef ZFS_DEBUG
	dprintf("towrite=%llu written=%llu tofree=%llu freed=%llu\n",
	    tx->tx_space_towrite, refcount_count(&tx->tx_space_written),
	    tx->tx_space_tofree, refcount_count(&tx->tx_space_freed));
	refcount_destroy_many(&tx->tx_space_written,
	    refcount_count(&tx->tx_space_written));
	refcount_destroy_many(&tx->tx_space_freed,
	    refcount_count(&tx->tx_space_freed));
#endif
	kmem_free(tx, sizeof (dmu_tx_t));
}

/*
 * Abort a tx that was never assigned to a txg (tx_txg == 0): drop all
 * dnode holds and free the tx.
 */
void
dmu_tx_abort(dmu_tx_t *tx)
{
	dmu_tx_hold_t *txh;

	ASSERT(tx->tx_txg == 0);

	while (txh = list_head(&tx->tx_holds)) {
		dnode_t *dn = txh->txh_dnode;

		list_remove(&tx->tx_holds, txh);
		kmem_free(txh, sizeof (dmu_tx_hold_t));
		if (dn != NULL)
			dnode_rele(dn, tx);
	}
	list_destroy(&tx->tx_holds);
#ifdef ZFS_DEBUG
	refcount_destroy_many(&tx->tx_space_written,
	    refcount_count(&tx->tx_space_written));
	refcount_destroy_many(&tx->tx_space_freed,
	    refcount_count(&tx->tx_space_freed));
#endif
	kmem_free(tx, sizeof (dmu_tx_t));
}

/*
 * Return the txg this tx has been assigned to; valid only between
 * dmu_tx_assign() and dmu_tx_commit().
 */
uint64_t
dmu_tx_get_txg(dmu_tx_t *tx)
{
	ASSERT(tx->tx_txg != 0);

	return (tx->tx_txg);
}