/*
 * Allocate a new bpobj object in objset 'os'.
 *
 * The bonus-buffer size is chosen from the pool's on-disk version:
 * pre-BPOBJ_ACCOUNT pools get the V0 layout, pre-DEADLISTS pools the V1
 * layout (adds comp/uncomp accounting), and newer pools the full
 * bpobj_phys_t (adds the subobj list).  Returns the new object number.
 */
uint64_t
bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
{
	int size;

	if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_BPOBJ_ACCOUNT)
		size = BPOBJ_SIZE_V0;
	else if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS)
		size = BPOBJ_SIZE_V1;
	else
		size = sizeof (bpobj_phys_t);

	return (dmu_object_alloc(os, DMU_OT_BPOBJ, blocksize,
	    DMU_OT_BPOBJ_HDR, size, tx));
}

/*
 * Recursively free bpobj 'obj' and everything it references: each entry
 * in its subobj array is itself bpobj_free()d, then the subobj array
 * object and finally 'obj' itself are freed.
 *
 * The subobj array is walked backward so that a single cached dbuf
 * covers consecutive entries; a new dbuf is held only when the index
 * walks off the front of the currently held block.
 */
void
bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
{
	int64_t i;
	bpobj_t bpo;
	dmu_object_info_t doi;
	int epb;
	dmu_buf_t *dbuf = NULL;

	VERIFY3U(0, ==, bpobj_open(&bpo, os, obj));

	mutex_enter(&bpo.bpo_lock);

	/* Old-format bpobjs (or ones with no subobj array) have no children. */
	if (!bpo.bpo_havesubobj || bpo.bpo_phys->bpo_subobjs == 0)
		goto out;

	VERIFY3U(0, ==, dmu_object_info(os, bpo.bpo_phys->bpo_subobjs, &doi));
	/* Number of uint64_t object numbers per data block of the subobj array. */
	epb = doi.doi_data_block_size / sizeof (uint64_t);

	for (i = bpo.bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) {
		uint64_t *objarray;
		uint64_t offset, blkoff;

		offset = i * sizeof (uint64_t);
		blkoff = P2PHASE(i, epb);

		/* Walking backward: re-hold only when we leave the cached block. */
		if (dbuf == NULL || dbuf->db_offset > offset) {
			if (dbuf)
				dmu_buf_rele(dbuf, FTAG);
			VERIFY3U(0, ==, dmu_buf_hold(os,
			    bpo.bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0));
		}

		ASSERT3U(offset, >=, dbuf->db_offset);
		ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);

		objarray = dbuf->db_data;
		bpobj_free(os, objarray[blkoff], tx);
	}
	if (dbuf) {
		dmu_buf_rele(dbuf, FTAG);
		dbuf = NULL;
	}
	VERIFY3U(0, ==, dmu_object_free(os, bpo.bpo_phys->bpo_subobjs, tx));

out:
	mutex_exit(&bpo.bpo_lock);
	bpobj_close(&bpo);

	VERIFY3U(0, ==, dmu_object_free(os, obj, tx));
}

/*
 * Open bpobj 'object' into caller-supplied storage 'bpo', holding its
 * bonus buffer for the life of the open.  Returns 0 or an errno from
 * dmu_object_info()/dmu_bonus_hold().
 *
 * The bonus-buffer size determines which optional fields this bpobj
 * carries: > V0 means comp/uncomp accounting, > V1 means a subobj list.
 */
int
bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object)
{
	dmu_object_info_t doi;
	int err;

	err = dmu_object_info(os, object, &doi);
	if (err)
		return (err);

	bzero(bpo, sizeof (*bpo));
	mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL);

	ASSERT(bpo->bpo_dbuf == NULL);
	ASSERT(bpo->bpo_phys == NULL);
	ASSERT(object != 0);
	ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ);
	ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR);

	/* 'bpo' itself is the hold tag, released in bpobj_close(). */
	err = dmu_bonus_hold(os, object, bpo, &bpo->bpo_dbuf);
	if (err)
		return (err);

	bpo->bpo_os = os;
	bpo->bpo_object = object;
	bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT;
	bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0);
	bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1);
	bpo->bpo_phys = bpo->bpo_dbuf->db_data;
	return (0);
}

/*
 * Release all holds taken by bpobj_open() (bonus dbuf and any cached
 * data dbuf from bpobj_enqueue()) and reset 'bpo' to the closed state.
 */
void
bpobj_close(bpobj_t *bpo)
{
	/* Lame workaround for closing a bpobj that was never opened. */
	if (bpo->bpo_object == 0)
		return;

	dmu_buf_rele(bpo->bpo_dbuf, bpo);
	if (bpo->bpo_cached_dbuf != NULL)
		dmu_buf_rele(bpo->bpo_cached_dbuf, bpo);
	bpo->bpo_dbuf = NULL;
	bpo->bpo_phys = NULL;
	bpo->bpo_cached_dbuf = NULL;
	bpo->bpo_object = 0;

	mutex_destroy(&bpo->bpo_lock);
}

/*
 * Iterate over all entries of 'bpo', invoking func(arg, bp, tx) on each
 * block pointer, then recursing into each subobj.  Iteration runs
 * newest-to-oldest (backward) in both the blkptr array and the subobj
 * array, reusing a cached dbuf across consecutive entries.
 *
 * If 'free' is set, each visited entry is also removed: the space
 * accounting in bpo_phys is decremented as we go, the tail of the
 * blkptr/subobj arrays is trimmed with dmu_free_range(), and fully
 * drained subobjs are freed.  Because accounting is updated per entry,
 * stopping early (func error or I/O error) leaves the bpobj consistent
 * with the entries actually processed.
 *
 * Returns 0 on success or the first nonzero error from func or the DMU.
 */
static int
bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
    boolean_t free)
{
	dmu_object_info_t doi;
	int epb;
	int64_t i;
	int err = 0;
	dmu_buf_t *dbuf = NULL;

	mutex_enter(&bpo->bpo_lock);

	if (free)
		dmu_buf_will_dirty(bpo->bpo_dbuf, tx);

	/* Phase 1: the bpobj's own blkptr array, walked backward. */
	for (i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= 0; i--) {
		blkptr_t *bparray;
		blkptr_t *bp;
		uint64_t offset, blkoff;

		offset = i * sizeof (blkptr_t);
		blkoff = P2PHASE(i, bpo->bpo_epb);

		if (dbuf == NULL || dbuf->db_offset > offset) {
			if (dbuf)
				dmu_buf_rele(dbuf, FTAG);
			err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset,
			    FTAG, &dbuf, 0);
			if (err)
				break;
		}

		ASSERT3U(offset, >=, dbuf->db_offset);
		ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);

		bparray = dbuf->db_data;
		bp = &bparray[blkoff];
		err = func(arg, bp, tx);
		if (err)
			break;
		if (free) {
			/* Account for this entry's removal as we go. */
			bpo->bpo_phys->bpo_bytes -=
			    bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp);
			ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0);
			if (bpo->bpo_havecomp) {
				bpo->bpo_phys->bpo_comp -= BP_GET_PSIZE(bp);
				bpo->bpo_phys->bpo_uncomp -= BP_GET_UCSIZE(bp);
			}
			bpo->bpo_phys->bpo_num_blkptrs--;
			ASSERT3S(bpo->bpo_phys->bpo_num_blkptrs, >=, 0);
		}
	}
	if (dbuf) {
		dmu_buf_rele(dbuf, FTAG);
		dbuf = NULL;
	}
	if (free) {
		/*
		 * i is the last index NOT processed (-1 if we finished);
		 * free everything from the first processed entry onward.
		 */
		i++;
		VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object,
		    i * sizeof (blkptr_t), -1ULL, tx));
	}
	if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0)
		goto out;

	/* Phase 2: recurse into each subobj, also backward. */
	ASSERT(bpo->bpo_havecomp);
	err = dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi);
	if (err) {
		mutex_exit(&bpo->bpo_lock);
		return (err);
	}
	epb = doi.doi_data_block_size / sizeof (uint64_t);

	for (i = bpo->bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) {
		uint64_t *objarray;
		uint64_t offset, blkoff;
		bpobj_t sublist;
		uint64_t used_before, comp_before, uncomp_before;
		uint64_t used_after, comp_after, uncomp_after;

		offset = i * sizeof (uint64_t);
		blkoff = P2PHASE(i, epb);

		if (dbuf == NULL || dbuf->db_offset > offset) {
			if (dbuf)
				dmu_buf_rele(dbuf, FTAG);
			err = dmu_buf_hold(bpo->bpo_os,
			    bpo->bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0);
			if (err)
				break;
		}

		ASSERT3U(offset, >=, dbuf->db_offset);
		ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);

		objarray = dbuf->db_data;
		err = bpobj_open(&sublist, bpo->bpo_os, objarray[blkoff]);
		if (err)
			break;
		if (free) {
			err = bpobj_space(&sublist,
			    &used_before, &comp_before, &uncomp_before);
			if (err)
				break;
		}
		err = bpobj_iterate_impl(&sublist, func, arg, tx, free);
		if (free) {
			/*
			 * Even on error, charge ourselves for whatever the
			 * recursion actually removed (before - after).
			 */
			VERIFY3U(0, ==, bpobj_space(&sublist,
			    &used_after, &comp_after, &uncomp_after));
			bpo->bpo_phys->bpo_bytes -= used_before - used_after;
			ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0);
			bpo->bpo_phys->bpo_comp -= comp_before - comp_after;
			bpo->bpo_phys->bpo_uncomp -=
			    uncomp_before - uncomp_after;
		}

		bpobj_close(&sublist);
		if (err)
			break;
		if (free) {
			/* The recursion drained the subobj; drop the object. */
			err = dmu_object_free(bpo->bpo_os,
			    objarray[blkoff], tx);
			if (err)
				break;
			bpo->bpo_phys->bpo_num_subobjs--;
			ASSERT3S(bpo->bpo_phys->bpo_num_subobjs, >=, 0);
		}
	}
	if (dbuf) {
		dmu_buf_rele(dbuf, FTAG);
		dbuf = NULL;
	}
	if (free) {
		VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os,
		    bpo->bpo_phys->bpo_subobjs,
		    (i + 1) * sizeof (uint64_t), -1ULL, tx));
	}

out:
	/* If there are no entries, there should be no bytes. */
	ASSERT(bpo->bpo_phys->bpo_num_blkptrs > 0 ||
	    (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_num_subobjs > 0) ||
	    bpo->bpo_phys->bpo_bytes == 0);

	mutex_exit(&bpo->bpo_lock);
	return (err);
}

/*
 * Iterate and remove the entries.  If func returns nonzero, iteration
 * will stop and that entry will not be removed.
 */
int
bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
{
	return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE));
}

/*
 * Iterate the entries.  If func returns nonzero, iteration will stop.
 */
int
bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
{
	return (bpobj_iterate_impl(bpo, func, arg, tx, B_FALSE));
}

/*
 * Append bpobj 'subobj' to bpo's subobj list, folding subobj's space
 * accounting into bpo.  An empty subobj is freed instead of enqueued.
 *
 * As a flattening optimization: if subobj's own subobj list fits in a
 * single data block, its entries are copied directly onto bpo's list
 * (and subobj's list object freed), limiting recursion depth in
 * bpobj_iterate.
 *
 * Requires a bpobj with both the subobj and comp fields (post-DEADLISTS
 * format).  On return, 'subobj' is owned by bpo.
 */
void
bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
{
	bpobj_t subbpo;
	uint64_t used, comp, uncomp, subsubobjs;

	ASSERT(bpo->bpo_havesubobj);
	ASSERT(bpo->bpo_havecomp);

	VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
	VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));

	if (used == 0) {
		/* No point in having an empty subobj. */
		bpobj_close(&subbpo);
		bpobj_free(bpo->bpo_os, subobj, tx);
		return;
	}

	dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
	if (bpo->bpo_phys->bpo_subobjs == 0) {
		/* First subobj ever: lazily create the subobj-list object. */
		bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
		    DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx);
	}

	mutex_enter(&bpo->bpo_lock);
	dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
	    bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
	    sizeof (subobj), &subobj, tx);
	bpo->bpo_phys->bpo_num_subobjs++;

	/*
	 * If subobj has only one block of subobjs, then move subobj's
	 * subobjs to bpo's subobj list directly.  This reduces
	 * recursion in bpobj_iterate due to nested subobjs.
	 */
	subsubobjs = subbpo.bpo_phys->bpo_subobjs;
	if (subsubobjs != 0) {
		dmu_object_info_t doi;

		VERIFY3U(0, ==, dmu_object_info(bpo->bpo_os, subsubobjs, &doi));
		if (doi.doi_max_offset == doi.doi_data_block_size) {
			dmu_buf_t *subdb;
			uint64_t numsubsub = subbpo.bpo_phys->bpo_num_subobjs;

			VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, subsubobjs,
			    0, FTAG, &subdb, 0));
			dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
			    bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
			    numsubsub * sizeof (subobj), subdb->db_data, tx);
			dmu_buf_rele(subdb, FTAG);
			bpo->bpo_phys->bpo_num_subobjs += numsubsub;

			dmu_buf_will_dirty(subbpo.bpo_dbuf, tx);
			subbpo.bpo_phys->bpo_subobjs = 0;
			VERIFY3U(0, ==, dmu_object_free(bpo->bpo_os,
			    subsubobjs, tx));
		}
	}
	bpo->bpo_phys->bpo_bytes += used;
	bpo->bpo_phys->bpo_comp += comp;
	bpo->bpo_phys->bpo_uncomp += uncomp;
	mutex_exit(&bpo->bpo_lock);

	bpobj_close(&subbpo);
}

/*
 * Append block pointer 'bp' to bpo's blkptr array and update the space
 * accounting.  The stored copy is scrubbed of the fill count and (for
 * non-dedup BPs) the checksum so the array compresses better.  The
 * dbuf covering the append position is cached on the bpobj (tagged
 * with 'bpo') so consecutive enqueues avoid re-holding it.
 */
void
bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx)
{
	blkptr_t stored_bp = *bp;
	uint64_t offset;
	int blkoff;
	blkptr_t *bparray;

	ASSERT(!BP_IS_HOLE(bp));

	/* We never need the fill count. */
	stored_bp.blk_fill = 0;

	/* The bpobj will compress better if we can leave off the checksum */
	if (!BP_GET_DEDUP(bp))
		bzero(&stored_bp.blk_cksum, sizeof (stored_bp.blk_cksum));

	mutex_enter(&bpo->bpo_lock);

	offset = bpo->bpo_phys->bpo_num_blkptrs * sizeof (stored_bp);
	blkoff = P2PHASE(bpo->bpo_phys->bpo_num_blkptrs, bpo->bpo_epb);

	if (bpo->bpo_cached_dbuf == NULL ||
	    offset < bpo->bpo_cached_dbuf->db_offset ||
	    offset >= bpo->bpo_cached_dbuf->db_offset +
	    bpo->bpo_cached_dbuf->db_size) {
		if (bpo->bpo_cached_dbuf)
			dmu_buf_rele(bpo->bpo_cached_dbuf, bpo);
		VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, bpo->bpo_object,
		    offset, bpo, &bpo->bpo_cached_dbuf, 0));
	}

	dmu_buf_will_dirty(bpo->bpo_cached_dbuf, tx);
	bparray = bpo->bpo_cached_dbuf->db_data;
	bparray[blkoff] = stored_bp;

	dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
	bpo->bpo_phys->bpo_num_blkptrs++;
	bpo->bpo_phys->bpo_bytes +=
	    bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp);
	if (bpo->bpo_havecomp) {
		bpo->bpo_phys->bpo_comp += BP_GET_PSIZE(bp);
		bpo->bpo_phys->bpo_uncomp += BP_GET_UCSIZE(bp);
	}
	mutex_exit(&bpo->bpo_lock);
}

/*
 * Callback state for space_range_cb: accumulates used/comp/uncomp space
 * of BPs born in the txg range (mintxg, maxtxg].
 */
struct space_range_arg {
	spa_t *spa;
	uint64_t mintxg;
	uint64_t maxtxg;
	uint64_t used;
	uint64_t comp;
	uint64_t uncomp;
};

/* ARGSUSED */
static int
space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	struct space_range_arg *sra = arg;

	if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) {
|