/* zap_micro.c revision 264669 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011, 2014 by Delphix. All rights reserved. 24 */ 25 26#include <sys/zio.h> 27#include <sys/spa.h> 28#include <sys/dmu.h> 29#include <sys/zfs_context.h> 30#include <sys/zap.h> 31#include <sys/refcount.h> 32#include <sys/zap_impl.h> 33#include <sys/zap_leaf.h> 34#include <sys/avl.h> 35#include <sys/arc.h> 36 37#ifdef _KERNEL 38#include <sys/sunddi.h> 39#endif 40 41static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags); 42 43uint64_t 44zap_getflags(zap_t *zap) 45{ 46 if (zap->zap_ismicro) 47 return (0); 48 return (zap->zap_u.zap_fat.zap_phys->zap_flags); 49} 50 51int 52zap_hashbits(zap_t *zap) 53{ 54 if (zap_getflags(zap) & ZAP_FLAG_HASH64) 55 return (48); 56 else 57 return (28); 58} 59 60uint32_t 61zap_maxcd(zap_t *zap) 62{ 63 if (zap_getflags(zap) & ZAP_FLAG_HASH64) 64 return ((1<<16)-1); 65 else 66 return (-1U); 67} 68 69static uint64_t 70zap_hash(zap_name_t *zn) 71{ 72 zap_t *zap = zn->zn_zap; 73 uint64_t h = 0; 74 75 if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) { 76 ASSERT(zap_getflags(zap) & 
ZAP_FLAG_UINT64_KEY); 77 h = *(uint64_t *)zn->zn_key_orig; 78 } else { 79 h = zap->zap_salt; 80 ASSERT(h != 0); 81 ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); 82 83 if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { 84 int i; 85 const uint64_t *wp = zn->zn_key_norm; 86 87 ASSERT(zn->zn_key_intlen == 8); 88 for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) { 89 int j; 90 uint64_t word = *wp; 91 92 for (j = 0; j < zn->zn_key_intlen; j++) { 93 h = (h >> 8) ^ 94 zfs_crc64_table[(h ^ word) & 0xFF]; 95 word >>= NBBY; 96 } 97 } 98 } else { 99 int i, len; 100 const uint8_t *cp = zn->zn_key_norm; 101 102 /* 103 * We previously stored the terminating null on 104 * disk, but didn't hash it, so we need to 105 * continue to not hash it. (The 106 * zn_key_*_numints includes the terminating 107 * null for non-binary keys.) 108 */ 109 len = zn->zn_key_norm_numints - 1; 110 111 ASSERT(zn->zn_key_intlen == 1); 112 for (i = 0; i < len; cp++, i++) { 113 h = (h >> 8) ^ 114 zfs_crc64_table[(h ^ *cp) & 0xFF]; 115 } 116 } 117 } 118 /* 119 * Don't use all 64 bits, since we need some in the cookie for 120 * the collision differentiator. We MUST use the high bits, 121 * since those are the ones that we first pay attention to when 122 * chosing the bucket. 
123 */ 124 h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1); 125 126 return (h); 127} 128 129static int 130zap_normalize(zap_t *zap, const char *name, char *namenorm) 131{ 132 size_t inlen, outlen; 133 int err; 134 135 ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY)); 136 137 inlen = strlen(name) + 1; 138 outlen = ZAP_MAXNAMELEN; 139 140 err = 0; 141 (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen, 142 zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL | 143 U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err); 144 145 return (err); 146} 147 148boolean_t 149zap_match(zap_name_t *zn, const char *matchname) 150{ 151 ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY)); 152 153 if (zn->zn_matchtype == MT_FIRST) { 154 char norm[ZAP_MAXNAMELEN]; 155 156 if (zap_normalize(zn->zn_zap, matchname, norm) != 0) 157 return (B_FALSE); 158 159 return (strcmp(zn->zn_key_norm, norm) == 0); 160 } else { 161 /* MT_BEST or MT_EXACT */ 162 return (strcmp(zn->zn_key_orig, matchname) == 0); 163 } 164} 165 166void 167zap_name_free(zap_name_t *zn) 168{ 169 kmem_free(zn, sizeof (zap_name_t)); 170} 171 172zap_name_t * 173zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) 174{ 175 zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); 176 177 zn->zn_zap = zap; 178 zn->zn_key_intlen = sizeof (*key); 179 zn->zn_key_orig = key; 180 zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; 181 zn->zn_matchtype = mt; 182 if (zap->zap_normflags) { 183 if (zap_normalize(zap, key, zn->zn_normbuf) != 0) { 184 zap_name_free(zn); 185 return (NULL); 186 } 187 zn->zn_key_norm = zn->zn_normbuf; 188 zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; 189 } else { 190 if (mt != MT_EXACT) { 191 zap_name_free(zn); 192 return (NULL); 193 } 194 zn->zn_key_norm = zn->zn_key_orig; 195 zn->zn_key_norm_numints = zn->zn_key_orig_numints; 196 } 197 198 zn->zn_hash = zap_hash(zn); 199 return (zn); 200} 201 202zap_name_t * 203zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int 
numints) 204{ 205 zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); 206 207 ASSERT(zap->zap_normflags == 0); 208 zn->zn_zap = zap; 209 zn->zn_key_intlen = sizeof (*key); 210 zn->zn_key_orig = zn->zn_key_norm = key; 211 zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints; 212 zn->zn_matchtype = MT_EXACT; 213 214 zn->zn_hash = zap_hash(zn); 215 return (zn); 216} 217 218static void 219mzap_byteswap(mzap_phys_t *buf, size_t size) 220{ 221 int i, max; 222 buf->mz_block_type = BSWAP_64(buf->mz_block_type); 223 buf->mz_salt = BSWAP_64(buf->mz_salt); 224 buf->mz_normflags = BSWAP_64(buf->mz_normflags); 225 max = (size / MZAP_ENT_LEN) - 1; 226 for (i = 0; i < max; i++) { 227 buf->mz_chunk[i].mze_value = 228 BSWAP_64(buf->mz_chunk[i].mze_value); 229 buf->mz_chunk[i].mze_cd = 230 BSWAP_32(buf->mz_chunk[i].mze_cd); 231 } 232} 233 234void 235zap_byteswap(void *buf, size_t size) 236{ 237 uint64_t block_type; 238 239 block_type = *(uint64_t *)buf; 240 241 if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) { 242 /* ASSERT(magic == ZAP_LEAF_MAGIC); */ 243 mzap_byteswap(buf, size); 244 } else { 245 fzap_byteswap(buf, size); 246 } 247} 248 249static int 250mze_compare(const void *arg1, const void *arg2) 251{ 252 const mzap_ent_t *mze1 = arg1; 253 const mzap_ent_t *mze2 = arg2; 254 255 if (mze1->mze_hash > mze2->mze_hash) 256 return (+1); 257 if (mze1->mze_hash < mze2->mze_hash) 258 return (-1); 259 if (mze1->mze_cd > mze2->mze_cd) 260 return (+1); 261 if (mze1->mze_cd < mze2->mze_cd) 262 return (-1); 263 return (0); 264} 265 266static int 267mze_insert(zap_t *zap, int chunkid, uint64_t hash) 268{ 269 mzap_ent_t *mze; 270 avl_index_t idx; 271 272 ASSERT(zap->zap_ismicro); 273 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 274 275 mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP); 276 mze->mze_chunkid = chunkid; 277 mze->mze_hash = hash; 278 mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; 279 ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0); 280 if 
(avl_find(&zap->zap_m.zap_avl, mze, &idx) != NULL) { 281 kmem_free(mze, sizeof (mzap_ent_t)); 282 return (EEXIST); 283 } 284 avl_insert(&zap->zap_m.zap_avl, mze, idx); 285 return (0); 286} 287 288static mzap_ent_t * 289mze_find(zap_name_t *zn) 290{ 291 mzap_ent_t mze_tofind; 292 mzap_ent_t *mze; 293 avl_index_t idx; 294 avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl; 295 296 ASSERT(zn->zn_zap->zap_ismicro); 297 ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); 298 299 mze_tofind.mze_hash = zn->zn_hash; 300 mze_tofind.mze_cd = 0; 301 302again: 303 mze = avl_find(avl, &mze_tofind, &idx); 304 if (mze == NULL) 305 mze = avl_nearest(avl, idx, AVL_AFTER); 306 for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { 307 ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); 308 if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) 309 return (mze); 310 } 311 if (zn->zn_matchtype == MT_BEST) { 312 zn->zn_matchtype = MT_FIRST; 313 goto again; 314 } 315 return (NULL); 316} 317 318static uint32_t 319mze_find_unused_cd(zap_t *zap, uint64_t hash) 320{ 321 mzap_ent_t mze_tofind; 322 mzap_ent_t *mze; 323 avl_index_t idx; 324 avl_tree_t *avl = &zap->zap_m.zap_avl; 325 uint32_t cd; 326 327 ASSERT(zap->zap_ismicro); 328 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 329 330 mze_tofind.mze_hash = hash; 331 mze_tofind.mze_cd = 0; 332 333 cd = 0; 334 for (mze = avl_find(avl, &mze_tofind, &idx); 335 mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { 336 if (mze->mze_cd != cd) 337 break; 338 cd++; 339 } 340 341 return (cd); 342} 343 344static void 345mze_remove(zap_t *zap, mzap_ent_t *mze) 346{ 347 ASSERT(zap->zap_ismicro); 348 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 349 350 avl_remove(&zap->zap_m.zap_avl, mze); 351 kmem_free(mze, sizeof (mzap_ent_t)); 352} 353 354static void 355mze_destroy(zap_t *zap) 356{ 357 mzap_ent_t *mze; 358 void *avlcookie = NULL; 359 360 while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie)) 361 kmem_free(mze, sizeof (mzap_ent_t)); 
362 avl_destroy(&zap->zap_m.zap_avl); 363} 364 365static zap_t * 366mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) 367{ 368 zap_t *winner; 369 zap_t *zap; 370 int i; 371 372 ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); 373 374 zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP); 375 rw_init(&zap->zap_rwlock, 0, 0, 0); 376 rw_enter(&zap->zap_rwlock, RW_WRITER); 377 zap->zap_objset = os; 378 zap->zap_object = obj; 379 zap->zap_dbuf = db; 380 381 if (*(uint64_t *)db->db_data != ZBT_MICRO) { 382 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); 383 zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1; 384 } else { 385 zap->zap_ismicro = TRUE; 386 } 387 388 /* 389 * Make sure that zap_ismicro is set before we let others see 390 * it, because zap_lockdir() checks zap_ismicro without the lock 391 * held. 392 */ 393 winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict); 394 395 if (winner != NULL) { 396 rw_exit(&zap->zap_rwlock); 397 rw_destroy(&zap->zap_rwlock); 398 if (!zap->zap_ismicro) 399 mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 400 kmem_free(zap, sizeof (zap_t)); 401 return (winner); 402 } 403 404 if (zap->zap_ismicro) { 405 zap->zap_salt = zap->zap_m.zap_phys->mz_salt; 406 zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags; 407 zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; 408 avl_create(&zap->zap_m.zap_avl, mze_compare, 409 sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); 410 411 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { 412 mzap_ent_phys_t *mze = 413 &zap->zap_m.zap_phys->mz_chunk[i]; 414 if (mze->mze_name[0]) { 415 zap_name_t *zn; 416 417 zn = zap_name_alloc(zap, mze->mze_name, 418 MT_EXACT); 419 if (mze_insert(zap, i, zn->zn_hash) == 0) 420 zap->zap_m.zap_num_entries++; 421 else { 422 printf("ZFS WARNING: Duplicated ZAP " 423 "entry detected (%s).\n", 424 mze->mze_name); 425 } 426 zap_name_free(zn); 427 } 428 } 429 } else { 430 zap->zap_salt = zap->zap_f.zap_phys->zap_salt; 431 zap->zap_normflags 
= zap->zap_f.zap_phys->zap_normflags; 432 433 ASSERT3U(sizeof (struct zap_leaf_header), ==, 434 2*ZAP_LEAF_CHUNKSIZE); 435 436 /* 437 * The embedded pointer table should not overlap the 438 * other members. 439 */ 440 ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >, 441 &zap->zap_f.zap_phys->zap_salt); 442 443 /* 444 * The embedded pointer table should end at the end of 445 * the block 446 */ 447 ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap, 448 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) - 449 (uintptr_t)zap->zap_f.zap_phys, ==, 450 zap->zap_dbuf->db_size); 451 } 452 rw_exit(&zap->zap_rwlock); 453 return (zap); 454} 455 456int 457zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, 458 krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) 459{ 460 zap_t *zap; 461 dmu_buf_t *db; 462 krw_t lt; 463 int err; 464 465 *zapp = NULL; 466 467 err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH); 468 if (err) 469 return (err); 470 471#ifdef ZFS_DEBUG 472 { 473 dmu_object_info_t doi; 474 dmu_object_info_from_db(db, &doi); 475 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); 476 } 477#endif 478 479 zap = dmu_buf_get_user(db); 480 if (zap == NULL) 481 zap = mzap_open(os, obj, db); 482 483 /* 484 * We're checking zap_ismicro without the lock held, in order to 485 * tell what type of lock we want. Once we have some sort of 486 * lock, see if it really is the right type. In practice this 487 * can only be different if it was upgraded from micro to fat, 488 * and micro wanted WRITER but fat only needs READER. 489 */ 490 lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti; 491 rw_enter(&zap->zap_rwlock, lt); 492 if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) { 493 /* it was upgraded, now we only need reader */ 494 ASSERT(lt == RW_WRITER); 495 ASSERT(RW_READER == 496 (!zap->zap_ismicro && fatreader) ? 
RW_READER : lti); 497 rw_downgrade(&zap->zap_rwlock); 498 lt = RW_READER; 499 } 500 501 zap->zap_objset = os; 502 503 if (lt == RW_WRITER) 504 dmu_buf_will_dirty(db, tx); 505 506 ASSERT3P(zap->zap_dbuf, ==, db); 507 508 ASSERT(!zap->zap_ismicro || 509 zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks); 510 if (zap->zap_ismicro && tx && adding && 511 zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { 512 uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; 513 if (newsz > MZAP_MAX_BLKSZ) { 514 dprintf("upgrading obj %llu: num_entries=%u\n", 515 obj, zap->zap_m.zap_num_entries); 516 *zapp = zap; 517 return (mzap_upgrade(zapp, tx, 0)); 518 } 519 err = dmu_object_set_blocksize(os, obj, newsz, 0, tx); 520 ASSERT0(err); 521 zap->zap_m.zap_num_chunks = 522 db->db_size / MZAP_ENT_LEN - 1; 523 } 524 525 *zapp = zap; 526 return (0); 527} 528 529void 530zap_unlockdir(zap_t *zap) 531{ 532 rw_exit(&zap->zap_rwlock); 533 dmu_buf_rele(zap->zap_dbuf, NULL); 534} 535 536static int 537mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags) 538{ 539 mzap_phys_t *mzp; 540 int i, sz, nchunks; 541 int err = 0; 542 zap_t *zap = *zapp; 543 544 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 545 546 sz = zap->zap_dbuf->db_size; 547 mzp = kmem_alloc(sz, KM_SLEEP); 548 bcopy(zap->zap_dbuf->db_data, mzp, sz); 549 nchunks = zap->zap_m.zap_num_chunks; 550 551 if (!flags) { 552 err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, 553 1ULL << fzap_default_block_shift, 0, tx); 554 if (err) { 555 kmem_free(mzp, sz); 556 return (err); 557 } 558 } 559 560 dprintf("upgrading obj=%llu with %u chunks\n", 561 zap->zap_object, nchunks); 562 /* XXX destroy the avl later, so we can use the stored hash value */ 563 mze_destroy(zap); 564 565 fzap_upgrade(zap, tx, flags); 566 567 for (i = 0; i < nchunks; i++) { 568 mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; 569 zap_name_t *zn; 570 if (mze->mze_name[0] == 0) 571 continue; 572 dprintf("adding %s=%llu\n", 573 mze->mze_name, mze->mze_value); 574 
zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT); 575 err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx); 576 zap = zn->zn_zap; /* fzap_add_cd() may change zap */ 577 zap_name_free(zn); 578 if (err) 579 break; 580 } 581 kmem_free(mzp, sz); 582 *zapp = zap; 583 return (err); 584} 585 586void 587mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, 588 dmu_tx_t *tx) 589{ 590 dmu_buf_t *db; 591 mzap_phys_t *zp; 592 593 VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); 594 595#ifdef ZFS_DEBUG 596 { 597 dmu_object_info_t doi; 598 dmu_object_info_from_db(db, &doi); 599 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); 600 } 601#endif 602 603 dmu_buf_will_dirty(db, tx); 604 zp = db->db_data; 605 zp->mz_block_type = ZBT_MICRO; 606 zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; 607 zp->mz_normflags = normflags; 608 dmu_buf_rele(db, FTAG); 609 610 if (flags != 0) { 611 zap_t *zap; 612 /* Only fat zap supports flags; upgrade immediately. 
*/ 613 VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER, 614 B_FALSE, B_FALSE, &zap)); 615 VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags)); 616 zap_unlockdir(zap); 617 } 618} 619 620int 621zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, 622 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 623{ 624 return (zap_create_claim_norm(os, obj, 625 0, ot, bonustype, bonuslen, tx)); 626} 627 628int 629zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags, 630 dmu_object_type_t ot, 631 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 632{ 633 int err; 634 635 err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx); 636 if (err != 0) 637 return (err); 638 mzap_create_impl(os, obj, normflags, 0, tx); 639 return (0); 640} 641 642uint64_t 643zap_create(objset_t *os, dmu_object_type_t ot, 644 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 645{ 646 return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx)); 647} 648 649uint64_t 650zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot, 651 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 652{ 653 uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); 654 655 mzap_create_impl(os, obj, normflags, 0, tx); 656 return (obj); 657} 658 659uint64_t 660zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, 661 dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, 662 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 663{ 664 uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); 665 666 ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT && 667 leaf_blockshift <= SPA_MAXBLOCKSHIFT && 668 indirect_blockshift >= SPA_MINBLOCKSHIFT && 669 indirect_blockshift <= SPA_MAXBLOCKSHIFT); 670 671 VERIFY(dmu_object_set_blocksize(os, obj, 672 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0); 673 674 mzap_create_impl(os, obj, normflags, flags, tx); 675 return (obj); 676} 677 678int 679zap_destroy(objset_t *os, uint64_t 
zapobj, dmu_tx_t *tx) 680{ 681 /* 682 * dmu_object_free will free the object number and free the 683 * data. Freeing the data will cause our pageout function to be 684 * called, which will destroy our data (zap_leaf_t's and zap_t). 685 */ 686 687 return (dmu_object_free(os, zapobj, tx)); 688} 689 690_NOTE(ARGSUSED(0)) 691void 692zap_evict(dmu_buf_t *db, void *vzap) 693{ 694 zap_t *zap = vzap; 695 696 rw_destroy(&zap->zap_rwlock); 697 698 if (zap->zap_ismicro) 699 mze_destroy(zap); 700 else 701 mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 702 703 kmem_free(zap, sizeof (zap_t)); 704} 705 706int 707zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) 708{ 709 zap_t *zap; 710 int err; 711 712 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 713 if (err) 714 return (err); 715 if (!zap->zap_ismicro) { 716 err = fzap_count(zap, count); 717 } else { 718 *count = zap->zap_m.zap_num_entries; 719 } 720 zap_unlockdir(zap); 721 return (err); 722} 723 724/* 725 * zn may be NULL; if not specified, it will be computed if needed. 726 * See also the comment above zap_entry_normalization_conflict(). 
727 */ 728static boolean_t 729mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze) 730{ 731 mzap_ent_t *other; 732 int direction = AVL_BEFORE; 733 boolean_t allocdzn = B_FALSE; 734 735 if (zap->zap_normflags == 0) 736 return (B_FALSE); 737 738again: 739 for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction); 740 other && other->mze_hash == mze->mze_hash; 741 other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { 742 743 if (zn == NULL) { 744 zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name, 745 MT_FIRST); 746 allocdzn = B_TRUE; 747 } 748 if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { 749 if (allocdzn) 750 zap_name_free(zn); 751 return (B_TRUE); 752 } 753 } 754 755 if (direction == AVL_BEFORE) { 756 direction = AVL_AFTER; 757 goto again; 758 } 759 760 if (allocdzn) 761 zap_name_free(zn); 762 return (B_FALSE); 763} 764 765/* 766 * Routines for manipulating attributes. 767 */ 768 769int 770zap_lookup(objset_t *os, uint64_t zapobj, const char *name, 771 uint64_t integer_size, uint64_t num_integers, void *buf) 772{ 773 return (zap_lookup_norm(os, zapobj, name, integer_size, 774 num_integers, buf, MT_EXACT, NULL, 0, NULL)); 775} 776 777int 778zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, 779 uint64_t integer_size, uint64_t num_integers, void *buf, 780 matchtype_t mt, char *realname, int rn_len, 781 boolean_t *ncp) 782{ 783 zap_t *zap; 784 int err; 785 mzap_ent_t *mze; 786 zap_name_t *zn; 787 788 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 789 if (err) 790 return (err); 791 zn = zap_name_alloc(zap, name, mt); 792 if (zn == NULL) { 793 zap_unlockdir(zap); 794 return (SET_ERROR(ENOTSUP)); 795 } 796 797 if (!zap->zap_ismicro) { 798 err = fzap_lookup(zn, integer_size, num_integers, buf, 799 realname, rn_len, ncp); 800 } else { 801 mze = mze_find(zn); 802 if (mze == NULL) { 803 err = SET_ERROR(ENOENT); 804 } else { 805 if (num_integers < 1) { 806 err = SET_ERROR(EOVERFLOW); 807 } else if 
(integer_size != 8) { 808 err = SET_ERROR(EINVAL); 809 } else { 810 *(uint64_t *)buf = 811 MZE_PHYS(zap, mze)->mze_value; 812 (void) strlcpy(realname, 813 MZE_PHYS(zap, mze)->mze_name, rn_len); 814 if (ncp) { 815 *ncp = mzap_normalization_conflict(zap, 816 zn, mze); 817 } 818 } 819 } 820 } 821 zap_name_free(zn); 822 zap_unlockdir(zap); 823 return (err); 824} 825 826int 827zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 828 int key_numints) 829{ 830 zap_t *zap; 831 int err; 832 zap_name_t *zn; 833 834 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 835 if (err) 836 return (err); 837 zn = zap_name_alloc_uint64(zap, key, key_numints); 838 if (zn == NULL) { 839 zap_unlockdir(zap); 840 return (SET_ERROR(ENOTSUP)); 841 } 842 843 fzap_prefetch(zn); 844 zap_name_free(zn); 845 zap_unlockdir(zap); 846 return (err); 847} 848 849int 850zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 851 int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf) 852{ 853 zap_t *zap; 854 int err; 855 zap_name_t *zn; 856 857 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 858 if (err) 859 return (err); 860 zn = zap_name_alloc_uint64(zap, key, key_numints); 861 if (zn == NULL) { 862 zap_unlockdir(zap); 863 return (SET_ERROR(ENOTSUP)); 864 } 865 866 err = fzap_lookup(zn, integer_size, num_integers, buf, 867 NULL, 0, NULL); 868 zap_name_free(zn); 869 zap_unlockdir(zap); 870 return (err); 871} 872 873int 874zap_contains(objset_t *os, uint64_t zapobj, const char *name) 875{ 876 int err = zap_lookup_norm(os, zapobj, name, 0, 877 0, NULL, MT_EXACT, NULL, 0, NULL); 878 if (err == EOVERFLOW || err == EINVAL) 879 err = 0; /* found, but skipped reading the value */ 880 return (err); 881} 882 883int 884zap_length(objset_t *os, uint64_t zapobj, const char *name, 885 uint64_t *integer_size, uint64_t *num_integers) 886{ 887 zap_t *zap; 888 int err; 889 mzap_ent_t *mze; 890 zap_name_t *zn; 891 892 err = 
zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 893 if (err) 894 return (err); 895 zn = zap_name_alloc(zap, name, MT_EXACT); 896 if (zn == NULL) { 897 zap_unlockdir(zap); 898 return (SET_ERROR(ENOTSUP)); 899 } 900 if (!zap->zap_ismicro) { 901 err = fzap_length(zn, integer_size, num_integers); 902 } else { 903 mze = mze_find(zn); 904 if (mze == NULL) { 905 err = SET_ERROR(ENOENT); 906 } else { 907 if (integer_size) 908 *integer_size = 8; 909 if (num_integers) 910 *num_integers = 1; 911 } 912 } 913 zap_name_free(zn); 914 zap_unlockdir(zap); 915 return (err); 916} 917 918int 919zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 920 int key_numints, uint64_t *integer_size, uint64_t *num_integers) 921{ 922 zap_t *zap; 923 int err; 924 zap_name_t *zn; 925 926 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 927 if (err) 928 return (err); 929 zn = zap_name_alloc_uint64(zap, key, key_numints); 930 if (zn == NULL) { 931 zap_unlockdir(zap); 932 return (SET_ERROR(ENOTSUP)); 933 } 934 err = fzap_length(zn, integer_size, num_integers); 935 zap_name_free(zn); 936 zap_unlockdir(zap); 937 return (err); 938} 939 940static void 941mzap_addent(zap_name_t *zn, uint64_t value) 942{ 943 int i; 944 zap_t *zap = zn->zn_zap; 945 int start = zap->zap_m.zap_alloc_next; 946 uint32_t cd; 947 948 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 949 950#ifdef ZFS_DEBUG 951 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { 952 mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; 953 ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0); 954 } 955#endif 956 957 cd = mze_find_unused_cd(zap, zn->zn_hash); 958 /* given the limited size of the microzap, this can't happen */ 959 ASSERT(cd < zap_maxcd(zap)); 960 961again: 962 for (i = start; i < zap->zap_m.zap_num_chunks; i++) { 963 mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; 964 if (mze->mze_name[0] == 0) { 965 mze->mze_value = value; 966 mze->mze_cd = cd; 967 (void) strcpy(mze->mze_name, 
zn->zn_key_orig); 968 zap->zap_m.zap_num_entries++; 969 zap->zap_m.zap_alloc_next = i+1; 970 if (zap->zap_m.zap_alloc_next == 971 zap->zap_m.zap_num_chunks) 972 zap->zap_m.zap_alloc_next = 0; 973 VERIFY(0 == mze_insert(zap, i, zn->zn_hash)); 974 return; 975 } 976 } 977 if (start != 0) { 978 start = 0; 979 goto again; 980 } 981 ASSERT(!"out of entries!"); 982} 983 984int 985zap_add(objset_t *os, uint64_t zapobj, const char *key, 986 int integer_size, uint64_t num_integers, 987 const void *val, dmu_tx_t *tx) 988{ 989 zap_t *zap; 990 int err; 991 mzap_ent_t *mze; 992 const uint64_t *intval = val; 993 zap_name_t *zn; 994 995 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 996 if (err) 997 return (err); 998 zn = zap_name_alloc(zap, key, MT_EXACT); 999 if (zn == NULL) { 1000 zap_unlockdir(zap); 1001 return (SET_ERROR(ENOTSUP)); 1002 } 1003 if (!zap->zap_ismicro) { 1004 err = fzap_add(zn, integer_size, num_integers, val, tx); 1005 zap = zn->zn_zap; /* fzap_add() may change zap */ 1006 } else if (integer_size != 8 || num_integers != 1 || 1007 strlen(key) >= MZAP_NAME_LEN) { 1008 err = mzap_upgrade(&zn->zn_zap, tx, 0); 1009 if (err == 0) 1010 err = fzap_add(zn, integer_size, num_integers, val, tx); 1011 zap = zn->zn_zap; /* fzap_add() may change zap */ 1012 } else { 1013 mze = mze_find(zn); 1014 if (mze != NULL) { 1015 err = SET_ERROR(EEXIST); 1016 } else { 1017 mzap_addent(zn, *intval); 1018 } 1019 } 1020 ASSERT(zap == zn->zn_zap); 1021 zap_name_free(zn); 1022 if (zap != NULL) /* may be NULL if fzap_add() failed */ 1023 zap_unlockdir(zap); 1024 return (err); 1025} 1026 1027int 1028zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1029 int key_numints, int integer_size, uint64_t num_integers, 1030 const void *val, dmu_tx_t *tx) 1031{ 1032 zap_t *zap; 1033 int err; 1034 zap_name_t *zn; 1035 1036 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 1037 if (err) 1038 return (err); 1039 zn = zap_name_alloc_uint64(zap, key, 
key_numints); 1040 if (zn == NULL) { 1041 zap_unlockdir(zap); 1042 return (SET_ERROR(ENOTSUP)); 1043 } 1044 err = fzap_add(zn, integer_size, num_integers, val, tx); 1045 zap = zn->zn_zap; /* fzap_add() may change zap */ 1046 zap_name_free(zn); 1047 if (zap != NULL) /* may be NULL if fzap_add() failed */ 1048 zap_unlockdir(zap); 1049 return (err); 1050} 1051 1052int 1053zap_update(objset_t *os, uint64_t zapobj, const char *name, 1054 int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 1055{ 1056 zap_t *zap; 1057 mzap_ent_t *mze; 1058 uint64_t oldval; 1059 const uint64_t *intval = val; 1060 zap_name_t *zn; 1061 int err; 1062 1063#ifdef ZFS_DEBUG 1064 /* 1065 * If there is an old value, it shouldn't change across the 1066 * lockdir (eg, due to bprewrite's xlation). 1067 */ 1068 if (integer_size == 8 && num_integers == 1) 1069 (void) zap_lookup(os, zapobj, name, 8, 1, &oldval); 1070#endif 1071 1072 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 1073 if (err) 1074 return (err); 1075 zn = zap_name_alloc(zap, name, MT_EXACT); 1076 if (zn == NULL) { 1077 zap_unlockdir(zap); 1078 return (SET_ERROR(ENOTSUP)); 1079 } 1080 if (!zap->zap_ismicro) { 1081 err = fzap_update(zn, integer_size, num_integers, val, tx); 1082 zap = zn->zn_zap; /* fzap_update() may change zap */ 1083 } else if (integer_size != 8 || num_integers != 1 || 1084 strlen(name) >= MZAP_NAME_LEN) { 1085 dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", 1086 zapobj, integer_size, num_integers, name); 1087 err = mzap_upgrade(&zn->zn_zap, tx, 0); 1088 if (err == 0) 1089 err = fzap_update(zn, integer_size, num_integers, 1090 val, tx); 1091 zap = zn->zn_zap; /* fzap_update() may change zap */ 1092 } else { 1093 mze = mze_find(zn); 1094 if (mze != NULL) { 1095 ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval); 1096 MZE_PHYS(zap, mze)->mze_value = *intval; 1097 } else { 1098 mzap_addent(zn, *intval); 1099 } 1100 } 1101 ASSERT(zap == zn->zn_zap); 1102 zap_name_free(zn); 
1103 if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ 1104 zap_unlockdir(zap); 1105 return (err); 1106} 1107 1108int 1109zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1110 int key_numints, 1111 int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 1112{ 1113 zap_t *zap; 1114 zap_name_t *zn; 1115 int err; 1116 1117 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 1118 if (err) 1119 return (err); 1120 zn = zap_name_alloc_uint64(zap, key, key_numints); 1121 if (zn == NULL) { 1122 zap_unlockdir(zap); 1123 return (SET_ERROR(ENOTSUP)); 1124 } 1125 err = fzap_update(zn, integer_size, num_integers, val, tx); 1126 zap = zn->zn_zap; /* fzap_update() may change zap */ 1127 zap_name_free(zn); 1128 if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ 1129 zap_unlockdir(zap); 1130 return (err); 1131} 1132 1133int 1134zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) 1135{ 1136 return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx)); 1137} 1138 1139int 1140zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, 1141 matchtype_t mt, dmu_tx_t *tx) 1142{ 1143 zap_t *zap; 1144 int err; 1145 mzap_ent_t *mze; 1146 zap_name_t *zn; 1147 1148 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); 1149 if (err) 1150 return (err); 1151 zn = zap_name_alloc(zap, name, mt); 1152 if (zn == NULL) { 1153 zap_unlockdir(zap); 1154 return (SET_ERROR(ENOTSUP)); 1155 } 1156 if (!zap->zap_ismicro) { 1157 err = fzap_remove(zn, tx); 1158 } else { 1159 mze = mze_find(zn); 1160 if (mze == NULL) { 1161 err = SET_ERROR(ENOENT); 1162 } else { 1163 zap->zap_m.zap_num_entries--; 1164 bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid], 1165 sizeof (mzap_ent_phys_t)); 1166 mze_remove(zap, mze); 1167 } 1168 } 1169 zap_name_free(zn); 1170 zap_unlockdir(zap); 1171 return (err); 1172} 1173 1174int 1175zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1176 int key_numints, 
dmu_tx_t *tx) 1177{ 1178 zap_t *zap; 1179 int err; 1180 zap_name_t *zn; 1181 1182 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); 1183 if (err) 1184 return (err); 1185 zn = zap_name_alloc_uint64(zap, key, key_numints); 1186 if (zn == NULL) { 1187 zap_unlockdir(zap); 1188 return (SET_ERROR(ENOTSUP)); 1189 } 1190 err = fzap_remove(zn, tx); 1191 zap_name_free(zn); 1192 zap_unlockdir(zap); 1193 return (err); 1194} 1195 1196/* 1197 * Routines for iterating over the attributes. 1198 */ 1199 1200void 1201zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, 1202 uint64_t serialized) 1203{ 1204 zc->zc_objset = os; 1205 zc->zc_zap = NULL; 1206 zc->zc_leaf = NULL; 1207 zc->zc_zapobj = zapobj; 1208 zc->zc_serialized = serialized; 1209 zc->zc_hash = 0; 1210 zc->zc_cd = 0; 1211} 1212 1213void 1214zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) 1215{ 1216 zap_cursor_init_serialized(zc, os, zapobj, 0); 1217} 1218 1219void 1220zap_cursor_fini(zap_cursor_t *zc) 1221{ 1222 if (zc->zc_zap) { 1223 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 1224 zap_unlockdir(zc->zc_zap); 1225 zc->zc_zap = NULL; 1226 } 1227 if (zc->zc_leaf) { 1228 rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); 1229 zap_put_leaf(zc->zc_leaf); 1230 zc->zc_leaf = NULL; 1231 } 1232 zc->zc_objset = NULL; 1233} 1234 1235uint64_t 1236zap_cursor_serialize(zap_cursor_t *zc) 1237{ 1238 if (zc->zc_hash == -1ULL) 1239 return (-1ULL); 1240 if (zc->zc_zap == NULL) 1241 return (zc->zc_serialized); 1242 ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0); 1243 ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); 1244 1245 /* 1246 * We want to keep the high 32 bits of the cursor zero if we can, so 1247 * that 32-bit programs can access this. So usually use a small 1248 * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits 1249 * of the cursor. 
1250 * 1251 * [ collision differentiator | zap_hashbits()-bit hash value ] 1252 */ 1253 return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) | 1254 ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap))); 1255} 1256 1257int 1258zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) 1259{ 1260 int err; 1261 avl_index_t idx; 1262 mzap_ent_t mze_tofind; 1263 mzap_ent_t *mze; 1264 1265 if (zc->zc_hash == -1ULL) 1266 return (SET_ERROR(ENOENT)); 1267 1268 if (zc->zc_zap == NULL) { 1269 int hb; 1270 err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, 1271 RW_READER, TRUE, FALSE, &zc->zc_zap); 1272 if (err) 1273 return (err); 1274 1275 /* 1276 * To support zap_cursor_init_serialized, advance, retrieve, 1277 * we must add to the existing zc_cd, which may already 1278 * be 1 due to the zap_cursor_advance. 1279 */ 1280 ASSERT(zc->zc_hash == 0); 1281 hb = zap_hashbits(zc->zc_zap); 1282 zc->zc_hash = zc->zc_serialized << (64 - hb); 1283 zc->zc_cd += zc->zc_serialized >> hb; 1284 if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ 1285 zc->zc_cd = 0; 1286 } else { 1287 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 1288 } 1289 if (!zc->zc_zap->zap_ismicro) { 1290 err = fzap_cursor_retrieve(zc->zc_zap, zc, za); 1291 } else { 1292 mze_tofind.mze_hash = zc->zc_hash; 1293 mze_tofind.mze_cd = zc->zc_cd; 1294 1295 mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); 1296 if (mze == NULL) { 1297 mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, 1298 idx, AVL_AFTER); 1299 } 1300 if (mze) { 1301 mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); 1302 ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); 1303 za->za_normalization_conflict = 1304 mzap_normalization_conflict(zc->zc_zap, NULL, mze); 1305 za->za_integer_length = 8; 1306 za->za_num_integers = 1; 1307 za->za_first_integer = mzep->mze_value; 1308 (void) strcpy(za->za_name, mzep->mze_name); 1309 zc->zc_hash = mze->mze_hash; 1310 zc->zc_cd = mze->mze_cd; 1311 err = 0; 1312 } else { 1313 zc->zc_hash = -1ULL; 1314 err = 
SET_ERROR(ENOENT); 1315 } 1316 } 1317 rw_exit(&zc->zc_zap->zap_rwlock); 1318 return (err); 1319} 1320 1321void 1322zap_cursor_advance(zap_cursor_t *zc) 1323{ 1324 if (zc->zc_hash == -1ULL) 1325 return; 1326 zc->zc_cd++; 1327} 1328 1329int 1330zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt) 1331{ 1332 int err = 0; 1333 mzap_ent_t *mze; 1334 zap_name_t *zn; 1335 1336 if (zc->zc_zap == NULL) { 1337 err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, 1338 RW_READER, TRUE, FALSE, &zc->zc_zap); 1339 if (err) 1340 return (err); 1341 } else { 1342 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 1343 } 1344 1345 zn = zap_name_alloc(zc->zc_zap, name, mt); 1346 if (zn == NULL) { 1347 rw_exit(&zc->zc_zap->zap_rwlock); 1348 return (SET_ERROR(ENOTSUP)); 1349 } 1350 1351 if (!zc->zc_zap->zap_ismicro) { 1352 err = fzap_cursor_move_to_key(zc, zn); 1353 } else { 1354 mze = mze_find(zn); 1355 if (mze == NULL) { 1356 err = SET_ERROR(ENOENT); 1357 goto out; 1358 } 1359 zc->zc_hash = mze->mze_hash; 1360 zc->zc_cd = mze->mze_cd; 1361 } 1362 1363out: 1364 zap_name_free(zn); 1365 rw_exit(&zc->zc_zap->zap_rwlock); 1366 return (err); 1367} 1368 1369int 1370zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) 1371{ 1372 int err; 1373 zap_t *zap; 1374 1375 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 1376 if (err) 1377 return (err); 1378 1379 bzero(zs, sizeof (zap_stats_t)); 1380 1381 if (zap->zap_ismicro) { 1382 zs->zs_blocksize = zap->zap_dbuf->db_size; 1383 zs->zs_num_entries = zap->zap_m.zap_num_entries; 1384 zs->zs_num_blocks = 1; 1385 } else { 1386 fzap_get_stats(zap, zs); 1387 } 1388 zap_unlockdir(zap); 1389 return (0); 1390} 1391 1392int 1393zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, 1394 uint64_t *towrite, uint64_t *tooverwrite) 1395{ 1396 zap_t *zap; 1397 int err = 0; 1398 1399 1400 /* 1401 * Since, we don't have a name, we cannot figure out which blocks will 1402 * be affected in this operation. 
So, account for the worst case : 1403 * - 3 blocks overwritten: target leaf, ptrtbl block, header block 1404 * - 4 new blocks written if adding: 1405 * - 2 blocks for possibly split leaves, 1406 * - 2 grown ptrtbl blocks 1407 * 1408 * This also accomodates the case where an add operation to a fairly 1409 * large microzap results in a promotion to fatzap. 1410 */ 1411 if (name == NULL) { 1412 *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; 1413 return (err); 1414 } 1415 1416 /* 1417 * We lock the zap with adding == FALSE. Because, if we pass 1418 * the actual value of add, it could trigger a mzap_upgrade(). 1419 * At present we are just evaluating the possibility of this operation 1420 * and hence we donot want to trigger an upgrade. 1421 */ 1422 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 1423 if (err) 1424 return (err); 1425 1426 if (!zap->zap_ismicro) { 1427 zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT); 1428 if (zn) { 1429 err = fzap_count_write(zn, add, towrite, 1430 tooverwrite); 1431 zap_name_free(zn); 1432 } else { 1433 /* 1434 * We treat this case as similar to (name == NULL) 1435 */ 1436 *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; 1437 } 1438 } else { 1439 /* 1440 * We are here if (name != NULL) and this is a micro-zap. 1441 * We account for the header block depending on whether it 1442 * is freeable. 1443 * 1444 * Incase of an add-operation it is hard to find out 1445 * if this add will promote this microzap to fatzap. 1446 * Hence, we consider the worst case and account for the 1447 * blocks assuming this microzap would be promoted to a 1448 * fatzap. 
1449 * 1450 * 1 block overwritten : header block 1451 * 4 new blocks written : 2 new split leaf, 2 grown 1452 * ptrtbl blocks 1453 */ 1454 if (dmu_buf_freeable(zap->zap_dbuf)) 1455 *tooverwrite += SPA_MAXBLOCKSIZE; 1456 else 1457 *towrite += SPA_MAXBLOCKSIZE; 1458 1459 if (add) { 1460 *towrite += 4 * SPA_MAXBLOCKSIZE; 1461 } 1462 } 1463 1464 zap_unlockdir(zap); 1465 return (err); 1466} 1467