/* zap_micro.c revision 275782 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011, 2014 by Delphix. All rights reserved. 24 */ 25 26#include <sys/zio.h> 27#include <sys/spa.h> 28#include <sys/dmu.h> 29#include <sys/zfs_context.h> 30#include <sys/zap.h> 31#include <sys/refcount.h> 32#include <sys/zap_impl.h> 33#include <sys/zap_leaf.h> 34#include <sys/avl.h> 35#include <sys/arc.h> 36#include <sys/dmu_objset.h> 37 38#ifdef _KERNEL 39#include <sys/sunddi.h> 40#endif 41 42extern inline mzap_phys_t *zap_m_phys(zap_t *zap); 43 44static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags); 45 46uint64_t 47zap_getflags(zap_t *zap) 48{ 49 if (zap->zap_ismicro) 50 return (0); 51 return (zap_f_phys(zap)->zap_flags); 52} 53 54int 55zap_hashbits(zap_t *zap) 56{ 57 if (zap_getflags(zap) & ZAP_FLAG_HASH64) 58 return (48); 59 else 60 return (28); 61} 62 63uint32_t 64zap_maxcd(zap_t *zap) 65{ 66 if (zap_getflags(zap) & ZAP_FLAG_HASH64) 67 return ((1<<16)-1); 68 else 69 return (-1U); 70} 71 72static uint64_t 73zap_hash(zap_name_t *zn) 74{ 75 zap_t *zap = zn->zn_zap; 76 uint64_t h = 0; 77 78 if 
(zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) { 79 ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY); 80 h = *(uint64_t *)zn->zn_key_orig; 81 } else { 82 h = zap->zap_salt; 83 ASSERT(h != 0); 84 ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); 85 86 if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { 87 int i; 88 const uint64_t *wp = zn->zn_key_norm; 89 90 ASSERT(zn->zn_key_intlen == 8); 91 for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) { 92 int j; 93 uint64_t word = *wp; 94 95 for (j = 0; j < zn->zn_key_intlen; j++) { 96 h = (h >> 8) ^ 97 zfs_crc64_table[(h ^ word) & 0xFF]; 98 word >>= NBBY; 99 } 100 } 101 } else { 102 int i, len; 103 const uint8_t *cp = zn->zn_key_norm; 104 105 /* 106 * We previously stored the terminating null on 107 * disk, but didn't hash it, so we need to 108 * continue to not hash it. (The 109 * zn_key_*_numints includes the terminating 110 * null for non-binary keys.) 111 */ 112 len = zn->zn_key_norm_numints - 1; 113 114 ASSERT(zn->zn_key_intlen == 1); 115 for (i = 0; i < len; cp++, i++) { 116 h = (h >> 8) ^ 117 zfs_crc64_table[(h ^ *cp) & 0xFF]; 118 } 119 } 120 } 121 /* 122 * Don't use all 64 bits, since we need some in the cookie for 123 * the collision differentiator. We MUST use the high bits, 124 * since those are the ones that we first pay attention to when 125 * chosing the bucket. 
126 */ 127 h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1); 128 129 return (h); 130} 131 132static int 133zap_normalize(zap_t *zap, const char *name, char *namenorm) 134{ 135 size_t inlen, outlen; 136 int err; 137 138 ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY)); 139 140 inlen = strlen(name) + 1; 141 outlen = ZAP_MAXNAMELEN; 142 143 err = 0; 144 (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen, 145 zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL | 146 U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err); 147 148 return (err); 149} 150 151boolean_t 152zap_match(zap_name_t *zn, const char *matchname) 153{ 154 ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY)); 155 156 if (zn->zn_matchtype == MT_FIRST) { 157 char norm[ZAP_MAXNAMELEN]; 158 159 if (zap_normalize(zn->zn_zap, matchname, norm) != 0) 160 return (B_FALSE); 161 162 return (strcmp(zn->zn_key_norm, norm) == 0); 163 } else { 164 /* MT_BEST or MT_EXACT */ 165 return (strcmp(zn->zn_key_orig, matchname) == 0); 166 } 167} 168 169void 170zap_name_free(zap_name_t *zn) 171{ 172 kmem_free(zn, sizeof (zap_name_t)); 173} 174 175zap_name_t * 176zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) 177{ 178 zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); 179 180 zn->zn_zap = zap; 181 zn->zn_key_intlen = sizeof (*key); 182 zn->zn_key_orig = key; 183 zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; 184 zn->zn_matchtype = mt; 185 if (zap->zap_normflags) { 186 if (zap_normalize(zap, key, zn->zn_normbuf) != 0) { 187 zap_name_free(zn); 188 return (NULL); 189 } 190 zn->zn_key_norm = zn->zn_normbuf; 191 zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; 192 } else { 193 if (mt != MT_EXACT) { 194 zap_name_free(zn); 195 return (NULL); 196 } 197 zn->zn_key_norm = zn->zn_key_orig; 198 zn->zn_key_norm_numints = zn->zn_key_orig_numints; 199 } 200 201 zn->zn_hash = zap_hash(zn); 202 return (zn); 203} 204 205zap_name_t * 206zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int 
numints) 207{ 208 zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); 209 210 ASSERT(zap->zap_normflags == 0); 211 zn->zn_zap = zap; 212 zn->zn_key_intlen = sizeof (*key); 213 zn->zn_key_orig = zn->zn_key_norm = key; 214 zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints; 215 zn->zn_matchtype = MT_EXACT; 216 217 zn->zn_hash = zap_hash(zn); 218 return (zn); 219} 220 221static void 222mzap_byteswap(mzap_phys_t *buf, size_t size) 223{ 224 int i, max; 225 buf->mz_block_type = BSWAP_64(buf->mz_block_type); 226 buf->mz_salt = BSWAP_64(buf->mz_salt); 227 buf->mz_normflags = BSWAP_64(buf->mz_normflags); 228 max = (size / MZAP_ENT_LEN) - 1; 229 for (i = 0; i < max; i++) { 230 buf->mz_chunk[i].mze_value = 231 BSWAP_64(buf->mz_chunk[i].mze_value); 232 buf->mz_chunk[i].mze_cd = 233 BSWAP_32(buf->mz_chunk[i].mze_cd); 234 } 235} 236 237void 238zap_byteswap(void *buf, size_t size) 239{ 240 uint64_t block_type; 241 242 block_type = *(uint64_t *)buf; 243 244 if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) { 245 /* ASSERT(magic == ZAP_LEAF_MAGIC); */ 246 mzap_byteswap(buf, size); 247 } else { 248 fzap_byteswap(buf, size); 249 } 250} 251 252static int 253mze_compare(const void *arg1, const void *arg2) 254{ 255 const mzap_ent_t *mze1 = arg1; 256 const mzap_ent_t *mze2 = arg2; 257 258 if (mze1->mze_hash > mze2->mze_hash) 259 return (+1); 260 if (mze1->mze_hash < mze2->mze_hash) 261 return (-1); 262 if (mze1->mze_cd > mze2->mze_cd) 263 return (+1); 264 if (mze1->mze_cd < mze2->mze_cd) 265 return (-1); 266 return (0); 267} 268 269static int 270mze_insert(zap_t *zap, int chunkid, uint64_t hash) 271{ 272 mzap_ent_t *mze; 273 avl_index_t idx; 274 275 ASSERT(zap->zap_ismicro); 276 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 277 278 mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP); 279 mze->mze_chunkid = chunkid; 280 mze->mze_hash = hash; 281 mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; 282 ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0); 283 if 
(avl_find(&zap->zap_m.zap_avl, mze, &idx) != NULL) { 284 kmem_free(mze, sizeof (mzap_ent_t)); 285 return (EEXIST); 286 } 287 avl_insert(&zap->zap_m.zap_avl, mze, idx); 288 return (0); 289} 290 291static mzap_ent_t * 292mze_find(zap_name_t *zn) 293{ 294 mzap_ent_t mze_tofind; 295 mzap_ent_t *mze; 296 avl_index_t idx; 297 avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl; 298 299 ASSERT(zn->zn_zap->zap_ismicro); 300 ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); 301 302 mze_tofind.mze_hash = zn->zn_hash; 303 mze_tofind.mze_cd = 0; 304 305again: 306 mze = avl_find(avl, &mze_tofind, &idx); 307 if (mze == NULL) 308 mze = avl_nearest(avl, idx, AVL_AFTER); 309 for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { 310 ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); 311 if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) 312 return (mze); 313 } 314 if (zn->zn_matchtype == MT_BEST) { 315 zn->zn_matchtype = MT_FIRST; 316 goto again; 317 } 318 return (NULL); 319} 320 321static uint32_t 322mze_find_unused_cd(zap_t *zap, uint64_t hash) 323{ 324 mzap_ent_t mze_tofind; 325 mzap_ent_t *mze; 326 avl_index_t idx; 327 avl_tree_t *avl = &zap->zap_m.zap_avl; 328 uint32_t cd; 329 330 ASSERT(zap->zap_ismicro); 331 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 332 333 mze_tofind.mze_hash = hash; 334 mze_tofind.mze_cd = 0; 335 336 cd = 0; 337 for (mze = avl_find(avl, &mze_tofind, &idx); 338 mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { 339 if (mze->mze_cd != cd) 340 break; 341 cd++; 342 } 343 344 return (cd); 345} 346 347static void 348mze_remove(zap_t *zap, mzap_ent_t *mze) 349{ 350 ASSERT(zap->zap_ismicro); 351 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 352 353 avl_remove(&zap->zap_m.zap_avl, mze); 354 kmem_free(mze, sizeof (mzap_ent_t)); 355} 356 357static void 358mze_destroy(zap_t *zap) 359{ 360 mzap_ent_t *mze; 361 void *avlcookie = NULL; 362 363 while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie)) 364 kmem_free(mze, sizeof (mzap_ent_t)); 
365 avl_destroy(&zap->zap_m.zap_avl); 366} 367 368static zap_t * 369mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) 370{ 371 zap_t *winner; 372 zap_t *zap; 373 int i; 374 375 ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); 376 377 zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP); 378 rw_init(&zap->zap_rwlock, 0, 0, 0); 379 rw_enter(&zap->zap_rwlock, RW_WRITER); 380 zap->zap_objset = os; 381 zap->zap_object = obj; 382 zap->zap_dbuf = db; 383 384 if (*(uint64_t *)db->db_data != ZBT_MICRO) { 385 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); 386 zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1; 387 } else { 388 zap->zap_ismicro = TRUE; 389 } 390 391 /* 392 * Make sure that zap_ismicro is set before we let others see 393 * it, because zap_lockdir() checks zap_ismicro without the lock 394 * held. 395 */ 396 winner = dmu_buf_set_user(db, zap, zap_evict); 397 398 if (winner != NULL) { 399 rw_exit(&zap->zap_rwlock); 400 rw_destroy(&zap->zap_rwlock); 401 if (!zap->zap_ismicro) 402 mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 403 kmem_free(zap, sizeof (zap_t)); 404 return (winner); 405 } 406 407 if (zap->zap_ismicro) { 408 zap->zap_salt = zap_m_phys(zap)->mz_salt; 409 zap->zap_normflags = zap_m_phys(zap)->mz_normflags; 410 zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; 411 avl_create(&zap->zap_m.zap_avl, mze_compare, 412 sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); 413 414 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { 415 mzap_ent_phys_t *mze = 416 &zap_m_phys(zap)->mz_chunk[i]; 417 if (mze->mze_name[0]) { 418 zap_name_t *zn; 419 420 zn = zap_name_alloc(zap, mze->mze_name, 421 MT_EXACT); 422 if (mze_insert(zap, i, zn->zn_hash) == 0) 423 zap->zap_m.zap_num_entries++; 424 else { 425 printf("ZFS WARNING: Duplicated ZAP " 426 "entry detected (%s).\n", 427 mze->mze_name); 428 } 429 zap_name_free(zn); 430 } 431 } 432 } else { 433 zap->zap_salt = zap_f_phys(zap)->zap_salt; 434 zap->zap_normflags = zap_f_phys(zap)->zap_normflags; 435 
436 ASSERT3U(sizeof (struct zap_leaf_header), ==, 437 2*ZAP_LEAF_CHUNKSIZE); 438 439 /* 440 * The embedded pointer table should not overlap the 441 * other members. 442 */ 443 ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >, 444 &zap_f_phys(zap)->zap_salt); 445 446 /* 447 * The embedded pointer table should end at the end of 448 * the block 449 */ 450 ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap, 451 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) - 452 (uintptr_t)zap_f_phys(zap), ==, 453 zap->zap_dbuf->db_size); 454 } 455 rw_exit(&zap->zap_rwlock); 456 return (zap); 457} 458 459int 460zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, 461 krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) 462{ 463 zap_t *zap; 464 dmu_buf_t *db; 465 krw_t lt; 466 int err; 467 468 *zapp = NULL; 469 470 err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH); 471 if (err) 472 return (err); 473 474#ifdef ZFS_DEBUG 475 { 476 dmu_object_info_t doi; 477 dmu_object_info_from_db(db, &doi); 478 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); 479 } 480#endif 481 482 zap = dmu_buf_get_user(db); 483 if (zap == NULL) 484 zap = mzap_open(os, obj, db); 485 486 /* 487 * We're checking zap_ismicro without the lock held, in order to 488 * tell what type of lock we want. Once we have some sort of 489 * lock, see if it really is the right type. In practice this 490 * can only be different if it was upgraded from micro to fat, 491 * and micro wanted WRITER but fat only needs READER. 492 */ 493 lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti; 494 rw_enter(&zap->zap_rwlock, lt); 495 if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) { 496 /* it was upgraded, now we only need reader */ 497 ASSERT(lt == RW_WRITER); 498 ASSERT(RW_READER == 499 (!zap->zap_ismicro && fatreader) ? 
RW_READER : lti); 500 rw_downgrade(&zap->zap_rwlock); 501 lt = RW_READER; 502 } 503 504 zap->zap_objset = os; 505 506 if (lt == RW_WRITER) 507 dmu_buf_will_dirty(db, tx); 508 509 ASSERT3P(zap->zap_dbuf, ==, db); 510 511 ASSERT(!zap->zap_ismicro || 512 zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks); 513 if (zap->zap_ismicro && tx && adding && 514 zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { 515 uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; 516 if (newsz > MZAP_MAX_BLKSZ) { 517 dprintf("upgrading obj %llu: num_entries=%u\n", 518 obj, zap->zap_m.zap_num_entries); 519 *zapp = zap; 520 return (mzap_upgrade(zapp, tx, 0)); 521 } 522 err = dmu_object_set_blocksize(os, obj, newsz, 0, tx); 523 ASSERT0(err); 524 zap->zap_m.zap_num_chunks = 525 db->db_size / MZAP_ENT_LEN - 1; 526 } 527 528 *zapp = zap; 529 return (0); 530} 531 532void 533zap_unlockdir(zap_t *zap) 534{ 535 rw_exit(&zap->zap_rwlock); 536 dmu_buf_rele(zap->zap_dbuf, NULL); 537} 538 539static int 540mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags) 541{ 542 mzap_phys_t *mzp; 543 int i, sz, nchunks; 544 int err = 0; 545 zap_t *zap = *zapp; 546 547 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 548 549 sz = zap->zap_dbuf->db_size; 550 mzp = kmem_alloc(sz, KM_SLEEP); 551 bcopy(zap->zap_dbuf->db_data, mzp, sz); 552 nchunks = zap->zap_m.zap_num_chunks; 553 554 if (!flags) { 555 err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, 556 1ULL << fzap_default_block_shift, 0, tx); 557 if (err) { 558 kmem_free(mzp, sz); 559 return (err); 560 } 561 } 562 563 dprintf("upgrading obj=%llu with %u chunks\n", 564 zap->zap_object, nchunks); 565 /* XXX destroy the avl later, so we can use the stored hash value */ 566 mze_destroy(zap); 567 568 fzap_upgrade(zap, tx, flags); 569 570 for (i = 0; i < nchunks; i++) { 571 mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; 572 zap_name_t *zn; 573 if (mze->mze_name[0] == 0) 574 continue; 575 dprintf("adding %s=%llu\n", 576 mze->mze_name, mze->mze_value); 577 
zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT); 578 err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx); 579 zap = zn->zn_zap; /* fzap_add_cd() may change zap */ 580 zap_name_free(zn); 581 if (err) 582 break; 583 } 584 kmem_free(mzp, sz); 585 *zapp = zap; 586 return (err); 587} 588 589void 590mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, 591 dmu_tx_t *tx) 592{ 593 dmu_buf_t *db; 594 mzap_phys_t *zp; 595 596 VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); 597 598#ifdef ZFS_DEBUG 599 { 600 dmu_object_info_t doi; 601 dmu_object_info_from_db(db, &doi); 602 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); 603 } 604#endif 605 606 dmu_buf_will_dirty(db, tx); 607 zp = db->db_data; 608 zp->mz_block_type = ZBT_MICRO; 609 zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; 610 zp->mz_normflags = normflags; 611 dmu_buf_rele(db, FTAG); 612 613 if (flags != 0) { 614 zap_t *zap; 615 /* Only fat zap supports flags; upgrade immediately. 
*/ 616 VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER, 617 B_FALSE, B_FALSE, &zap)); 618 VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags)); 619 zap_unlockdir(zap); 620 } 621} 622 623int 624zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, 625 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 626{ 627 return (zap_create_claim_norm(os, obj, 628 0, ot, bonustype, bonuslen, tx)); 629} 630 631int 632zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags, 633 dmu_object_type_t ot, 634 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 635{ 636 int err; 637 638 err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx); 639 if (err != 0) 640 return (err); 641 mzap_create_impl(os, obj, normflags, 0, tx); 642 return (0); 643} 644 645uint64_t 646zap_create(objset_t *os, dmu_object_type_t ot, 647 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 648{ 649 return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx)); 650} 651 652uint64_t 653zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot, 654 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 655{ 656 uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); 657 658 mzap_create_impl(os, obj, normflags, 0, tx); 659 return (obj); 660} 661 662uint64_t 663zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, 664 dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, 665 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 666{ 667 uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); 668 669 ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT && 670 leaf_blockshift <= SPA_OLD_MAXBLOCKSHIFT && 671 indirect_blockshift >= SPA_MINBLOCKSHIFT && 672 indirect_blockshift <= SPA_OLD_MAXBLOCKSHIFT); 673 674 VERIFY(dmu_object_set_blocksize(os, obj, 675 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0); 676 677 mzap_create_impl(os, obj, normflags, flags, tx); 678 return (obj); 679} 680 681int 682zap_destroy(objset_t *os, 
uint64_t zapobj, dmu_tx_t *tx) 683{ 684 /* 685 * dmu_object_free will free the object number and free the 686 * data. Freeing the data will cause our pageout function to be 687 * called, which will destroy our data (zap_leaf_t's and zap_t). 688 */ 689 690 return (dmu_object_free(os, zapobj, tx)); 691} 692 693_NOTE(ARGSUSED(0)) 694void 695zap_evict(dmu_buf_t *db, void *vzap) 696{ 697 zap_t *zap = vzap; 698 699 rw_destroy(&zap->zap_rwlock); 700 701 if (zap->zap_ismicro) 702 mze_destroy(zap); 703 else 704 mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 705 706 kmem_free(zap, sizeof (zap_t)); 707} 708 709int 710zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) 711{ 712 zap_t *zap; 713 int err; 714 715 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 716 if (err) 717 return (err); 718 if (!zap->zap_ismicro) { 719 err = fzap_count(zap, count); 720 } else { 721 *count = zap->zap_m.zap_num_entries; 722 } 723 zap_unlockdir(zap); 724 return (err); 725} 726 727/* 728 * zn may be NULL; if not specified, it will be computed if needed. 729 * See also the comment above zap_entry_normalization_conflict(). 
730 */ 731static boolean_t 732mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze) 733{ 734 mzap_ent_t *other; 735 int direction = AVL_BEFORE; 736 boolean_t allocdzn = B_FALSE; 737 738 if (zap->zap_normflags == 0) 739 return (B_FALSE); 740 741again: 742 for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction); 743 other && other->mze_hash == mze->mze_hash; 744 other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { 745 746 if (zn == NULL) { 747 zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name, 748 MT_FIRST); 749 allocdzn = B_TRUE; 750 } 751 if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { 752 if (allocdzn) 753 zap_name_free(zn); 754 return (B_TRUE); 755 } 756 } 757 758 if (direction == AVL_BEFORE) { 759 direction = AVL_AFTER; 760 goto again; 761 } 762 763 if (allocdzn) 764 zap_name_free(zn); 765 return (B_FALSE); 766} 767 768/* 769 * Routines for manipulating attributes. 770 */ 771 772int 773zap_lookup(objset_t *os, uint64_t zapobj, const char *name, 774 uint64_t integer_size, uint64_t num_integers, void *buf) 775{ 776 return (zap_lookup_norm(os, zapobj, name, integer_size, 777 num_integers, buf, MT_EXACT, NULL, 0, NULL)); 778} 779 780int 781zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, 782 uint64_t integer_size, uint64_t num_integers, void *buf, 783 matchtype_t mt, char *realname, int rn_len, 784 boolean_t *ncp) 785{ 786 zap_t *zap; 787 int err; 788 mzap_ent_t *mze; 789 zap_name_t *zn; 790 791 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 792 if (err) 793 return (err); 794 zn = zap_name_alloc(zap, name, mt); 795 if (zn == NULL) { 796 zap_unlockdir(zap); 797 return (SET_ERROR(ENOTSUP)); 798 } 799 800 if (!zap->zap_ismicro) { 801 err = fzap_lookup(zn, integer_size, num_integers, buf, 802 realname, rn_len, ncp); 803 } else { 804 mze = mze_find(zn); 805 if (mze == NULL) { 806 err = SET_ERROR(ENOENT); 807 } else { 808 if (num_integers < 1) { 809 err = SET_ERROR(EOVERFLOW); 810 } else if 
(integer_size != 8) { 811 err = SET_ERROR(EINVAL); 812 } else { 813 *(uint64_t *)buf = 814 MZE_PHYS(zap, mze)->mze_value; 815 (void) strlcpy(realname, 816 MZE_PHYS(zap, mze)->mze_name, rn_len); 817 if (ncp) { 818 *ncp = mzap_normalization_conflict(zap, 819 zn, mze); 820 } 821 } 822 } 823 } 824 zap_name_free(zn); 825 zap_unlockdir(zap); 826 return (err); 827} 828 829int 830zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 831 int key_numints) 832{ 833 zap_t *zap; 834 int err; 835 zap_name_t *zn; 836 837 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 838 if (err) 839 return (err); 840 zn = zap_name_alloc_uint64(zap, key, key_numints); 841 if (zn == NULL) { 842 zap_unlockdir(zap); 843 return (SET_ERROR(ENOTSUP)); 844 } 845 846 fzap_prefetch(zn); 847 zap_name_free(zn); 848 zap_unlockdir(zap); 849 return (err); 850} 851 852int 853zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 854 int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf) 855{ 856 zap_t *zap; 857 int err; 858 zap_name_t *zn; 859 860 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 861 if (err) 862 return (err); 863 zn = zap_name_alloc_uint64(zap, key, key_numints); 864 if (zn == NULL) { 865 zap_unlockdir(zap); 866 return (SET_ERROR(ENOTSUP)); 867 } 868 869 err = fzap_lookup(zn, integer_size, num_integers, buf, 870 NULL, 0, NULL); 871 zap_name_free(zn); 872 zap_unlockdir(zap); 873 return (err); 874} 875 876int 877zap_contains(objset_t *os, uint64_t zapobj, const char *name) 878{ 879 int err = zap_lookup_norm(os, zapobj, name, 0, 880 0, NULL, MT_EXACT, NULL, 0, NULL); 881 if (err == EOVERFLOW || err == EINVAL) 882 err = 0; /* found, but skipped reading the value */ 883 return (err); 884} 885 886int 887zap_length(objset_t *os, uint64_t zapobj, const char *name, 888 uint64_t *integer_size, uint64_t *num_integers) 889{ 890 zap_t *zap; 891 int err; 892 mzap_ent_t *mze; 893 zap_name_t *zn; 894 895 err = 
zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 896 if (err) 897 return (err); 898 zn = zap_name_alloc(zap, name, MT_EXACT); 899 if (zn == NULL) { 900 zap_unlockdir(zap); 901 return (SET_ERROR(ENOTSUP)); 902 } 903 if (!zap->zap_ismicro) { 904 err = fzap_length(zn, integer_size, num_integers); 905 } else { 906 mze = mze_find(zn); 907 if (mze == NULL) { 908 err = SET_ERROR(ENOENT); 909 } else { 910 if (integer_size) 911 *integer_size = 8; 912 if (num_integers) 913 *num_integers = 1; 914 } 915 } 916 zap_name_free(zn); 917 zap_unlockdir(zap); 918 return (err); 919} 920 921int 922zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 923 int key_numints, uint64_t *integer_size, uint64_t *num_integers) 924{ 925 zap_t *zap; 926 int err; 927 zap_name_t *zn; 928 929 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 930 if (err) 931 return (err); 932 zn = zap_name_alloc_uint64(zap, key, key_numints); 933 if (zn == NULL) { 934 zap_unlockdir(zap); 935 return (SET_ERROR(ENOTSUP)); 936 } 937 err = fzap_length(zn, integer_size, num_integers); 938 zap_name_free(zn); 939 zap_unlockdir(zap); 940 return (err); 941} 942 943static void 944mzap_addent(zap_name_t *zn, uint64_t value) 945{ 946 int i; 947 zap_t *zap = zn->zn_zap; 948 int start = zap->zap_m.zap_alloc_next; 949 uint32_t cd; 950 951 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 952 953#ifdef ZFS_DEBUG 954 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { 955 mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; 956 ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0); 957 } 958#endif 959 960 cd = mze_find_unused_cd(zap, zn->zn_hash); 961 /* given the limited size of the microzap, this can't happen */ 962 ASSERT(cd < zap_maxcd(zap)); 963 964again: 965 for (i = start; i < zap->zap_m.zap_num_chunks; i++) { 966 mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; 967 if (mze->mze_name[0] == 0) { 968 mze->mze_value = value; 969 mze->mze_cd = cd; 970 (void) strcpy(mze->mze_name, 
zn->zn_key_orig); 971 zap->zap_m.zap_num_entries++; 972 zap->zap_m.zap_alloc_next = i+1; 973 if (zap->zap_m.zap_alloc_next == 974 zap->zap_m.zap_num_chunks) 975 zap->zap_m.zap_alloc_next = 0; 976 VERIFY(0 == mze_insert(zap, i, zn->zn_hash)); 977 return; 978 } 979 } 980 if (start != 0) { 981 start = 0; 982 goto again; 983 } 984 ASSERT(!"out of entries!"); 985} 986 987int 988zap_add(objset_t *os, uint64_t zapobj, const char *key, 989 int integer_size, uint64_t num_integers, 990 const void *val, dmu_tx_t *tx) 991{ 992 zap_t *zap; 993 int err; 994 mzap_ent_t *mze; 995 const uint64_t *intval = val; 996 zap_name_t *zn; 997 998 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 999 if (err) 1000 return (err); 1001 zn = zap_name_alloc(zap, key, MT_EXACT); 1002 if (zn == NULL) { 1003 zap_unlockdir(zap); 1004 return (SET_ERROR(ENOTSUP)); 1005 } 1006 if (!zap->zap_ismicro) { 1007 err = fzap_add(zn, integer_size, num_integers, val, tx); 1008 zap = zn->zn_zap; /* fzap_add() may change zap */ 1009 } else if (integer_size != 8 || num_integers != 1 || 1010 strlen(key) >= MZAP_NAME_LEN) { 1011 err = mzap_upgrade(&zn->zn_zap, tx, 0); 1012 if (err == 0) 1013 err = fzap_add(zn, integer_size, num_integers, val, tx); 1014 zap = zn->zn_zap; /* fzap_add() may change zap */ 1015 } else { 1016 mze = mze_find(zn); 1017 if (mze != NULL) { 1018 err = SET_ERROR(EEXIST); 1019 } else { 1020 mzap_addent(zn, *intval); 1021 } 1022 } 1023 ASSERT(zap == zn->zn_zap); 1024 zap_name_free(zn); 1025 if (zap != NULL) /* may be NULL if fzap_add() failed */ 1026 zap_unlockdir(zap); 1027 return (err); 1028} 1029 1030int 1031zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1032 int key_numints, int integer_size, uint64_t num_integers, 1033 const void *val, dmu_tx_t *tx) 1034{ 1035 zap_t *zap; 1036 int err; 1037 zap_name_t *zn; 1038 1039 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 1040 if (err) 1041 return (err); 1042 zn = zap_name_alloc_uint64(zap, key, 
key_numints); 1043 if (zn == NULL) { 1044 zap_unlockdir(zap); 1045 return (SET_ERROR(ENOTSUP)); 1046 } 1047 err = fzap_add(zn, integer_size, num_integers, val, tx); 1048 zap = zn->zn_zap; /* fzap_add() may change zap */ 1049 zap_name_free(zn); 1050 if (zap != NULL) /* may be NULL if fzap_add() failed */ 1051 zap_unlockdir(zap); 1052 return (err); 1053} 1054 1055int 1056zap_update(objset_t *os, uint64_t zapobj, const char *name, 1057 int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 1058{ 1059 zap_t *zap; 1060 mzap_ent_t *mze; 1061 uint64_t oldval; 1062 const uint64_t *intval = val; 1063 zap_name_t *zn; 1064 int err; 1065 1066#ifdef ZFS_DEBUG 1067 /* 1068 * If there is an old value, it shouldn't change across the 1069 * lockdir (eg, due to bprewrite's xlation). 1070 */ 1071 if (integer_size == 8 && num_integers == 1) 1072 (void) zap_lookup(os, zapobj, name, 8, 1, &oldval); 1073#endif 1074 1075 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 1076 if (err) 1077 return (err); 1078 zn = zap_name_alloc(zap, name, MT_EXACT); 1079 if (zn == NULL) { 1080 zap_unlockdir(zap); 1081 return (SET_ERROR(ENOTSUP)); 1082 } 1083 if (!zap->zap_ismicro) { 1084 err = fzap_update(zn, integer_size, num_integers, val, tx); 1085 zap = zn->zn_zap; /* fzap_update() may change zap */ 1086 } else if (integer_size != 8 || num_integers != 1 || 1087 strlen(name) >= MZAP_NAME_LEN) { 1088 dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", 1089 zapobj, integer_size, num_integers, name); 1090 err = mzap_upgrade(&zn->zn_zap, tx, 0); 1091 if (err == 0) 1092 err = fzap_update(zn, integer_size, num_integers, 1093 val, tx); 1094 zap = zn->zn_zap; /* fzap_update() may change zap */ 1095 } else { 1096 mze = mze_find(zn); 1097 if (mze != NULL) { 1098 ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval); 1099 MZE_PHYS(zap, mze)->mze_value = *intval; 1100 } else { 1101 mzap_addent(zn, *intval); 1102 } 1103 } 1104 ASSERT(zap == zn->zn_zap); 1105 zap_name_free(zn); 
1106 if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ 1107 zap_unlockdir(zap); 1108 return (err); 1109} 1110 1111int 1112zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1113 int key_numints, 1114 int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 1115{ 1116 zap_t *zap; 1117 zap_name_t *zn; 1118 int err; 1119 1120 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); 1121 if (err) 1122 return (err); 1123 zn = zap_name_alloc_uint64(zap, key, key_numints); 1124 if (zn == NULL) { 1125 zap_unlockdir(zap); 1126 return (SET_ERROR(ENOTSUP)); 1127 } 1128 err = fzap_update(zn, integer_size, num_integers, val, tx); 1129 zap = zn->zn_zap; /* fzap_update() may change zap */ 1130 zap_name_free(zn); 1131 if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ 1132 zap_unlockdir(zap); 1133 return (err); 1134} 1135 1136int 1137zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) 1138{ 1139 return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx)); 1140} 1141 1142int 1143zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, 1144 matchtype_t mt, dmu_tx_t *tx) 1145{ 1146 zap_t *zap; 1147 int err; 1148 mzap_ent_t *mze; 1149 zap_name_t *zn; 1150 1151 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); 1152 if (err) 1153 return (err); 1154 zn = zap_name_alloc(zap, name, mt); 1155 if (zn == NULL) { 1156 zap_unlockdir(zap); 1157 return (SET_ERROR(ENOTSUP)); 1158 } 1159 if (!zap->zap_ismicro) { 1160 err = fzap_remove(zn, tx); 1161 } else { 1162 mze = mze_find(zn); 1163 if (mze == NULL) { 1164 err = SET_ERROR(ENOENT); 1165 } else { 1166 zap->zap_m.zap_num_entries--; 1167 bzero(&zap_m_phys(zap)->mz_chunk[mze->mze_chunkid], 1168 sizeof (mzap_ent_phys_t)); 1169 mze_remove(zap, mze); 1170 } 1171 } 1172 zap_name_free(zn); 1173 zap_unlockdir(zap); 1174 return (err); 1175} 1176 1177int 1178zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1179 int key_numints, dmu_tx_t 
*tx) 1180{ 1181 zap_t *zap; 1182 int err; 1183 zap_name_t *zn; 1184 1185 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); 1186 if (err) 1187 return (err); 1188 zn = zap_name_alloc_uint64(zap, key, key_numints); 1189 if (zn == NULL) { 1190 zap_unlockdir(zap); 1191 return (SET_ERROR(ENOTSUP)); 1192 } 1193 err = fzap_remove(zn, tx); 1194 zap_name_free(zn); 1195 zap_unlockdir(zap); 1196 return (err); 1197} 1198 1199/* 1200 * Routines for iterating over the attributes. 1201 */ 1202 1203void 1204zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, 1205 uint64_t serialized) 1206{ 1207 zc->zc_objset = os; 1208 zc->zc_zap = NULL; 1209 zc->zc_leaf = NULL; 1210 zc->zc_zapobj = zapobj; 1211 zc->zc_serialized = serialized; 1212 zc->zc_hash = 0; 1213 zc->zc_cd = 0; 1214} 1215 1216void 1217zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) 1218{ 1219 zap_cursor_init_serialized(zc, os, zapobj, 0); 1220} 1221 1222void 1223zap_cursor_fini(zap_cursor_t *zc) 1224{ 1225 if (zc->zc_zap) { 1226 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 1227 zap_unlockdir(zc->zc_zap); 1228 zc->zc_zap = NULL; 1229 } 1230 if (zc->zc_leaf) { 1231 rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); 1232 zap_put_leaf(zc->zc_leaf); 1233 zc->zc_leaf = NULL; 1234 } 1235 zc->zc_objset = NULL; 1236} 1237 1238uint64_t 1239zap_cursor_serialize(zap_cursor_t *zc) 1240{ 1241 if (zc->zc_hash == -1ULL) 1242 return (-1ULL); 1243 if (zc->zc_zap == NULL) 1244 return (zc->zc_serialized); 1245 ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0); 1246 ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); 1247 1248 /* 1249 * We want to keep the high 32 bits of the cursor zero if we can, so 1250 * that 32-bit programs can access this. So usually use a small 1251 * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits 1252 * of the cursor. 
1253 * 1254 * [ collision differentiator | zap_hashbits()-bit hash value ] 1255 */ 1256 return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) | 1257 ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap))); 1258} 1259 1260int 1261zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) 1262{ 1263 int err; 1264 avl_index_t idx; 1265 mzap_ent_t mze_tofind; 1266 mzap_ent_t *mze; 1267 1268 if (zc->zc_hash == -1ULL) 1269 return (SET_ERROR(ENOENT)); 1270 1271 if (zc->zc_zap == NULL) { 1272 int hb; 1273 err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, 1274 RW_READER, TRUE, FALSE, &zc->zc_zap); 1275 if (err) 1276 return (err); 1277 1278 /* 1279 * To support zap_cursor_init_serialized, advance, retrieve, 1280 * we must add to the existing zc_cd, which may already 1281 * be 1 due to the zap_cursor_advance. 1282 */ 1283 ASSERT(zc->zc_hash == 0); 1284 hb = zap_hashbits(zc->zc_zap); 1285 zc->zc_hash = zc->zc_serialized << (64 - hb); 1286 zc->zc_cd += zc->zc_serialized >> hb; 1287 if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ 1288 zc->zc_cd = 0; 1289 } else { 1290 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 1291 } 1292 if (!zc->zc_zap->zap_ismicro) { 1293 err = fzap_cursor_retrieve(zc->zc_zap, zc, za); 1294 } else { 1295 mze_tofind.mze_hash = zc->zc_hash; 1296 mze_tofind.mze_cd = zc->zc_cd; 1297 1298 mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); 1299 if (mze == NULL) { 1300 mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, 1301 idx, AVL_AFTER); 1302 } 1303 if (mze) { 1304 mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); 1305 ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); 1306 za->za_normalization_conflict = 1307 mzap_normalization_conflict(zc->zc_zap, NULL, mze); 1308 za->za_integer_length = 8; 1309 za->za_num_integers = 1; 1310 za->za_first_integer = mzep->mze_value; 1311 (void) strcpy(za->za_name, mzep->mze_name); 1312 zc->zc_hash = mze->mze_hash; 1313 zc->zc_cd = mze->mze_cd; 1314 err = 0; 1315 } else { 1316 zc->zc_hash = -1ULL; 1317 err = 
SET_ERROR(ENOENT); 1318 } 1319 } 1320 rw_exit(&zc->zc_zap->zap_rwlock); 1321 return (err); 1322} 1323 1324void 1325zap_cursor_advance(zap_cursor_t *zc) 1326{ 1327 if (zc->zc_hash == -1ULL) 1328 return; 1329 zc->zc_cd++; 1330} 1331 1332int 1333zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt) 1334{ 1335 int err = 0; 1336 mzap_ent_t *mze; 1337 zap_name_t *zn; 1338 1339 if (zc->zc_zap == NULL) { 1340 err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, 1341 RW_READER, TRUE, FALSE, &zc->zc_zap); 1342 if (err) 1343 return (err); 1344 } else { 1345 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 1346 } 1347 1348 zn = zap_name_alloc(zc->zc_zap, name, mt); 1349 if (zn == NULL) { 1350 rw_exit(&zc->zc_zap->zap_rwlock); 1351 return (SET_ERROR(ENOTSUP)); 1352 } 1353 1354 if (!zc->zc_zap->zap_ismicro) { 1355 err = fzap_cursor_move_to_key(zc, zn); 1356 } else { 1357 mze = mze_find(zn); 1358 if (mze == NULL) { 1359 err = SET_ERROR(ENOENT); 1360 goto out; 1361 } 1362 zc->zc_hash = mze->mze_hash; 1363 zc->zc_cd = mze->mze_cd; 1364 } 1365 1366out: 1367 zap_name_free(zn); 1368 rw_exit(&zc->zc_zap->zap_rwlock); 1369 return (err); 1370} 1371 1372int 1373zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) 1374{ 1375 int err; 1376 zap_t *zap; 1377 1378 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 1379 if (err) 1380 return (err); 1381 1382 bzero(zs, sizeof (zap_stats_t)); 1383 1384 if (zap->zap_ismicro) { 1385 zs->zs_blocksize = zap->zap_dbuf->db_size; 1386 zs->zs_num_entries = zap->zap_m.zap_num_entries; 1387 zs->zs_num_blocks = 1; 1388 } else { 1389 fzap_get_stats(zap, zs); 1390 } 1391 zap_unlockdir(zap); 1392 return (0); 1393} 1394 1395int 1396zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, 1397 uint64_t *towrite, uint64_t *tooverwrite) 1398{ 1399 zap_t *zap; 1400 int err = 0; 1401 1402 /* 1403 * Since, we don't have a name, we cannot figure out which blocks will 1404 * be affected in this operation. 
So, account for the worst case : 1405 * - 3 blocks overwritten: target leaf, ptrtbl block, header block 1406 * - 4 new blocks written if adding: 1407 * - 2 blocks for possibly split leaves, 1408 * - 2 grown ptrtbl blocks 1409 * 1410 * This also accomodates the case where an add operation to a fairly 1411 * large microzap results in a promotion to fatzap. 1412 */ 1413 if (name == NULL) { 1414 *towrite += (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE; 1415 return (err); 1416 } 1417 1418 /* 1419 * We lock the zap with adding == FALSE. Because, if we pass 1420 * the actual value of add, it could trigger a mzap_upgrade(). 1421 * At present we are just evaluating the possibility of this operation 1422 * and hence we donot want to trigger an upgrade. 1423 */ 1424 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); 1425 if (err) 1426 return (err); 1427 1428 if (!zap->zap_ismicro) { 1429 zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT); 1430 if (zn) { 1431 err = fzap_count_write(zn, add, towrite, 1432 tooverwrite); 1433 zap_name_free(zn); 1434 } else { 1435 /* 1436 * We treat this case as similar to (name == NULL) 1437 */ 1438 *towrite += (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE; 1439 } 1440 } else { 1441 /* 1442 * We are here if (name != NULL) and this is a micro-zap. 1443 * We account for the header block depending on whether it 1444 * is freeable. 1445 * 1446 * Incase of an add-operation it is hard to find out 1447 * if this add will promote this microzap to fatzap. 1448 * Hence, we consider the worst case and account for the 1449 * blocks assuming this microzap would be promoted to a 1450 * fatzap. 
1451 * 1452 * 1 block overwritten : header block 1453 * 4 new blocks written : 2 new split leaf, 2 grown 1454 * ptrtbl blocks 1455 */ 1456 if (dmu_buf_freeable(zap->zap_dbuf)) 1457 *tooverwrite += MZAP_MAX_BLKSZ; 1458 else 1459 *towrite += MZAP_MAX_BLKSZ; 1460 1461 if (add) { 1462 *towrite += 4 * MZAP_MAX_BLKSZ; 1463 } 1464 } 1465 1466 zap_unlockdir(zap); 1467 return (err); 1468} 1469