zap_micro.c revision 168404
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26#pragma ident "%Z%%M% %I% %E% SMI" 27 28#include <sys/spa.h> 29#include <sys/dmu.h> 30#include <sys/zfs_context.h> 31#include <sys/zap.h> 32#include <sys/refcount.h> 33#include <sys/zap_impl.h> 34#include <sys/zap_leaf.h> 35#include <sys/avl.h> 36 37 38static void mzap_upgrade(zap_t *zap, dmu_tx_t *tx); 39 40 41static void 42mzap_byteswap(mzap_phys_t *buf, size_t size) 43{ 44 int i, max; 45 buf->mz_block_type = BSWAP_64(buf->mz_block_type); 46 buf->mz_salt = BSWAP_64(buf->mz_salt); 47 max = (size / MZAP_ENT_LEN) - 1; 48 for (i = 0; i < max; i++) { 49 buf->mz_chunk[i].mze_value = 50 BSWAP_64(buf->mz_chunk[i].mze_value); 51 buf->mz_chunk[i].mze_cd = 52 BSWAP_32(buf->mz_chunk[i].mze_cd); 53 } 54} 55 56void 57zap_byteswap(void *buf, size_t size) 58{ 59 uint64_t block_type; 60 61 block_type = *(uint64_t *)buf; 62 63 if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) { 64 /* ASSERT(magic == ZAP_LEAF_MAGIC); */ 65 mzap_byteswap(buf, size); 66 } else { 67 fzap_byteswap(buf, size); 68 } 69} 70 71static int 72mze_compare(const void *arg1, const void *arg2) 73{ 74 const mzap_ent_t *mze1 = arg1; 75 const mzap_ent_t *mze2 = arg2; 76 77 if (mze1->mze_hash > mze2->mze_hash) 78 return (+1); 79 if (mze1->mze_hash < mze2->mze_hash) 80 return (-1); 81 if (mze1->mze_phys.mze_cd > mze2->mze_phys.mze_cd) 82 return (+1); 83 if (mze1->mze_phys.mze_cd < mze2->mze_phys.mze_cd) 84 return (-1); 85 return (0); 86} 87 88static void 89mze_insert(zap_t *zap, int chunkid, uint64_t hash, mzap_ent_phys_t *mzep) 90{ 91 mzap_ent_t *mze; 92 93 ASSERT(zap->zap_ismicro); 94 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 95 ASSERT(mzep->mze_cd < ZAP_MAXCD); 96 ASSERT3U(zap_hash(zap, mzep->mze_name), ==, hash); 97 98 mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP); 99 mze->mze_chunkid = chunkid; 100 mze->mze_hash = hash; 101 mze->mze_phys = *mzep; 102 avl_add(&zap->zap_m.zap_avl, mze); 103} 104 105static mzap_ent_t * 106mze_find(zap_t *zap, const char *name, uint64_t hash) 107{ 108 mzap_ent_t mze_tofind; 109 mzap_ent_t *mze; 110 avl_index_t idx; 111 avl_tree_t *avl = &zap->zap_m.zap_avl; 112 113 ASSERT(zap->zap_ismicro); 114 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 115 ASSERT3U(zap_hash(zap, name), ==, hash); 116 117 if (strlen(name) >= sizeof (mze_tofind.mze_phys.mze_name)) 118 return (NULL); 119 120 mze_tofind.mze_hash = hash; 121 mze_tofind.mze_phys.mze_cd = 0; 122 123 mze = avl_find(avl, &mze_tofind, &idx); 124 if (mze == NULL) 125 mze = avl_nearest(avl, idx, AVL_AFTER); 126 for (; mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { 127 if (strcmp(name, mze->mze_phys.mze_name) == 0) 128 return (mze); 129 } 130 return (NULL); 131} 132 133static uint32_t 134mze_find_unused_cd(zap_t *zap, uint64_t hash) 135{ 136 mzap_ent_t mze_tofind; 137 mzap_ent_t *mze; 138 avl_index_t idx; 139 avl_tree_t *avl = &zap->zap_m.zap_avl; 140 uint32_t cd; 141 142 ASSERT(zap->zap_ismicro); 143 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 144 145 mze_tofind.mze_hash = hash; 146 mze_tofind.mze_phys.mze_cd = 0; 147 148 cd = 0; 149 for (mze = avl_find(avl, &mze_tofind, &idx); 150 mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { 151 if (mze->mze_phys.mze_cd != cd) 152 break; 153 cd++; 154 } 155 156 return (cd); 157} 158 159static void 160mze_remove(zap_t *zap, mzap_ent_t *mze) 161{ 162 ASSERT(zap->zap_ismicro); 163 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 164 165 avl_remove(&zap->zap_m.zap_avl, mze); 166 kmem_free(mze, sizeof (mzap_ent_t)); 167} 168 169static void 170mze_destroy(zap_t *zap) 171{ 172 mzap_ent_t *mze; 173 void *avlcookie = NULL; 174 175 while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie)) 176 kmem_free(mze, sizeof (mzap_ent_t)); 177 avl_destroy(&zap->zap_m.zap_avl); 178} 179 180static zap_t * 181mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) 182{ 183 zap_t *winner; 184 zap_t *zap; 185 int i; 186 187 ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); 188 189 zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP); 190 rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, 0); 191 rw_enter(&zap->zap_rwlock, RW_WRITER); 192 zap->zap_objset = os; 193 zap->zap_object = obj; 194 zap->zap_dbuf = db; 195 196 if (((uint64_t *)db->db_data)[0] != ZBT_MICRO) { 197 mutex_init(&zap->zap_f.zap_num_entries_mtx, NULL, 198 MUTEX_DEFAULT, 0); 199 zap->zap_f.zap_block_shift = highbit(db->db_size) - 1; 200 } else { 201 zap->zap_ismicro = TRUE; 202 } 203 204 /* 205 * Make sure that zap_ismicro is set before we let others see 206 * it, because zap_lockdir() checks zap_ismicro without the lock 207 * held. 208 */ 209 winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict); 210 211 if (winner != NULL) { 212 if (!zap->zap_ismicro) 213 mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 214 kmem_free(zap, sizeof (zap_t)); 215 return (winner); 216 } 217 218 if (zap->zap_ismicro) { 219 zap->zap_salt = zap->zap_m.zap_phys->mz_salt; 220 zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; 221 avl_create(&zap->zap_m.zap_avl, mze_compare, 222 sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); 223 224 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { 225 mzap_ent_phys_t *mze = 226 &zap->zap_m.zap_phys->mz_chunk[i]; 227 if (mze->mze_name[0]) { 228 zap->zap_m.zap_num_entries++; 229 mze_insert(zap, i, 230 zap_hash(zap, mze->mze_name), mze); 231 } 232 } 233 } else { 234 zap->zap_salt = zap->zap_f.zap_phys->zap_salt; 235 236 ASSERT3U(sizeof (struct zap_leaf_header), ==, 237 2*ZAP_LEAF_CHUNKSIZE); 238 239 /* 240 * The embedded pointer table should not overlap the 241 * other members. 242 */ 243 ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >, 244 &zap->zap_f.zap_phys->zap_salt); 245 246 /* 247 * The embedded pointer table should end at the end of 248 * the block 249 */ 250 ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap, 251 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) - 252 (uintptr_t)zap->zap_f.zap_phys, ==, 253 zap->zap_dbuf->db_size); 254 } 255 rw_exit(&zap->zap_rwlock); 256 return (zap); 257} 258 259int 260zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, 261 krw_t lti, int fatreader, zap_t **zapp) 262{ 263 zap_t *zap; 264 dmu_buf_t *db; 265 krw_t lt; 266 int err; 267 268 *zapp = NULL; 269 270 err = dmu_buf_hold(os, obj, 0, NULL, &db); 271 if (err) 272 return (err); 273 274#ifdef ZFS_DEBUG 275 { 276 dmu_object_info_t doi; 277 dmu_object_info_from_db(db, &doi); 278 ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap); 279 } 280#endif 281 282 zap = dmu_buf_get_user(db); 283 if (zap == NULL) 284 zap = mzap_open(os, obj, db); 285 286 /* 287 * We're checking zap_ismicro without the lock held, in order to 288 * tell what type of lock we want. Once we have some sort of 289 * lock, see if it really is the right type. In practice this 290 * can only be different if it was upgraded from micro to fat, 291 * and micro wanted WRITER but fat only needs READER. 292 */ 293 lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti; 294 rw_enter(&zap->zap_rwlock, lt); 295 if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) { 296 /* it was upgraded, now we only need reader */ 297 ASSERT(lt == RW_WRITER); 298 ASSERT(RW_READER == 299 (!zap->zap_ismicro && fatreader) ? RW_READER : lti); 300 rw_downgrade(&zap->zap_rwlock); 301 lt = RW_READER; 302 } 303 304 zap->zap_objset = os; 305 306 if (lt == RW_WRITER) 307 dmu_buf_will_dirty(db, tx); 308 309 ASSERT3P(zap->zap_dbuf, ==, db); 310 311 ASSERT(!zap->zap_ismicro || 312 zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks); 313 if (zap->zap_ismicro && tx && 314 zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { 315 uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; 316 if (newsz > MZAP_MAX_BLKSZ) { 317 dprintf("upgrading obj %llu: num_entries=%u\n", 318 obj, zap->zap_m.zap_num_entries); 319 mzap_upgrade(zap, tx); 320 *zapp = zap; 321 return (0); 322 } 323 err = dmu_object_set_blocksize(os, obj, newsz, 0, tx); 324 ASSERT3U(err, ==, 0); 325 zap->zap_m.zap_num_chunks = 326 db->db_size / MZAP_ENT_LEN - 1; 327 } 328 329 *zapp = zap; 330 return (0); 331} 332 333void 334zap_unlockdir(zap_t *zap) 335{ 336 rw_exit(&zap->zap_rwlock); 337 dmu_buf_rele(zap->zap_dbuf, NULL); 338} 339 340static void 341mzap_upgrade(zap_t *zap, dmu_tx_t *tx) 342{ 343 mzap_phys_t *mzp; 344 int i, sz, nchunks, err; 345 346 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 347 348 sz = zap->zap_dbuf->db_size; 349 mzp = kmem_alloc(sz, KM_SLEEP); 350 bcopy(zap->zap_dbuf->db_data, mzp, sz); 351 nchunks = zap->zap_m.zap_num_chunks; 352 353 err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, 354 1ULL << fzap_default_block_shift, 0, tx); 355 ASSERT(err == 0); 356 357 dprintf("upgrading obj=%llu with %u chunks\n", 358 zap->zap_object, nchunks); 359 mze_destroy(zap); 360 361 fzap_upgrade(zap, tx); 362 363 for (i = 0; i < nchunks; i++) { 364 int err; 365 mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; 366 if (mze->mze_name[0] == 0) 367 continue; 368 dprintf("adding %s=%llu\n", 369 mze->mze_name, mze->mze_value); 370 err = fzap_add_cd(zap, 371 mze->mze_name, 8, 1, &mze->mze_value, 372 mze->mze_cd, tx); 373 ASSERT3U(err, ==, 0); 374 } 375 kmem_free(mzp, sz); 376} 377 378uint64_t 379zap_hash(zap_t *zap, const char *name) 380{ 381 const uint8_t *cp; 382 uint8_t c; 383 uint64_t crc = zap->zap_salt; 384 385 ASSERT(crc != 0); 386 ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); 387 for (cp = (const uint8_t *)name; (c = *cp) != '\0'; cp++) 388 crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ c) & 0xFF]; 389 390 /* 391 * Only use 28 bits, since we need 4 bits in the cookie for the 392 * collision differentiator. We MUST use the high bits, since 393 * those are the onces that we first pay attention to when 394 * chosing the bucket. 395 */ 396 crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1); 397 398 return (crc); 399} 400 401 402static void 403mzap_create_impl(objset_t *os, uint64_t obj, dmu_tx_t *tx) 404{ 405 dmu_buf_t *db; 406 mzap_phys_t *zp; 407 408 VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db)); 409 410#ifdef ZFS_DEBUG 411 { 412 dmu_object_info_t doi; 413 dmu_object_info_from_db(db, &doi); 414 ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap); 415 } 416#endif 417 418 dmu_buf_will_dirty(db, tx); 419 zp = db->db_data; 420 zp->mz_block_type = ZBT_MICRO; 421 zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; 422 ASSERT(zp->mz_salt != 0); 423 dmu_buf_rele(db, FTAG); 424} 425 426int 427zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, 428 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 429{ 430 int err; 431 432 err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx); 433 if (err != 0) 434 return (err); 435 mzap_create_impl(os, obj, tx); 436 return (0); 437} 438 439uint64_t 440zap_create(objset_t *os, dmu_object_type_t ot, 441 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 442{ 443 uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); 444 445 mzap_create_impl(os, obj, tx); 446 return (obj); 447} 448 449int 450zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx) 451{ 452 /* 453 * dmu_object_free will free the object number and free the 454 * data. Freeing the data will cause our pageout function to be 455 * called, which will destroy our data (zap_leaf_t's and zap_t). 456 */ 457 458 return (dmu_object_free(os, zapobj, tx)); 459} 460 461_NOTE(ARGSUSED(0)) 462void 463zap_evict(dmu_buf_t *db, void *vzap) 464{ 465 zap_t *zap = vzap; 466 467 rw_destroy(&zap->zap_rwlock); 468 469 if (zap->zap_ismicro) 470 mze_destroy(zap); 471 else 472 mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 473 474 kmem_free(zap, sizeof (zap_t)); 475} 476 477int 478zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) 479{ 480 zap_t *zap; 481 int err; 482 483 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, &zap); 484 if (err) 485 return (err); 486 if (!zap->zap_ismicro) { 487 err = fzap_count(zap, count); 488 } else { 489 *count = zap->zap_m.zap_num_entries; 490 } 491 zap_unlockdir(zap); 492 return (err); 493} 494 495/* 496 * Routines for maniplulating attributes. 497 */ 498 499int 500zap_lookup(objset_t *os, uint64_t zapobj, const char *name, 501 uint64_t integer_size, uint64_t num_integers, void *buf) 502{ 503 zap_t *zap; 504 int err; 505 mzap_ent_t *mze; 506 507 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, &zap); 508 if (err) 509 return (err); 510 if (!zap->zap_ismicro) { 511 err = fzap_lookup(zap, name, 512 integer_size, num_integers, buf); 513 } else { 514 mze = mze_find(zap, name, zap_hash(zap, name)); 515 if (mze == NULL) { 516 err = ENOENT; 517 } else { 518 if (num_integers < 1) 519 err = EOVERFLOW; 520 else if (integer_size != 8) 521 err = EINVAL; 522 else 523 *(uint64_t *)buf = mze->mze_phys.mze_value; 524 } 525 } 526 zap_unlockdir(zap); 527 return (err); 528} 529 530int 531zap_length(objset_t *os, uint64_t zapobj, const char *name, 532 uint64_t *integer_size, uint64_t *num_integers) 533{ 534 zap_t *zap; 535 int err; 536 mzap_ent_t *mze; 537 538 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, &zap); 539 if (err) 540 return (err); 541 if (!zap->zap_ismicro) { 542 err = fzap_length(zap, name, integer_size, num_integers); 543 } else { 544 mze = mze_find(zap, name, zap_hash(zap, name)); 545 if (mze == NULL) { 546 err = ENOENT; 547 } else { 548 if (integer_size) 549 *integer_size = 8; 550 if (num_integers) 551 *num_integers = 1; 552 } 553 } 554 zap_unlockdir(zap); 555 return (err); 556} 557 558static void 559mzap_addent(zap_t *zap, const char *name, uint64_t hash, uint64_t value) 560{ 561 int i; 562 int start = zap->zap_m.zap_alloc_next; 563 uint32_t cd; 564 565 dprintf("obj=%llu %s=%llu\n", zap->zap_object, name, value); 566 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 567 568#ifdef ZFS_DEBUG 569 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { 570 mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; 571 ASSERT(strcmp(name, mze->mze_name) != 0); 572 } 573#endif 574 575 cd = mze_find_unused_cd(zap, hash); 576 /* given the limited size of the microzap, this can't happen */ 577 ASSERT(cd != ZAP_MAXCD); 578 579again: 580 for (i = start; i < zap->zap_m.zap_num_chunks; i++) { 581 mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; 582 if (mze->mze_name[0] == 0) { 583 mze->mze_value = value; 584 mze->mze_cd = cd; 585 (void) strcpy(mze->mze_name, name); 586 zap->zap_m.zap_num_entries++; 587 zap->zap_m.zap_alloc_next = i+1; 588 if (zap->zap_m.zap_alloc_next == 589 zap->zap_m.zap_num_chunks) 590 zap->zap_m.zap_alloc_next = 0; 591 mze_insert(zap, i, hash, mze); 592 return; 593 } 594 } 595 if (start != 0) { 596 start = 0; 597 goto again; 598 } 599 ASSERT(!"out of entries!"); 600} 601 602int 603zap_add(objset_t *os, uint64_t zapobj, const char *name, 604 int integer_size, uint64_t num_integers, 605 const void *val, dmu_tx_t *tx) 606{ 607 zap_t *zap; 608 int err; 609 mzap_ent_t *mze; 610 const uint64_t *intval = val; 611 uint64_t hash; 612 613 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, &zap); 614 if (err) 615 return (err); 616 if (!zap->zap_ismicro) { 617 err = fzap_add(zap, name, integer_size, num_integers, val, tx); 618 } else if (integer_size != 8 || num_integers != 1 || 619 strlen(name) >= MZAP_NAME_LEN) { 620 dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", 621 zapobj, integer_size, num_integers, name); 622 mzap_upgrade(zap, tx); 623 err = fzap_add(zap, name, integer_size, num_integers, val, tx); 624 } else { 625 hash = zap_hash(zap, name); 626 mze = mze_find(zap, name, hash); 627 if (mze != NULL) { 628 err = EEXIST; 629 } else { 630 mzap_addent(zap, name, hash, *intval); 631 } 632 } 633 zap_unlockdir(zap); 634 return (err); 635} 636 637int 638zap_update(objset_t *os, uint64_t zapobj, const char *name, 639 int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 640{ 641 zap_t *zap; 642 mzap_ent_t *mze; 643 const uint64_t *intval = val; 644 uint64_t hash; 645 int err; 646 647 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, &zap); 648 if (err) 649 return (err); 650 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 651 if (!zap->zap_ismicro) { 652 err = fzap_update(zap, name, 653 integer_size, num_integers, val, tx); 654 } else if (integer_size != 8 || num_integers != 1 || 655 strlen(name) >= MZAP_NAME_LEN) { 656 dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", 657 zapobj, integer_size, num_integers, name); 658 mzap_upgrade(zap, tx); 659 err = fzap_update(zap, name, 660 integer_size, num_integers, val, tx); 661 } else { 662 hash = zap_hash(zap, name); 663 mze = mze_find(zap, name, hash); 664 if (mze != NULL) { 665 mze->mze_phys.mze_value = *intval; 666 zap->zap_m.zap_phys->mz_chunk 667 [mze->mze_chunkid].mze_value = *intval; 668 } else { 669 mzap_addent(zap, name, hash, *intval); 670 } 671 } 672 zap_unlockdir(zap); 673 return (err); 674} 675 676int 677zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) 678{ 679 zap_t *zap; 680 int err; 681 mzap_ent_t *mze; 682 683 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, &zap); 684 if (err) 685 return (err); 686 if (!zap->zap_ismicro) { 687 err = fzap_remove(zap, name, tx); 688 } else { 689 mze = mze_find(zap, name, zap_hash(zap, name)); 690 if (mze == NULL) { 691 dprintf("fail: %s\n", name); 692 err = ENOENT; 693 } else { 694 dprintf("success: %s\n", name); 695 zap->zap_m.zap_num_entries--; 696 bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid], 697 sizeof (mzap_ent_phys_t)); 698 mze_remove(zap, mze); 699 } 700 } 701 zap_unlockdir(zap); 702 return (err); 703} 704 705 706/* 707 * Routines for iterating over the attributes. 708 */ 709 710/* 711 * We want to keep the high 32 bits of the cursor zero if we can, so 712 * that 32-bit programs can access this. So use a small hash value so 713 * we can fit 4 bits of cd into the 32-bit cursor. 714 * 715 * [ 4 zero bits | 32-bit collision differentiator | 28-bit hash value ] 716 */ 717void 718zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, 719 uint64_t serialized) 720{ 721 zc->zc_objset = os; 722 zc->zc_zap = NULL; 723 zc->zc_leaf = NULL; 724 zc->zc_zapobj = zapobj; 725 if (serialized == -1ULL) { 726 zc->zc_hash = -1ULL; 727 zc->zc_cd = 0; 728 } else { 729 zc->zc_hash = serialized << (64-ZAP_HASHBITS); 730 zc->zc_cd = serialized >> ZAP_HASHBITS; 731 if (zc->zc_cd >= ZAP_MAXCD) /* corrupt serialized */ 732 zc->zc_cd = 0; 733 } 734} 735 736void 737zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) 738{ 739 zap_cursor_init_serialized(zc, os, zapobj, 0); 740} 741 742void 743zap_cursor_fini(zap_cursor_t *zc) 744{ 745 if (zc->zc_zap) { 746 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 747 zap_unlockdir(zc->zc_zap); 748 zc->zc_zap = NULL; 749 } 750 if (zc->zc_leaf) { 751 rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); 752 zap_put_leaf(zc->zc_leaf); 753 zc->zc_leaf = NULL; 754 } 755 zc->zc_objset = NULL; 756} 757 758uint64_t 759zap_cursor_serialize(zap_cursor_t *zc) 760{ 761 if (zc->zc_hash == -1ULL) 762 return (-1ULL); 763 ASSERT((zc->zc_hash & (ZAP_MAXCD-1)) == 0); 764 ASSERT(zc->zc_cd < ZAP_MAXCD); 765 return ((zc->zc_hash >> (64-ZAP_HASHBITS)) | 766 ((uint64_t)zc->zc_cd << ZAP_HASHBITS)); 767} 768 769int 770zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) 771{ 772 int err; 773 avl_index_t idx; 774 mzap_ent_t mze_tofind; 775 mzap_ent_t *mze; 776 777 if (zc->zc_hash == -1ULL) 778 return (ENOENT); 779 780 if (zc->zc_zap == NULL) { 781 err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, 782 RW_READER, TRUE, &zc->zc_zap); 783 if (err) 784 return (err); 785 } else { 786 rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 787 } 788 if (!zc->zc_zap->zap_ismicro) { 789 err = fzap_cursor_retrieve(zc->zc_zap, zc, za); 790 } else { 791 err = ENOENT; 792 793 mze_tofind.mze_hash = zc->zc_hash; 794 mze_tofind.mze_phys.mze_cd = zc->zc_cd; 795 796 mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); 797 ASSERT(mze == NULL || 0 == bcmp(&mze->mze_phys, 798 &zc->zc_zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid], 799 sizeof (mze->mze_phys))); 800 if (mze == NULL) { 801 mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, 802 idx, AVL_AFTER); 803 } 804 if (mze) { 805 za->za_integer_length = 8; 806 za->za_num_integers = 1; 807 za->za_first_integer = mze->mze_phys.mze_value; 808 (void) strcpy(za->za_name, mze->mze_phys.mze_name); 809 zc->zc_hash = mze->mze_hash; 810 zc->zc_cd = mze->mze_phys.mze_cd; 811 err = 0; 812 } else { 813 zc->zc_hash = -1ULL; 814 } 815 } 816 rw_exit(&zc->zc_zap->zap_rwlock); 817 return (err); 818} 819 820void 821zap_cursor_advance(zap_cursor_t *zc) 822{ 823 if (zc->zc_hash == -1ULL) 824 return; 825 zc->zc_cd++; 826 if (zc->zc_cd >= ZAP_MAXCD) { 827 zc->zc_cd = 0; 828 zc->zc_hash += 1ULL<<(64-ZAP_HASHBITS); 829 if (zc->zc_hash == 0) /* EOF */ 830 zc->zc_hash = -1ULL; 831 } 832} 833 834int 835zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) 836{ 837 int err; 838 zap_t *zap; 839 840 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, &zap); 841 if (err) 842 return (err); 843 844 bzero(zs, sizeof (zap_stats_t)); 845 846 if (zap->zap_ismicro) { 847 zs->zs_blocksize = zap->zap_dbuf->db_size; 848 zs->zs_num_entries = zap->zap_m.zap_num_entries; 849 zs->zs_num_blocks = 1; 850 } else { 851 fzap_get_stats(zap, zs); 852 } 853 zap_unlockdir(zap); 854 return (0); 855} 856