sa.c revision 1.3
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22/* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 * Portions Copyright 2011 iXsystems, Inc 25 * Copyright (c) 2013, 2016 by Delphix. All rights reserved. 26 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. 27 * Copyright (c) 2014 Integros [integros.com] 28 */ 29 30#include <sys/zfs_context.h> 31#include <sys/types.h> 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/sysmacros.h> 35#include <sys/dmu.h> 36#include <sys/dmu_impl.h> 37#include <sys/dmu_objset.h> 38#include <sys/dbuf.h> 39#include <sys/dnode.h> 40#include <sys/zap.h> 41#include <sys/sa.h> 42#include <sys/sunddi.h> 43#include <sys/sa_impl.h> 44#include <sys/dnode.h> 45#include <sys/errno.h> 46#include <sys/zfs_context.h> 47 48/* 49 * ZFS System attributes: 50 * 51 * A generic mechanism to allow for arbitrary attributes 52 * to be stored in a dnode. The data will be stored in the bonus buffer of 53 * the dnode and if necessary a special "spill" block will be used to handle 54 * overflow situations. The spill block will be sized to fit the data 55 * from 512 - 128K. 
 * When a spill block is used the BP (blkptr_t) for the
 * spill block is stored at the end of the current bonus buffer.  Any
 * attributes that would be in the way of the blkptr_t will be relocated
 * into the spill block.
 *
 * Attribute registration:
 *
 * Stored persistently on a per dataset basis
 * a mapping between attribute "string" names and their actual attribute
 * numeric values, length, and byteswap function.  The names are only used
 * during registration.  All attributes are known by their unique attribute
 * id value.  If an attribute can have a variable size then the value
 * 0 will be used to indicate this.
 *
 * Attribute Layout:
 *
 * Attribute layouts are a way to compactly store multiple attributes, but
 * without taking the overhead associated with managing each attribute
 * individually.  Since you will typically have the same set of attributes
 * stored in the same order a single table will be used to represent that
 * layout.  The ZPL for example will usually have only about 10 different
 * layouts (regular files, device files, symlinks,
 * regular files + scanstamp, files/dir with extended attributes, and then
 * you have the possibility of all of those minus ACL, because it would
 * be kicked out into the spill block)
 *
 * Layouts are simply an array of the attributes and their
 * ordering i.e. [0, 1, 4, 5, 2]
 *
 * Each distinct layout is given a unique layout number and that is what's
 * stored in the header at the beginning of the SA data buffer.
 *
 * A layout only covers a single dbuf (bonus or spill).  If a set of
 * attributes is split up between the bonus buffer and a spill buffer then
 * two different layouts will be used.  This allows us to byteswap the
 * spill without looking at the bonus buffer and keeps the on disk format of
 * the bonus and spill buffer the same.
 *
 * Adding a single attribute will cause the entire set of attributes to
 * be rewritten and could result in a new layout number being constructed
 * as part of the rewrite if no such layout exists for the new set of
 * attributes.  The new attribute will be appended to the end of the already
 * existing attributes.
 *
 * Both the attribute registration and attribute layout information are
 * stored in normal ZAP attributes.  There should be a small number of
 * known layouts and the set of attributes is assumed to typically be quite
 * small.
 *
 * The registered attributes and layout "table" information is maintained
 * in core and a special "sa_os_t" is attached to the objset_t.
 *
 * A special interface is provided to allow for quickly applying
 * a large set of attributes at once.  sa_replace_all_by_template() is
 * used to set an array of attributes.  This is used by the ZPL when
 * creating a brand new file.  The template that is passed into the function
 * specifies the attribute, size for variable length attributes, location of
 * data and special "data locator" function if the data isn't in a contiguous
 * location.
 *
 * Byteswap implications:
 *
 * Since the SA attributes are not entirely self describing we can't do
 * the normal byteswap processing.  The special ZAP layout attribute and
 * attribute registration attributes define the byteswap function and the
 * size of the attributes, unless it is variable sized.
 * The normal ZFS byteswapping infrastructure assumes you don't need
 * to read any objects in order to do the necessary byteswapping.  Whereas
 * SA attributes can only be properly byteswapped if the dataset is opened
 * and the layout/attribute ZAP attributes are available.  Because of this
 * the SA attributes will be byteswapped when they are first accessed by
 * the SA code that will read the SA data.
127 */ 128 129typedef void (sa_iterfunc_t)(void *hdr, void *addr, sa_attr_type_t, 130 uint16_t length, int length_idx, boolean_t, void *userp); 131 132static int sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype); 133static void sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab); 134static void *sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, 135 void *data); 136static void sa_idx_tab_rele(objset_t *os, void *arg); 137static void sa_copy_data(sa_data_locator_t *func, void *start, void *target, 138 int buflen); 139static int sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, 140 sa_data_op_t action, sa_data_locator_t *locator, void *datastart, 141 uint16_t buflen, dmu_tx_t *tx); 142 143arc_byteswap_func_t *sa_bswap_table[] = { 144 byteswap_uint64_array, 145 byteswap_uint32_array, 146 byteswap_uint16_array, 147 byteswap_uint8_array, 148 zfs_acl_byteswap, 149}; 150 151#define SA_COPY_DATA(f, s, t, l) \ 152 { \ 153 if (f == NULL) { \ 154 if (l == 8) { \ 155 *(uint64_t *)t = *(uint64_t *)s; \ 156 } else if (l == 16) { \ 157 *(uint64_t *)t = *(uint64_t *)s; \ 158 *(uint64_t *)((uintptr_t)t + 8) = \ 159 *(uint64_t *)((uintptr_t)s + 8); \ 160 } else { \ 161 bcopy(s, t, l); \ 162 } \ 163 } else \ 164 sa_copy_data(f, s, t, l); \ 165 } 166 167/* 168 * This table is fixed and cannot be changed. Its purpose is to 169 * allow the SA code to work with both old/new ZPL file systems. 170 * It contains the list of legacy attributes. These attributes aren't 171 * stored in the "attribute" registry zap objects, since older ZPL file systems 172 * won't have the registry. Only objsets of type ZFS_TYPE_FILESYSTEM will 173 * use this static table. 
174 */ 175sa_attr_reg_t sa_legacy_attrs[] = { 176 {"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0}, 177 {"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1}, 178 {"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2}, 179 {"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3}, 180 {"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4}, 181 {"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5}, 182 {"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6}, 183 {"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7}, 184 {"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8}, 185 {"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9}, 186 {"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10}, 187 {"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11}, 188 {"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12}, 189 {"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13}, 190 {"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14}, 191 {"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15}, 192}; 193 194/* 195 * This is only used for objects of type DMU_OT_ZNODE 196 */ 197sa_attr_type_t sa_legacy_zpl_layout[] = { 198 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 199}; 200 201/* 202 * Special dummy layout used for buffers with no attributes. 
203 */ 204sa_attr_type_t sa_dummy_zpl_layout[] = { 0 }; 205 206static int sa_legacy_attr_count = 16; 207static kmem_cache_t *sa_cache = NULL; 208 209/*ARGSUSED*/ 210static int 211sa_cache_constructor(void *buf, void *unused, int kmflag) 212{ 213 sa_handle_t *hdl = buf; 214 215 mutex_init(&hdl->sa_lock, NULL, MUTEX_DEFAULT, NULL); 216 return (0); 217} 218 219/*ARGSUSED*/ 220static void 221sa_cache_destructor(void *buf, void *unused) 222{ 223 sa_handle_t *hdl = buf; 224 225#ifdef __NetBSD__ 226 hdl = unused; 227#endif 228 mutex_destroy(&hdl->sa_lock); 229} 230 231void 232sa_cache_init(void) 233{ 234 sa_cache = kmem_cache_create("sa_cache", 235 sizeof (sa_handle_t), 0, sa_cache_constructor, 236 sa_cache_destructor, NULL, NULL, NULL, 0); 237} 238 239void 240sa_cache_fini(void) 241{ 242 if (sa_cache) 243 kmem_cache_destroy(sa_cache); 244} 245 246static int 247layout_num_compare(const void *arg1, const void *arg2) 248{ 249 const sa_lot_t *node1 = arg1; 250 const sa_lot_t *node2 = arg2; 251 252 if (node1->lot_num > node2->lot_num) 253 return (1); 254 else if (node1->lot_num < node2->lot_num) 255 return (-1); 256 return (0); 257} 258 259static int 260layout_hash_compare(const void *arg1, const void *arg2) 261{ 262 const sa_lot_t *node1 = arg1; 263 const sa_lot_t *node2 = arg2; 264 265 if (node1->lot_hash > node2->lot_hash) 266 return (1); 267 if (node1->lot_hash < node2->lot_hash) 268 return (-1); 269 if (node1->lot_instance > node2->lot_instance) 270 return (1); 271 if (node1->lot_instance < node2->lot_instance) 272 return (-1); 273 return (0); 274} 275 276boolean_t 277sa_layout_equal(sa_lot_t *tbf, sa_attr_type_t *attrs, int count) 278{ 279 int i; 280 281 if (count != tbf->lot_attr_count) 282 return (1); 283 284 for (i = 0; i != count; i++) { 285 if (attrs[i] != tbf->lot_attrs[i]) 286 return (1); 287 } 288 return (0); 289} 290 291#define SA_ATTR_HASH(attr) (zfs_crc64_table[(-1ULL ^ attr) & 0xFF]) 292 293static uint64_t 294sa_layout_info_hash(sa_attr_type_t *attrs, int 
attr_count) 295{ 296 int i; 297 uint64_t crc = -1ULL; 298 299 for (i = 0; i != attr_count; i++) 300 crc ^= SA_ATTR_HASH(attrs[i]); 301 302 return (crc); 303} 304 305static int 306sa_get_spill(sa_handle_t *hdl) 307{ 308 int rc; 309 if (hdl->sa_spill == NULL) { 310 if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL, 311 &hdl->sa_spill)) == 0) 312 VERIFY(0 == sa_build_index(hdl, SA_SPILL)); 313 } else { 314 rc = 0; 315 } 316 317 return (rc); 318} 319 320/* 321 * Main attribute lookup/update function 322 * returns 0 for success or non zero for failures 323 * 324 * Operates on bulk array, first failure will abort further processing 325 */ 326int 327sa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, 328 sa_data_op_t data_op, dmu_tx_t *tx) 329{ 330 sa_os_t *sa = hdl->sa_os->os_sa; 331 int i; 332 int error = 0; 333 sa_buf_type_t buftypes; 334 335 buftypes = 0; 336 337 ASSERT(count > 0); 338 for (i = 0; i != count; i++) { 339 ASSERT(bulk[i].sa_attr <= hdl->sa_os->os_sa->sa_num_attrs); 340 341 bulk[i].sa_addr = NULL; 342 /* First check the bonus buffer */ 343 344 if (hdl->sa_bonus_tab && TOC_ATTR_PRESENT( 345 hdl->sa_bonus_tab->sa_idx_tab[bulk[i].sa_attr])) { 346 SA_ATTR_INFO(sa, hdl->sa_bonus_tab, 347 SA_GET_HDR(hdl, SA_BONUS), 348 bulk[i].sa_attr, bulk[i], SA_BONUS, hdl); 349 if (tx && !(buftypes & SA_BONUS)) { 350 dmu_buf_will_dirty(hdl->sa_bonus, tx); 351 buftypes |= SA_BONUS; 352 } 353 } 354 if (bulk[i].sa_addr == NULL && 355 ((error = sa_get_spill(hdl)) == 0)) { 356 if (TOC_ATTR_PRESENT( 357 hdl->sa_spill_tab->sa_idx_tab[bulk[i].sa_attr])) { 358 SA_ATTR_INFO(sa, hdl->sa_spill_tab, 359 SA_GET_HDR(hdl, SA_SPILL), 360 bulk[i].sa_attr, bulk[i], SA_SPILL, hdl); 361 if (tx && !(buftypes & SA_SPILL) && 362 bulk[i].sa_size == bulk[i].sa_length) { 363 dmu_buf_will_dirty(hdl->sa_spill, tx); 364 buftypes |= SA_SPILL; 365 } 366 } 367 } 368 if (error && error != ENOENT) { 369 return ((error == ECKSUM) ? 
EIO : error); 370 } 371 372 switch (data_op) { 373 case SA_LOOKUP: 374 if (bulk[i].sa_addr == NULL) 375 return (SET_ERROR(ENOENT)); 376 if (bulk[i].sa_data) { 377 SA_COPY_DATA(bulk[i].sa_data_func, 378 bulk[i].sa_addr, bulk[i].sa_data, 379 bulk[i].sa_size); 380 } 381 continue; 382 383 case SA_UPDATE: 384 /* existing rewrite of attr */ 385 if (bulk[i].sa_addr && 386 bulk[i].sa_size == bulk[i].sa_length) { 387 SA_COPY_DATA(bulk[i].sa_data_func, 388 bulk[i].sa_data, bulk[i].sa_addr, 389 bulk[i].sa_length); 390 continue; 391 } else if (bulk[i].sa_addr) { /* attr size change */ 392 error = sa_modify_attrs(hdl, bulk[i].sa_attr, 393 SA_REPLACE, bulk[i].sa_data_func, 394 bulk[i].sa_data, bulk[i].sa_length, tx); 395 } else { /* adding new attribute */ 396 error = sa_modify_attrs(hdl, bulk[i].sa_attr, 397 SA_ADD, bulk[i].sa_data_func, 398 bulk[i].sa_data, bulk[i].sa_length, tx); 399 } 400 if (error) 401 return (error); 402 break; 403 } 404 } 405 return (error); 406} 407 408static sa_lot_t * 409sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count, 410 uint64_t lot_num, uint64_t hash, boolean_t zapadd, dmu_tx_t *tx) 411{ 412 sa_os_t *sa = os->os_sa; 413 sa_lot_t *tb, *findtb; 414 int i; 415 avl_index_t loc; 416 417 ASSERT(MUTEX_HELD(&sa->sa_lock)); 418 tb = kmem_zalloc(sizeof (sa_lot_t), KM_SLEEP); 419 tb->lot_attr_count = attr_count; 420#ifdef __NetBSD__ 421 if (attr_count != 0) 422#endif 423 tb->lot_attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, 424 KM_SLEEP); 425 bcopy(attrs, tb->lot_attrs, sizeof (sa_attr_type_t) * attr_count); 426 tb->lot_num = lot_num; 427 tb->lot_hash = hash; 428 tb->lot_instance = 0; 429 430 if (zapadd) { 431 char attr_name[8]; 432 433 if (sa->sa_layout_attr_obj == 0) { 434 sa->sa_layout_attr_obj = zap_create_link(os, 435 DMU_OT_SA_ATTR_LAYOUTS, 436 sa->sa_master_obj, SA_LAYOUTS, tx); 437 } 438 439 (void) snprintf(attr_name, sizeof (attr_name), 440 "%d", (int)lot_num); 441 VERIFY(0 == zap_update(os, 
os->os_sa->sa_layout_attr_obj, 442 attr_name, 2, attr_count, attrs, tx)); 443 } 444 445 list_create(&tb->lot_idx_tab, sizeof (sa_idx_tab_t), 446 offsetof(sa_idx_tab_t, sa_next)); 447 448 for (i = 0; i != attr_count; i++) { 449 if (sa->sa_attr_table[tb->lot_attrs[i]].sa_length == 0) 450 tb->lot_var_sizes++; 451 } 452 453 avl_add(&sa->sa_layout_num_tree, tb); 454 455 /* verify we don't have a hash collision */ 456 if ((findtb = avl_find(&sa->sa_layout_hash_tree, tb, &loc)) != NULL) { 457 for (; findtb && findtb->lot_hash == hash; 458 findtb = AVL_NEXT(&sa->sa_layout_hash_tree, findtb)) { 459 if (findtb->lot_instance != tb->lot_instance) 460 break; 461 tb->lot_instance++; 462 } 463 } 464 avl_add(&sa->sa_layout_hash_tree, tb); 465 return (tb); 466} 467 468static void 469sa_find_layout(objset_t *os, uint64_t hash, sa_attr_type_t *attrs, 470 int count, dmu_tx_t *tx, sa_lot_t **lot) 471{ 472 sa_lot_t *tb, tbsearch; 473 avl_index_t loc; 474 sa_os_t *sa = os->os_sa; 475 boolean_t found = B_FALSE; 476 477 mutex_enter(&sa->sa_lock); 478 tbsearch.lot_hash = hash; 479 tbsearch.lot_instance = 0; 480 tb = avl_find(&sa->sa_layout_hash_tree, &tbsearch, &loc); 481 if (tb) { 482 for (; tb && tb->lot_hash == hash; 483 tb = AVL_NEXT(&sa->sa_layout_hash_tree, tb)) { 484 if (sa_layout_equal(tb, attrs, count) == 0) { 485 found = B_TRUE; 486 break; 487 } 488 } 489 } 490 if (!found) { 491 tb = sa_add_layout_entry(os, attrs, count, 492 avl_numnodes(&sa->sa_layout_num_tree), hash, B_TRUE, tx); 493 } 494 mutex_exit(&sa->sa_lock); 495 *lot = tb; 496} 497 498static int 499sa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx) 500{ 501 int error; 502 uint32_t blocksize; 503 504 if (size == 0) { 505 blocksize = SPA_MINBLOCKSIZE; 506 } else if (size > SPA_OLD_MAXBLOCKSIZE) { 507 ASSERT(0); 508 return (SET_ERROR(EFBIG)); 509 } else { 510 blocksize = P2ROUNDUP_TYPED(size, SPA_MINBLOCKSIZE, uint32_t); 511 } 512 513 error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, tx); 514 ASSERT(error == 
0); 515 return (error); 516} 517 518static void 519sa_copy_data(sa_data_locator_t *func, void *datastart, void *target, int buflen) 520{ 521 if (func == NULL) { 522 bcopy(datastart, target, buflen); 523 } else { 524 boolean_t start; 525 int bytes; 526 void *dataptr; 527 void *saptr = target; 528 uint32_t length; 529 530 start = B_TRUE; 531 bytes = 0; 532 while (bytes < buflen) { 533 func(&dataptr, &length, buflen, start, datastart); 534 bcopy(dataptr, saptr, length); 535 saptr = (void *)((caddr_t)saptr + length); 536 bytes += length; 537 start = B_FALSE; 538 } 539 } 540} 541 542/* 543 * Determine several different sizes 544 * first the sa header size 545 * the number of bytes to be stored 546 * if spill would occur the index in the attribute array is returned 547 * 548 * the boolean will_spill will be set when spilling is necessary. It 549 * is only set when the buftype is SA_BONUS 550 */ 551static int 552sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count, 553 dmu_buf_t *db, sa_buf_type_t buftype, int *index, int *total, 554 boolean_t *will_spill) 555{ 556 int var_size = 0; 557 int i; 558 int full_space; 559 int hdrsize; 560 int extra_hdrsize; 561 562 if (buftype == SA_BONUS && sa->sa_force_spill) { 563 *total = 0; 564 *index = 0; 565 *will_spill = B_TRUE; 566 return (0); 567 } 568 569 *index = -1; 570 *total = 0; 571 *will_spill = B_FALSE; 572 573 extra_hdrsize = 0; 574 hdrsize = (SA_BONUSTYPE_FROM_DB(db) == DMU_OT_ZNODE) ? 0 : 575 sizeof (sa_hdr_phys_t); 576 577 full_space = (buftype == SA_BONUS) ? 
DN_MAX_BONUSLEN : db->db_size; 578 ASSERT(IS_P2ALIGNED(full_space, 8)); 579 580 for (i = 0; i != attr_count; i++) { 581 boolean_t is_var_sz; 582 583 *total = P2ROUNDUP(*total, 8); 584 *total += attr_desc[i].sa_length; 585 if (*will_spill) 586 continue; 587 588 is_var_sz = (SA_REGISTERED_LEN(sa, attr_desc[i].sa_attr) == 0); 589 if (is_var_sz) { 590 var_size++; 591 } 592 593 if (is_var_sz && var_size > 1) { 594 /* 595 * Don't worry that the spill block might overflow. 596 * It will be resized if needed in sa_build_layouts(). 597 */ 598 if (buftype == SA_SPILL || 599 P2ROUNDUP(hdrsize + sizeof (uint16_t), 8) + 600 *total < full_space) { 601 /* 602 * Account for header space used by array of 603 * optional sizes of variable-length attributes. 604 * Record the extra header size in case this 605 * increase needs to be reversed due to 606 * spill-over. 607 */ 608 hdrsize += sizeof (uint16_t); 609 if (*index != -1) 610 extra_hdrsize += sizeof (uint16_t); 611 } else { 612 ASSERT(buftype == SA_BONUS); 613 if (*index == -1) 614 *index = i; 615 *will_spill = B_TRUE; 616 continue; 617 } 618 } 619 620 /* 621 * find index of where spill *could* occur. 622 * Then continue to count of remainder attribute 623 * space. The sum is used later for sizing bonus 624 * and spill buffer. 625 */ 626 if (buftype == SA_BONUS && *index == -1 && 627 (*total + P2ROUNDUP(hdrsize, 8)) > 628 (full_space - sizeof (blkptr_t))) { 629 *index = i; 630 } 631 632 if ((*total + P2ROUNDUP(hdrsize, 8)) > full_space && 633 buftype == SA_BONUS) 634 *will_spill = B_TRUE; 635 } 636 637 if (*will_spill) 638 hdrsize -= extra_hdrsize; 639 640 hdrsize = P2ROUNDUP(hdrsize, 8); 641 return (hdrsize); 642} 643 644#define BUF_SPACE_NEEDED(total, header) (total + header) 645 646/* 647 * Find layout that corresponds to ordering of attributes 648 * If not found a new layout number is created and added to 649 * persistent layout tables. 
650 */ 651static int 652sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, 653 dmu_tx_t *tx) 654{ 655 sa_os_t *sa = hdl->sa_os->os_sa; 656 uint64_t hash; 657 sa_buf_type_t buftype; 658 sa_hdr_phys_t *sahdr; 659 void *data_start; 660 int buf_space; 661 sa_attr_type_t *attrs, *attrs_start; 662 int i, lot_count; 663 int hdrsize; 664 int spillhdrsize = 0; 665 int used; 666 dmu_object_type_t bonustype; 667 sa_lot_t *lot; 668 int len_idx; 669 int spill_used; 670 boolean_t spilling; 671 672 dmu_buf_will_dirty(hdl->sa_bonus, tx); 673 bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus); 674 675 /* first determine bonus header size and sum of all attributes */ 676 hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus, 677 SA_BONUS, &i, &used, &spilling); 678 679 if (used > SPA_OLD_MAXBLOCKSIZE) 680 return (SET_ERROR(EFBIG)); 681 682 VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ? 683 MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) : 684 used + hdrsize, tx)); 685 686 ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) || 687 bonustype == DMU_OT_SA); 688 689 /* setup and size spill buffer when needed */ 690 if (spilling) { 691 boolean_t dummy; 692 693 if (hdl->sa_spill == NULL) { 694 VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL, 695 &hdl->sa_spill) == 0); 696 } 697 dmu_buf_will_dirty(hdl->sa_spill, tx); 698 699 spillhdrsize = sa_find_sizes(sa, &attr_desc[i], 700 attr_count - i, hdl->sa_spill, SA_SPILL, &i, 701 &spill_used, &dummy); 702 703 if (spill_used > SPA_OLD_MAXBLOCKSIZE) 704 return (SET_ERROR(EFBIG)); 705 706 buf_space = hdl->sa_spill->db_size - spillhdrsize; 707 if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) > 708 hdl->sa_spill->db_size) 709 VERIFY(0 == sa_resize_spill(hdl, 710 BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx)); 711 } 712 713 /* setup starting pointers to lay down data */ 714 data_start = (void *)((uintptr_t)hdl->sa_bonus->db_data + hdrsize); 715 sahdr = (sa_hdr_phys_t *)hdl->sa_bonus->db_data; 716 
buftype = SA_BONUS; 717 718 if (spilling) 719 buf_space = (sa->sa_force_spill) ? 720 0 : SA_BLKPTR_SPACE - hdrsize; 721 else 722 buf_space = hdl->sa_bonus->db_size - hdrsize; 723 724 attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, 725 KM_SLEEP); 726 lot_count = 0; 727 728 for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) { 729 uint16_t length; 730 731 ASSERT(IS_P2ALIGNED(data_start, 8)); 732 ASSERT(IS_P2ALIGNED(buf_space, 8)); 733 attrs[i] = attr_desc[i].sa_attr; 734 length = SA_REGISTERED_LEN(sa, attrs[i]); 735 if (length == 0) 736 length = attr_desc[i].sa_length; 737 else 738 VERIFY(length == attr_desc[i].sa_length); 739 740 if (buf_space < length) { /* switch to spill buffer */ 741 VERIFY(spilling); 742 VERIFY(bonustype == DMU_OT_SA); 743 if (buftype == SA_BONUS && !sa->sa_force_spill) { 744 sa_find_layout(hdl->sa_os, hash, attrs_start, 745 lot_count, tx, &lot); 746 SA_SET_HDR(sahdr, lot->lot_num, hdrsize); 747 } 748 749 buftype = SA_SPILL; 750 hash = -1ULL; 751 len_idx = 0; 752 753 sahdr = (sa_hdr_phys_t *)hdl->sa_spill->db_data; 754 sahdr->sa_magic = SA_MAGIC; 755 data_start = (void *)((uintptr_t)sahdr + 756 spillhdrsize); 757 attrs_start = &attrs[i]; 758 buf_space = hdl->sa_spill->db_size - spillhdrsize; 759 lot_count = 0; 760 } 761 hash ^= SA_ATTR_HASH(attrs[i]); 762 attr_desc[i].sa_addr = data_start; 763 attr_desc[i].sa_size = length; 764 SA_COPY_DATA(attr_desc[i].sa_data_func, attr_desc[i].sa_data, 765 data_start, length); 766 if (sa->sa_attr_table[attrs[i]].sa_length == 0) { 767 sahdr->sa_lengths[len_idx++] = length; 768 } 769 VERIFY((uintptr_t)data_start % 8 == 0); 770 data_start = (void *)P2ROUNDUP(((uintptr_t)data_start + 771 length), 8); 772 buf_space -= P2ROUNDUP(length, 8); 773 lot_count++; 774 } 775 776 sa_find_layout(hdl->sa_os, hash, attrs_start, lot_count, tx, &lot); 777 778 /* 779 * Verify that old znodes always have layout number 0. 
780 * Must be DMU_OT_SA for arbitrary layouts 781 */ 782 VERIFY((bonustype == DMU_OT_ZNODE && lot->lot_num == 0) || 783 (bonustype == DMU_OT_SA && lot->lot_num > 1)); 784 785 if (bonustype == DMU_OT_SA) { 786 SA_SET_HDR(sahdr, lot->lot_num, 787 buftype == SA_BONUS ? hdrsize : spillhdrsize); 788 } 789 790 kmem_free(attrs, sizeof (sa_attr_type_t) * attr_count); 791 if (hdl->sa_bonus_tab) { 792 sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab); 793 hdl->sa_bonus_tab = NULL; 794 } 795 if (!sa->sa_force_spill) 796 VERIFY(0 == sa_build_index(hdl, SA_BONUS)); 797 if (hdl->sa_spill) { 798 sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); 799 if (!spilling) { 800 /* 801 * remove spill block that is no longer needed. 802 */ 803 dmu_buf_rele(hdl->sa_spill, NULL); 804 hdl->sa_spill = NULL; 805 hdl->sa_spill_tab = NULL; 806 VERIFY(0 == dmu_rm_spill(hdl->sa_os, 807 sa_handle_object(hdl), tx)); 808 } else { 809 VERIFY(0 == sa_build_index(hdl, SA_SPILL)); 810 } 811 } 812 813 return (0); 814} 815 816static void 817sa_free_attr_table(sa_os_t *sa) 818{ 819 int i; 820 821 if (sa->sa_attr_table == NULL) 822 return; 823 824 for (i = 0; i != sa->sa_num_attrs; i++) { 825 if (sa->sa_attr_table[i].sa_name) 826 kmem_free(sa->sa_attr_table[i].sa_name, 827 strlen(sa->sa_attr_table[i].sa_name) + 1); 828 } 829 830 kmem_free(sa->sa_attr_table, 831 sizeof (sa_attr_table_t) * sa->sa_num_attrs); 832 833 sa->sa_attr_table = NULL; 834} 835 836static int 837sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count) 838{ 839 sa_os_t *sa = os->os_sa; 840 uint64_t sa_attr_count = 0; 841 uint64_t sa_reg_count = 0; 842 int error = 0; 843 uint64_t attr_value; 844 sa_attr_table_t *tb; 845 zap_cursor_t zc; 846 zap_attribute_t za; 847 int registered_count = 0; 848 int i; 849 dmu_objset_type_t ostype = dmu_objset_type(os); 850 851 sa->sa_user_table = 852 kmem_zalloc(count * sizeof (sa_attr_type_t), KM_SLEEP); 853 sa->sa_user_table_sz = count * sizeof (sa_attr_type_t); 854 855 if (sa->sa_reg_attr_obj != 0) 
{ 856 error = zap_count(os, sa->sa_reg_attr_obj, 857 &sa_attr_count); 858 859 /* 860 * Make sure we retrieved a count and that it isn't zero 861 */ 862 if (error || (error == 0 && sa_attr_count == 0)) { 863 if (error == 0) 864 error = SET_ERROR(EINVAL); 865 goto bail; 866 } 867 sa_reg_count = sa_attr_count; 868 } 869 870 if (ostype == DMU_OST_ZFS && sa_attr_count == 0) 871 sa_attr_count += sa_legacy_attr_count; 872 873 /* Allocate attribute numbers for attributes that aren't registered */ 874 for (i = 0; i != count; i++) { 875 boolean_t found = B_FALSE; 876 int j; 877 878 if (ostype == DMU_OST_ZFS) { 879 for (j = 0; j != sa_legacy_attr_count; j++) { 880 if (strcmp(reg_attrs[i].sa_name, 881 sa_legacy_attrs[j].sa_name) == 0) { 882 sa->sa_user_table[i] = 883 sa_legacy_attrs[j].sa_attr; 884 found = B_TRUE; 885 } 886 } 887 } 888 if (found) 889 continue; 890 891 if (sa->sa_reg_attr_obj) 892 error = zap_lookup(os, sa->sa_reg_attr_obj, 893 reg_attrs[i].sa_name, 8, 1, &attr_value); 894 else 895 error = SET_ERROR(ENOENT); 896 switch (error) { 897 case ENOENT: 898 sa->sa_user_table[i] = (sa_attr_type_t)sa_attr_count; 899 sa_attr_count++; 900 break; 901 case 0: 902 sa->sa_user_table[i] = ATTR_NUM(attr_value); 903 break; 904 default: 905 goto bail; 906 } 907 } 908 909 sa->sa_num_attrs = sa_attr_count; 910 tb = sa->sa_attr_table = 911 kmem_zalloc(sizeof (sa_attr_table_t) * sa_attr_count, KM_SLEEP); 912 913 /* 914 * Attribute table is constructed from requested attribute list, 915 * previously foreign registered attributes, and also the legacy 916 * ZPL set of attributes. 
917 */ 918 919 if (sa->sa_reg_attr_obj) { 920 for (zap_cursor_init(&zc, os, sa->sa_reg_attr_obj); 921 (error = zap_cursor_retrieve(&zc, &za)) == 0; 922 zap_cursor_advance(&zc)) { 923 uint64_t value; 924 value = za.za_first_integer; 925 926 registered_count++; 927 tb[ATTR_NUM(value)].sa_attr = ATTR_NUM(value); 928 tb[ATTR_NUM(value)].sa_length = ATTR_LENGTH(value); 929 tb[ATTR_NUM(value)].sa_byteswap = ATTR_BSWAP(value); 930 tb[ATTR_NUM(value)].sa_registered = B_TRUE; 931 932 if (tb[ATTR_NUM(value)].sa_name) { 933 continue; 934 } 935 tb[ATTR_NUM(value)].sa_name = 936 kmem_zalloc(strlen(za.za_name) +1, KM_SLEEP); 937 (void) strlcpy(tb[ATTR_NUM(value)].sa_name, za.za_name, 938 strlen(za.za_name) +1); 939 } 940 zap_cursor_fini(&zc); 941 /* 942 * Make sure we processed the correct number of registered 943 * attributes 944 */ 945 if (registered_count != sa_reg_count) { 946 ASSERT(error != 0); 947 goto bail; 948 } 949 950 } 951 952 if (ostype == DMU_OST_ZFS) { 953 for (i = 0; i != sa_legacy_attr_count; i++) { 954 if (tb[i].sa_name) 955 continue; 956 tb[i].sa_attr = sa_legacy_attrs[i].sa_attr; 957 tb[i].sa_length = sa_legacy_attrs[i].sa_length; 958 tb[i].sa_byteswap = sa_legacy_attrs[i].sa_byteswap; 959 tb[i].sa_registered = B_FALSE; 960 tb[i].sa_name = 961 kmem_zalloc(strlen(sa_legacy_attrs[i].sa_name) +1, 962 KM_SLEEP); 963 (void) strlcpy(tb[i].sa_name, 964 sa_legacy_attrs[i].sa_name, 965 strlen(sa_legacy_attrs[i].sa_name) + 1); 966 } 967 } 968 969 for (i = 0; i != count; i++) { 970 sa_attr_type_t attr_id; 971 972 attr_id = sa->sa_user_table[i]; 973 if (tb[attr_id].sa_name) 974 continue; 975 976 tb[attr_id].sa_length = reg_attrs[i].sa_length; 977 tb[attr_id].sa_byteswap = reg_attrs[i].sa_byteswap; 978 tb[attr_id].sa_attr = attr_id; 979 tb[attr_id].sa_name = 980 kmem_zalloc(strlen(reg_attrs[i].sa_name) + 1, KM_SLEEP); 981 (void) strlcpy(tb[attr_id].sa_name, reg_attrs[i].sa_name, 982 strlen(reg_attrs[i].sa_name) + 1); 983 } 984 985 sa->sa_need_attr_registration = 986 
(sa_attr_count != registered_count); 987 988 return (0); 989bail: 990 kmem_free(sa->sa_user_table, count * sizeof (sa_attr_type_t)); 991 sa->sa_user_table = NULL; 992 sa_free_attr_table(sa); 993 return ((error != 0) ? error : EINVAL); 994} 995 996int 997sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, 998 sa_attr_type_t **user_table) 999{ 1000 zap_cursor_t zc; 1001 zap_attribute_t za; 1002 sa_os_t *sa; 1003 dmu_objset_type_t ostype = dmu_objset_type(os); 1004 sa_attr_type_t *tb; 1005 int error; 1006 1007 mutex_enter(&os->os_user_ptr_lock); 1008 if (os->os_sa) { 1009 mutex_enter(&os->os_sa->sa_lock); 1010 mutex_exit(&os->os_user_ptr_lock); 1011 tb = os->os_sa->sa_user_table; 1012 mutex_exit(&os->os_sa->sa_lock); 1013 *user_table = tb; 1014 return (0); 1015 } 1016 1017 sa = kmem_zalloc(sizeof (sa_os_t), KM_SLEEP); 1018 mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL); 1019 sa->sa_master_obj = sa_obj; 1020 1021 os->os_sa = sa; 1022 mutex_enter(&sa->sa_lock); 1023 mutex_exit(&os->os_user_ptr_lock); 1024 avl_create(&sa->sa_layout_num_tree, layout_num_compare, 1025 sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node)); 1026 avl_create(&sa->sa_layout_hash_tree, layout_hash_compare, 1027 sizeof (sa_lot_t), offsetof(sa_lot_t, lot_hash_node)); 1028 1029 if (sa_obj) { 1030 error = zap_lookup(os, sa_obj, SA_LAYOUTS, 1031 8, 1, &sa->sa_layout_attr_obj); 1032 if (error != 0 && error != ENOENT) 1033 goto fail; 1034 error = zap_lookup(os, sa_obj, SA_REGISTRY, 1035 8, 1, &sa->sa_reg_attr_obj); 1036 if (error != 0 && error != ENOENT) 1037 goto fail; 1038 } 1039 1040 if ((error = sa_attr_table_setup(os, reg_attrs, count)) != 0) 1041 goto fail; 1042 1043 if (sa->sa_layout_attr_obj != 0) { 1044 uint64_t layout_count; 1045 1046 error = zap_count(os, sa->sa_layout_attr_obj, 1047 &layout_count); 1048 1049 /* 1050 * Layout number count should be > 0 1051 */ 1052 if (error || (error == 0 && layout_count == 0)) { 1053 if (error == 0) 1054 error = 
SET_ERROR(EINVAL); 1055 goto fail; 1056 } 1057 1058 for (zap_cursor_init(&zc, os, sa->sa_layout_attr_obj); 1059 (error = zap_cursor_retrieve(&zc, &za)) == 0; 1060 zap_cursor_advance(&zc)) { 1061 sa_attr_type_t *lot_attrs; 1062 uint64_t lot_num; 1063 1064 lot_attrs = kmem_zalloc(sizeof (sa_attr_type_t) * 1065 za.za_num_integers, KM_SLEEP); 1066 1067 if ((error = (zap_lookup(os, sa->sa_layout_attr_obj, 1068 za.za_name, 2, za.za_num_integers, 1069 lot_attrs))) != 0) { 1070 kmem_free(lot_attrs, sizeof (sa_attr_type_t) * 1071 za.za_num_integers); 1072 break; 1073 } 1074 VERIFY(ddi_strtoull(za.za_name, NULL, 10, 1075 (unsigned long long *)&lot_num) == 0); 1076 1077 (void) sa_add_layout_entry(os, lot_attrs, 1078 za.za_num_integers, lot_num, 1079 sa_layout_info_hash(lot_attrs, 1080 za.za_num_integers), B_FALSE, NULL); 1081 kmem_free(lot_attrs, sizeof (sa_attr_type_t) * 1082 za.za_num_integers); 1083 } 1084 zap_cursor_fini(&zc); 1085 1086 /* 1087 * Make sure layout count matches number of entries added 1088 * to AVL tree 1089 */ 1090 if (avl_numnodes(&sa->sa_layout_num_tree) != layout_count) { 1091 ASSERT(error != 0); 1092 goto fail; 1093 } 1094 } 1095 1096 /* Add special layout number for old ZNODES */ 1097 if (ostype == DMU_OST_ZFS) { 1098 (void) sa_add_layout_entry(os, sa_legacy_zpl_layout, 1099 sa_legacy_attr_count, 0, 1100 sa_layout_info_hash(sa_legacy_zpl_layout, 1101 sa_legacy_attr_count), B_FALSE, NULL); 1102 1103 (void) sa_add_layout_entry(os, sa_dummy_zpl_layout, 0, 1, 1104 0, B_FALSE, NULL); 1105 } 1106 *user_table = os->os_sa->sa_user_table; 1107 mutex_exit(&sa->sa_lock); 1108 return (0); 1109fail: 1110 os->os_sa = NULL; 1111 sa_free_attr_table(sa); 1112 if (sa->sa_user_table) 1113 kmem_free(sa->sa_user_table, sa->sa_user_table_sz); 1114 mutex_exit(&sa->sa_lock); 1115 avl_destroy(&sa->sa_layout_hash_tree); 1116 avl_destroy(&sa->sa_layout_num_tree); 1117 mutex_destroy(&sa->sa_lock); 1118 kmem_free(sa, sizeof (sa_os_t)); 1119 return ((error == ECKSUM) ? 
EIO : error); 1120} 1121 1122void 1123sa_tear_down(objset_t *os) 1124{ 1125 sa_os_t *sa = os->os_sa; 1126 sa_lot_t *layout; 1127 void *cookie; 1128 1129 kmem_free(sa->sa_user_table, sa->sa_user_table_sz); 1130 1131 /* Free up attr table */ 1132 1133 sa_free_attr_table(sa); 1134 1135 cookie = NULL; 1136 while (layout = avl_destroy_nodes(&sa->sa_layout_hash_tree, &cookie)) { 1137 sa_idx_tab_t *tab; 1138 while (tab = list_head(&layout->lot_idx_tab)) { 1139 ASSERT(refcount_count(&tab->sa_refcount)); 1140 sa_idx_tab_rele(os, tab); 1141 } 1142 } 1143 1144 cookie = NULL; 1145 while (layout = avl_destroy_nodes(&sa->sa_layout_num_tree, &cookie)) { 1146#ifdef __NetBSD__ 1147 if (layout->lot_attr_count != 0) 1148#endif 1149 kmem_free(layout->lot_attrs, 1150 sizeof (sa_attr_type_t) * layout->lot_attr_count); 1151 kmem_free(layout, sizeof (sa_lot_t)); 1152 } 1153 1154 avl_destroy(&sa->sa_layout_hash_tree); 1155 avl_destroy(&sa->sa_layout_num_tree); 1156 mutex_destroy(&sa->sa_lock); 1157 1158 kmem_free(sa, sizeof (sa_os_t)); 1159 os->os_sa = NULL; 1160} 1161 1162void 1163sa_build_idx_tab(void *hdr, void *attr_addr, sa_attr_type_t attr, 1164 uint16_t length, int length_idx, boolean_t var_length, void *userp) 1165{ 1166 sa_idx_tab_t *idx_tab = userp; 1167 1168 if (var_length) { 1169 ASSERT(idx_tab->sa_variable_lengths); 1170 idx_tab->sa_variable_lengths[length_idx] = length; 1171 } 1172 TOC_ATTR_ENCODE(idx_tab->sa_idx_tab[attr], length_idx, 1173 (uint32_t)((uintptr_t)attr_addr - (uintptr_t)hdr)); 1174} 1175 1176static void 1177sa_attr_iter(objset_t *os, sa_hdr_phys_t *hdr, dmu_object_type_t type, 1178 sa_iterfunc_t func, sa_lot_t *tab, void *userp) 1179{ 1180 void *data_start; 1181 sa_lot_t *tb = tab; 1182 sa_lot_t search; 1183 avl_index_t loc; 1184 sa_os_t *sa = os->os_sa; 1185 int i; 1186 uint16_t *length_start = NULL; 1187 uint8_t length_idx = 0; 1188 1189 if (tab == NULL) { 1190 search.lot_num = SA_LAYOUT_NUM(hdr, type); 1191 tb = avl_find(&sa->sa_layout_num_tree, &search, 
&loc); 1192 ASSERT(tb); 1193 } 1194 1195 if (IS_SA_BONUSTYPE(type)) { 1196 data_start = (void *)P2ROUNDUP(((uintptr_t)hdr + 1197 offsetof(sa_hdr_phys_t, sa_lengths) + 1198 (sizeof (uint16_t) * tb->lot_var_sizes)), 8); 1199 length_start = hdr->sa_lengths; 1200 } else { 1201 data_start = hdr; 1202 } 1203 1204 for (i = 0; i != tb->lot_attr_count; i++) { 1205 int attr_length, reg_length; 1206 uint8_t idx_len; 1207 1208 reg_length = sa->sa_attr_table[tb->lot_attrs[i]].sa_length; 1209 if (reg_length) { 1210 attr_length = reg_length; 1211 idx_len = 0; 1212 } else { 1213 attr_length = length_start[length_idx]; 1214 idx_len = length_idx++; 1215 } 1216 1217 func(hdr, data_start, tb->lot_attrs[i], attr_length, 1218 idx_len, reg_length == 0 ? B_TRUE : B_FALSE, userp); 1219 1220 data_start = (void *)P2ROUNDUP(((uintptr_t)data_start + 1221 attr_length), 8); 1222 } 1223} 1224 1225/*ARGSUSED*/ 1226void 1227sa_byteswap_cb(void *hdr, void *attr_addr, sa_attr_type_t attr, 1228 uint16_t length, int length_idx, boolean_t variable_length, void *userp) 1229{ 1230 sa_handle_t *hdl = userp; 1231 sa_os_t *sa = hdl->sa_os->os_sa; 1232 1233 sa_bswap_table[sa->sa_attr_table[attr].sa_byteswap](attr_addr, length); 1234} 1235 1236void 1237sa_byteswap(sa_handle_t *hdl, sa_buf_type_t buftype) 1238{ 1239 sa_hdr_phys_t *sa_hdr_phys = SA_GET_HDR(hdl, buftype); 1240 dmu_buf_impl_t *db; 1241 sa_os_t *sa = hdl->sa_os->os_sa; 1242 int num_lengths = 1; 1243 int i; 1244 1245 ASSERT(MUTEX_HELD(&sa->sa_lock)); 1246 if (sa_hdr_phys->sa_magic == SA_MAGIC) 1247 return; 1248 1249 db = SA_GET_DB(hdl, buftype); 1250 1251 if (buftype == SA_SPILL) { 1252 arc_release(db->db_buf, NULL); 1253 arc_buf_thaw(db->db_buf); 1254 } 1255 1256 sa_hdr_phys->sa_magic = BSWAP_32(sa_hdr_phys->sa_magic); 1257 sa_hdr_phys->sa_layout_info = BSWAP_16(sa_hdr_phys->sa_layout_info); 1258 1259 /* 1260 * Determine number of variable lenghts in header 1261 * The standard 8 byte header has one for free and a 1262 * 16 byte header would have 4 
+ 1; 1263 */ 1264 if (SA_HDR_SIZE(sa_hdr_phys) > 8) 1265 num_lengths += (SA_HDR_SIZE(sa_hdr_phys) - 8) >> 1; 1266 for (i = 0; i != num_lengths; i++) 1267 sa_hdr_phys->sa_lengths[i] = 1268 BSWAP_16(sa_hdr_phys->sa_lengths[i]); 1269 1270 sa_attr_iter(hdl->sa_os, sa_hdr_phys, DMU_OT_SA, 1271 sa_byteswap_cb, NULL, hdl); 1272 1273 if (buftype == SA_SPILL) 1274 arc_buf_freeze(((dmu_buf_impl_t *)hdl->sa_spill)->db_buf); 1275} 1276 1277static int 1278sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype) 1279{ 1280 sa_hdr_phys_t *sa_hdr_phys; 1281 dmu_buf_impl_t *db = SA_GET_DB(hdl, buftype); 1282 dmu_object_type_t bonustype = SA_BONUSTYPE_FROM_DB(db); 1283 sa_os_t *sa = hdl->sa_os->os_sa; 1284 sa_idx_tab_t *idx_tab; 1285 1286 sa_hdr_phys = SA_GET_HDR(hdl, buftype); 1287 1288 mutex_enter(&sa->sa_lock); 1289 1290 /* Do we need to byteswap? */ 1291 1292 /* only check if not old znode */ 1293 if (IS_SA_BONUSTYPE(bonustype) && sa_hdr_phys->sa_magic != SA_MAGIC && 1294 sa_hdr_phys->sa_magic != 0) { 1295 VERIFY(BSWAP_32(sa_hdr_phys->sa_magic) == SA_MAGIC); 1296 sa_byteswap(hdl, buftype); 1297 } 1298 1299 idx_tab = sa_find_idx_tab(hdl->sa_os, bonustype, sa_hdr_phys); 1300 1301 if (buftype == SA_BONUS) 1302 hdl->sa_bonus_tab = idx_tab; 1303 else 1304 hdl->sa_spill_tab = idx_tab; 1305 1306 mutex_exit(&sa->sa_lock); 1307 return (0); 1308} 1309 1310/*ARGSUSED*/ 1311static void 1312sa_evict_sync(void *dbu) 1313{ 1314 panic("evicting sa dbuf\n"); 1315} 1316 1317static void 1318sa_idx_tab_rele(objset_t *os, void *arg) 1319{ 1320 sa_os_t *sa = os->os_sa; 1321 sa_idx_tab_t *idx_tab = arg; 1322 1323 if (idx_tab == NULL) 1324 return; 1325 1326 mutex_enter(&sa->sa_lock); 1327 if (refcount_remove(&idx_tab->sa_refcount, NULL) == 0) { 1328 list_remove(&idx_tab->sa_layout->lot_idx_tab, idx_tab); 1329 if (idx_tab->sa_variable_lengths) 1330 kmem_free(idx_tab->sa_variable_lengths, 1331 sizeof (uint16_t) * 1332 idx_tab->sa_layout->lot_var_sizes); 1333 refcount_destroy(&idx_tab->sa_refcount); 1334 
kmem_free(idx_tab->sa_idx_tab, 1335 sizeof (uint32_t) * sa->sa_num_attrs); 1336 kmem_free(idx_tab, sizeof (sa_idx_tab_t)); 1337 } 1338 mutex_exit(&sa->sa_lock); 1339} 1340 1341static void 1342sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab) 1343{ 1344 sa_os_t *sa = os->os_sa; 1345 1346 ASSERT(MUTEX_HELD(&sa->sa_lock)); 1347 (void) refcount_add(&idx_tab->sa_refcount, NULL); 1348} 1349 1350void 1351sa_handle_destroy(sa_handle_t *hdl) 1352{ 1353 dmu_buf_t *db = hdl->sa_bonus; 1354 1355 mutex_enter(&hdl->sa_lock); 1356 (void) dmu_buf_remove_user(db, &hdl->sa_dbu); 1357 1358 if (hdl->sa_bonus_tab) 1359 sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab); 1360 1361 if (hdl->sa_spill_tab) 1362 sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); 1363 1364 dmu_buf_rele(hdl->sa_bonus, NULL); 1365 1366 if (hdl->sa_spill) 1367 dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL); 1368 mutex_exit(&hdl->sa_lock); 1369 1370 kmem_cache_free(sa_cache, hdl); 1371} 1372 1373int 1374sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp, 1375 sa_handle_type_t hdl_type, sa_handle_t **handlepp) 1376{ 1377 int error = 0; 1378 dmu_object_info_t doi; 1379 sa_handle_t *handle = NULL; 1380 1381#ifdef ZFS_DEBUG 1382 dmu_object_info_from_db(db, &doi); 1383 ASSERT(doi.doi_bonus_type == DMU_OT_SA || 1384 doi.doi_bonus_type == DMU_OT_ZNODE); 1385#endif 1386 /* find handle, if it exists */ 1387 /* if one doesn't exist then create a new one, and initialize it */ 1388 1389 if (hdl_type == SA_HDL_SHARED) 1390 handle = dmu_buf_get_user(db); 1391 1392 if (handle == NULL) { 1393 sa_handle_t *winner = NULL; 1394 1395 handle = kmem_cache_alloc(sa_cache, KM_SLEEP); 1396 handle->sa_dbu.dbu_evict_func_sync = NULL; 1397 handle->sa_dbu.dbu_evict_func_async = NULL; 1398 handle->sa_userp = userp; 1399 handle->sa_bonus = db; 1400 handle->sa_os = os; 1401 handle->sa_spill = NULL; 1402 handle->sa_bonus_tab = NULL; 1403 handle->sa_spill_tab = NULL; 1404 1405 error = sa_build_index(handle, SA_BONUS); 1406 1407 if 
(hdl_type == SA_HDL_SHARED) { 1408 dmu_buf_init_user(&handle->sa_dbu, sa_evict_sync, NULL, 1409 NULL); 1410 winner = dmu_buf_set_user_ie(db, &handle->sa_dbu); 1411 } 1412 1413 if (winner != NULL) { 1414 kmem_cache_free(sa_cache, handle); 1415 handle = winner; 1416 } 1417 } 1418 *handlepp = handle; 1419 1420 return (error); 1421} 1422 1423int 1424sa_handle_get(objset_t *objset, uint64_t objid, void *userp, 1425 sa_handle_type_t hdl_type, sa_handle_t **handlepp) 1426{ 1427 dmu_buf_t *db; 1428 int error; 1429 1430 if (error = dmu_bonus_hold(objset, objid, NULL, &db)) 1431 return (error); 1432 1433 return (sa_handle_get_from_db(objset, db, userp, hdl_type, 1434 handlepp)); 1435} 1436 1437int 1438sa_buf_hold(objset_t *objset, uint64_t obj_num, void *tag, dmu_buf_t **db) 1439{ 1440 return (dmu_bonus_hold(objset, obj_num, tag, db)); 1441} 1442 1443void 1444sa_buf_rele(dmu_buf_t *db, void *tag) 1445{ 1446 dmu_buf_rele(db, tag); 1447} 1448 1449int 1450sa_lookup_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count) 1451{ 1452 ASSERT(hdl); 1453 ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1454 return (sa_attr_op(hdl, bulk, count, SA_LOOKUP, NULL)); 1455} 1456 1457int 1458sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen) 1459{ 1460 int error; 1461 sa_bulk_attr_t bulk; 1462 1463 bulk.sa_attr = attr; 1464 bulk.sa_data = buf; 1465 bulk.sa_length = buflen; 1466 bulk.sa_data_func = NULL; 1467 1468 ASSERT(hdl); 1469 mutex_enter(&hdl->sa_lock); 1470 error = sa_lookup_impl(hdl, &bulk, 1); 1471 mutex_exit(&hdl->sa_lock); 1472 return (error); 1473} 1474 1475#ifdef _KERNEL 1476int 1477sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio) 1478{ 1479 int error; 1480 sa_bulk_attr_t bulk; 1481 1482 bulk.sa_data = NULL; 1483 bulk.sa_attr = attr; 1484 bulk.sa_data_func = NULL; 1485 1486 ASSERT(hdl); 1487 1488 mutex_enter(&hdl->sa_lock); 1489 if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) == 0) { 1490 error = uiomove((void *)bulk.sa_addr, 
MIN(bulk.sa_size, 1491 uio->uio_resid), UIO_READ, uio); 1492 } 1493 mutex_exit(&hdl->sa_lock); 1494 return (error); 1495 1496} 1497#endif 1498 1499void * 1500sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, void *data) 1501{ 1502 sa_idx_tab_t *idx_tab; 1503 sa_hdr_phys_t *hdr = (sa_hdr_phys_t *)data; 1504 sa_os_t *sa = os->os_sa; 1505 sa_lot_t *tb, search; 1506 avl_index_t loc; 1507 1508 /* 1509 * Deterimine layout number. If SA node and header == 0 then 1510 * force the index table to the dummy "1" empty layout. 1511 * 1512 * The layout number would only be zero for a newly created file 1513 * that has not added any attributes yet, or with crypto enabled which 1514 * doesn't write any attributes to the bonus buffer. 1515 */ 1516 1517 search.lot_num = SA_LAYOUT_NUM(hdr, bonustype); 1518 1519 tb = avl_find(&sa->sa_layout_num_tree, &search, &loc); 1520 1521 /* Verify header size is consistent with layout information */ 1522 ASSERT(tb); 1523 ASSERT(IS_SA_BONUSTYPE(bonustype) && 1524 SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) || !IS_SA_BONUSTYPE(bonustype) || 1525 (IS_SA_BONUSTYPE(bonustype) && hdr->sa_layout_info == 0)); 1526 1527 /* 1528 * See if any of the already existing TOC entries can be reused? 
1529 */ 1530 1531 for (idx_tab = list_head(&tb->lot_idx_tab); idx_tab; 1532 idx_tab = list_next(&tb->lot_idx_tab, idx_tab)) { 1533 boolean_t valid_idx = B_TRUE; 1534 int i; 1535 1536 if (tb->lot_var_sizes != 0 && 1537 idx_tab->sa_variable_lengths != NULL) { 1538 for (i = 0; i != tb->lot_var_sizes; i++) { 1539 if (hdr->sa_lengths[i] != 1540 idx_tab->sa_variable_lengths[i]) { 1541 valid_idx = B_FALSE; 1542 break; 1543 } 1544 } 1545 } 1546 if (valid_idx) { 1547 sa_idx_tab_hold(os, idx_tab); 1548 return (idx_tab); 1549 } 1550 } 1551 1552 /* No such luck, create a new entry */ 1553 idx_tab = kmem_zalloc(sizeof (sa_idx_tab_t), KM_SLEEP); 1554 idx_tab->sa_idx_tab = 1555 kmem_zalloc(sizeof (uint32_t) * sa->sa_num_attrs, KM_SLEEP); 1556 idx_tab->sa_layout = tb; 1557 refcount_create(&idx_tab->sa_refcount); 1558 if (tb->lot_var_sizes) 1559 idx_tab->sa_variable_lengths = kmem_alloc(sizeof (uint16_t) * 1560 tb->lot_var_sizes, KM_SLEEP); 1561 1562 sa_attr_iter(os, hdr, bonustype, sa_build_idx_tab, 1563 tb, idx_tab); 1564 sa_idx_tab_hold(os, idx_tab); /* one hold for consumer */ 1565 sa_idx_tab_hold(os, idx_tab); /* one for layout */ 1566 list_insert_tail(&tb->lot_idx_tab, idx_tab); 1567 return (idx_tab); 1568} 1569 1570void 1571sa_default_locator(void **dataptr, uint32_t *len, uint32_t total_len, 1572 boolean_t start, void *userdata) 1573{ 1574 ASSERT(start); 1575 1576 *dataptr = userdata; 1577 *len = total_len; 1578} 1579 1580static void 1581sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx) 1582{ 1583 uint64_t attr_value = 0; 1584 sa_os_t *sa = hdl->sa_os->os_sa; 1585 sa_attr_table_t *tb = sa->sa_attr_table; 1586 int i; 1587 1588 mutex_enter(&sa->sa_lock); 1589 1590 if (!sa->sa_need_attr_registration || sa->sa_master_obj == 0) { 1591 mutex_exit(&sa->sa_lock); 1592 return; 1593 } 1594 1595 if (sa->sa_reg_attr_obj == 0) { 1596 sa->sa_reg_attr_obj = zap_create_link(hdl->sa_os, 1597 DMU_OT_SA_ATTR_REGISTRATION, 1598 sa->sa_master_obj, SA_REGISTRY, tx); 1599 } 1600 for (i = 0; i 
!= sa->sa_num_attrs; i++) { 1601 if (sa->sa_attr_table[i].sa_registered) 1602 continue; 1603 ATTR_ENCODE(attr_value, tb[i].sa_attr, tb[i].sa_length, 1604 tb[i].sa_byteswap); 1605 VERIFY(0 == zap_update(hdl->sa_os, sa->sa_reg_attr_obj, 1606 tb[i].sa_name, 8, 1, &attr_value, tx)); 1607 tb[i].sa_registered = B_TRUE; 1608 } 1609 sa->sa_need_attr_registration = B_FALSE; 1610 mutex_exit(&sa->sa_lock); 1611} 1612 1613/* 1614 * Replace all attributes with attributes specified in template. 1615 * If dnode had a spill buffer then those attributes will be 1616 * also be replaced, possibly with just an empty spill block 1617 * 1618 * This interface is intended to only be used for bulk adding of 1619 * attributes for a new file. It will also be used by the ZPL 1620 * when converting and old formatted znode to native SA support. 1621 */ 1622int 1623sa_replace_all_by_template_locked(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, 1624 int attr_count, dmu_tx_t *tx) 1625{ 1626 sa_os_t *sa = hdl->sa_os->os_sa; 1627 1628 if (sa->sa_need_attr_registration) 1629 sa_attr_register_sync(hdl, tx); 1630 return (sa_build_layouts(hdl, attr_desc, attr_count, tx)); 1631} 1632 1633int 1634sa_replace_all_by_template(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, 1635 int attr_count, dmu_tx_t *tx) 1636{ 1637 int error; 1638 1639 mutex_enter(&hdl->sa_lock); 1640 error = sa_replace_all_by_template_locked(hdl, attr_desc, 1641 attr_count, tx); 1642 mutex_exit(&hdl->sa_lock); 1643 return (error); 1644} 1645 1646/* 1647 * Add/remove a single attribute or replace a variable-sized attribute value 1648 * with a value of a different size, and then rewrite the entire set 1649 * of attributes. 1650 * Same-length attribute value replacement (including fixed-length attributes) 1651 * is handled more efficiently by the upper layers. 
1652 */ 1653static int 1654sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, 1655 sa_data_op_t action, sa_data_locator_t *locator, void *datastart, 1656 uint16_t buflen, dmu_tx_t *tx) 1657{ 1658 sa_os_t *sa = hdl->sa_os->os_sa; 1659 dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; 1660 dnode_t *dn; 1661 sa_bulk_attr_t *attr_desc; 1662 void *old_data[2]; 1663 int bonus_attr_count = 0; 1664 int bonus_data_size = 0; 1665 int spill_data_size = 0; 1666 int spill_attr_count = 0; 1667 int error; 1668 uint16_t length, reg_length; 1669 int i, j, k, length_idx; 1670 sa_hdr_phys_t *hdr; 1671 sa_idx_tab_t *idx_tab; 1672 int attr_count; 1673 int count; 1674 1675 ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1676 1677 /* First make of copy of the old data */ 1678 1679 DB_DNODE_ENTER(db); 1680 dn = DB_DNODE(db); 1681 if (dn->dn_bonuslen != 0) { 1682 bonus_data_size = hdl->sa_bonus->db_size; 1683 old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP); 1684 bcopy(hdl->sa_bonus->db_data, old_data[0], 1685 hdl->sa_bonus->db_size); 1686 bonus_attr_count = hdl->sa_bonus_tab->sa_layout->lot_attr_count; 1687 } else { 1688 old_data[0] = NULL; 1689 } 1690 DB_DNODE_EXIT(db); 1691 1692 /* Bring spill buffer online if it isn't currently */ 1693 1694 if ((error = sa_get_spill(hdl)) == 0) { 1695 spill_data_size = hdl->sa_spill->db_size; 1696 old_data[1] = kmem_alloc(spill_data_size, KM_SLEEP); 1697 bcopy(hdl->sa_spill->db_data, old_data[1], 1698 hdl->sa_spill->db_size); 1699 spill_attr_count = 1700 hdl->sa_spill_tab->sa_layout->lot_attr_count; 1701 } else if (error && error != ENOENT) { 1702 if (old_data[0]) 1703 kmem_free(old_data[0], bonus_data_size); 1704 return (error); 1705 } else { 1706 old_data[1] = NULL; 1707 } 1708 1709 /* build descriptor of all attributes */ 1710 1711 attr_count = bonus_attr_count + spill_attr_count; 1712 if (action == SA_ADD) 1713 attr_count++; 1714 else if (action == SA_REMOVE) 1715 attr_count--; 1716 1717 attr_desc = kmem_zalloc(sizeof (sa_bulk_attr_t) * 
attr_count, KM_SLEEP); 1718 1719 /* 1720 * loop through bonus and spill buffer if it exists, and 1721 * build up new attr_descriptor to reset the attributes 1722 */ 1723 k = j = 0; 1724 count = bonus_attr_count; 1725 hdr = SA_GET_HDR(hdl, SA_BONUS); 1726 idx_tab = SA_IDX_TAB_GET(hdl, SA_BONUS); 1727 for (; k != 2; k++) { 1728 /* 1729 * Iterate over each attribute in layout. Fetch the 1730 * size of variable-length attributes needing rewrite 1731 * from sa_lengths[]. 1732 */ 1733 for (i = 0, length_idx = 0; i != count; i++) { 1734 sa_attr_type_t attr; 1735 1736 attr = idx_tab->sa_layout->lot_attrs[i]; 1737 reg_length = SA_REGISTERED_LEN(sa, attr); 1738 if (reg_length == 0) { 1739 length = hdr->sa_lengths[length_idx]; 1740 length_idx++; 1741 } else { 1742 length = reg_length; 1743 } 1744 if (attr == newattr) { 1745 /* 1746 * There is nothing to do for SA_REMOVE, 1747 * so it is just skipped. 1748 */ 1749 if (action == SA_REMOVE) 1750 continue; 1751 1752 /* 1753 * Duplicate attributes are not allowed, so the 1754 * action can not be SA_ADD here. 1755 */ 1756 ASSERT3S(action, ==, SA_REPLACE); 1757 1758 /* 1759 * Only a variable-sized attribute can be 1760 * replaced here, and its size must be changing. 
1761 */ 1762 ASSERT3U(reg_length, ==, 0); 1763 ASSERT3U(length, !=, buflen); 1764 SA_ADD_BULK_ATTR(attr_desc, j, attr, 1765 locator, datastart, buflen); 1766 } else { 1767 SA_ADD_BULK_ATTR(attr_desc, j, attr, 1768 NULL, (void *) 1769 (TOC_OFF(idx_tab->sa_idx_tab[attr]) + 1770 (uintptr_t)old_data[k]), length); 1771 } 1772 } 1773 if (k == 0 && hdl->sa_spill) { 1774 hdr = SA_GET_HDR(hdl, SA_SPILL); 1775 idx_tab = SA_IDX_TAB_GET(hdl, SA_SPILL); 1776 count = spill_attr_count; 1777 } else { 1778 break; 1779 } 1780 } 1781 if (action == SA_ADD) { 1782 reg_length = SA_REGISTERED_LEN(sa, newattr); 1783 IMPLY(reg_length != 0, reg_length == buflen); 1784 SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator, 1785 datastart, buflen); 1786 } 1787 ASSERT3U(j, ==, attr_count); 1788 1789 error = sa_build_layouts(hdl, attr_desc, attr_count, tx); 1790 1791 if (old_data[0]) 1792 kmem_free(old_data[0], bonus_data_size); 1793 if (old_data[1]) 1794 kmem_free(old_data[1], spill_data_size); 1795 kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count); 1796 1797 return (error); 1798} 1799 1800static int 1801sa_bulk_update_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, 1802 dmu_tx_t *tx) 1803{ 1804 int error; 1805 sa_os_t *sa = hdl->sa_os->os_sa; 1806 dmu_object_type_t bonustype; 1807 1808 bonustype = SA_BONUSTYPE_FROM_DB(SA_GET_DB(hdl, SA_BONUS)); 1809 1810 ASSERT(hdl); 1811 ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1812 1813 /* sync out registration table if necessary */ 1814 if (sa->sa_need_attr_registration) 1815 sa_attr_register_sync(hdl, tx); 1816 1817 error = sa_attr_op(hdl, bulk, count, SA_UPDATE, tx); 1818 if (error == 0 && !IS_SA_BONUSTYPE(bonustype) && sa->sa_update_cb) 1819 sa->sa_update_cb(hdl, tx); 1820 1821 return (error); 1822} 1823 1824/* 1825 * update or add new attribute 1826 */ 1827int 1828sa_update(sa_handle_t *hdl, sa_attr_type_t type, 1829 void *buf, uint32_t buflen, dmu_tx_t *tx) 1830{ 1831 int error; 1832 sa_bulk_attr_t bulk; 1833 1834 bulk.sa_attr = type; 1835 
bulk.sa_data_func = NULL; 1836 bulk.sa_length = buflen; 1837 bulk.sa_data = buf; 1838 1839 mutex_enter(&hdl->sa_lock); 1840 error = sa_bulk_update_impl(hdl, &bulk, 1, tx); 1841 mutex_exit(&hdl->sa_lock); 1842 return (error); 1843} 1844 1845int 1846sa_update_from_cb(sa_handle_t *hdl, sa_attr_type_t attr, 1847 uint32_t buflen, sa_data_locator_t *locator, void *userdata, dmu_tx_t *tx) 1848{ 1849 int error; 1850 sa_bulk_attr_t bulk; 1851 1852 bulk.sa_attr = attr; 1853 bulk.sa_data = userdata; 1854 bulk.sa_data_func = locator; 1855 bulk.sa_length = buflen; 1856 1857 mutex_enter(&hdl->sa_lock); 1858 error = sa_bulk_update_impl(hdl, &bulk, 1, tx); 1859 mutex_exit(&hdl->sa_lock); 1860 return (error); 1861} 1862 1863/* 1864 * Return size of an attribute 1865 */ 1866 1867int 1868sa_size(sa_handle_t *hdl, sa_attr_type_t attr, int *size) 1869{ 1870 sa_bulk_attr_t bulk; 1871 int error; 1872 1873 bulk.sa_data = NULL; 1874 bulk.sa_attr = attr; 1875 bulk.sa_data_func = NULL; 1876 1877 ASSERT(hdl); 1878 mutex_enter(&hdl->sa_lock); 1879 if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) != 0) { 1880 mutex_exit(&hdl->sa_lock); 1881 return (error); 1882 } 1883 *size = bulk.sa_size; 1884 1885 mutex_exit(&hdl->sa_lock); 1886 return (0); 1887} 1888 1889int 1890sa_bulk_lookup_locked(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count) 1891{ 1892 ASSERT(hdl); 1893 ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1894 return (sa_lookup_impl(hdl, attrs, count)); 1895} 1896 1897int 1898sa_bulk_lookup(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count) 1899{ 1900 int error; 1901 1902 ASSERT(hdl); 1903 mutex_enter(&hdl->sa_lock); 1904 error = sa_bulk_lookup_locked(hdl, attrs, count); 1905 mutex_exit(&hdl->sa_lock); 1906 return (error); 1907} 1908 1909int 1910sa_bulk_update(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count, dmu_tx_t *tx) 1911{ 1912 int error; 1913 1914 ASSERT(hdl); 1915 mutex_enter(&hdl->sa_lock); 1916 error = sa_bulk_update_impl(hdl, attrs, count, tx); 1917 mutex_exit(&hdl->sa_lock); 
1918 return (error); 1919} 1920 1921int 1922sa_remove(sa_handle_t *hdl, sa_attr_type_t attr, dmu_tx_t *tx) 1923{ 1924 int error; 1925 1926 mutex_enter(&hdl->sa_lock); 1927 error = sa_modify_attrs(hdl, attr, SA_REMOVE, NULL, 1928 NULL, 0, tx); 1929 mutex_exit(&hdl->sa_lock); 1930 return (error); 1931} 1932 1933void 1934sa_object_info(sa_handle_t *hdl, dmu_object_info_t *doi) 1935{ 1936 dmu_object_info_from_db((dmu_buf_t *)hdl->sa_bonus, doi); 1937} 1938 1939void 1940sa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks) 1941{ 1942 dmu_object_size_from_db((dmu_buf_t *)hdl->sa_bonus, 1943 blksize, nblocks); 1944} 1945 1946void 1947sa_set_userp(sa_handle_t *hdl, void *ptr) 1948{ 1949 hdl->sa_userp = ptr; 1950} 1951 1952dmu_buf_t * 1953sa_get_db(sa_handle_t *hdl) 1954{ 1955 return ((dmu_buf_t *)hdl->sa_bonus); 1956} 1957 1958void * 1959sa_get_userdata(sa_handle_t *hdl) 1960{ 1961 return (hdl->sa_userp); 1962} 1963 1964void 1965sa_register_update_callback_locked(objset_t *os, sa_update_cb_t *func) 1966{ 1967 ASSERT(MUTEX_HELD(&os->os_sa->sa_lock)); 1968 os->os_sa->sa_update_cb = func; 1969} 1970 1971void 1972sa_register_update_callback(objset_t *os, sa_update_cb_t *func) 1973{ 1974 1975 mutex_enter(&os->os_sa->sa_lock); 1976 sa_register_update_callback_locked(os, func); 1977 mutex_exit(&os->os_sa->sa_lock); 1978} 1979 1980uint64_t 1981sa_handle_object(sa_handle_t *hdl) 1982{ 1983 return (hdl->sa_bonus->db_object); 1984} 1985 1986boolean_t 1987sa_enabled(objset_t *os) 1988{ 1989 return (os->os_sa == NULL); 1990} 1991 1992int 1993sa_set_sa_object(objset_t *os, uint64_t sa_object) 1994{ 1995 sa_os_t *sa = os->os_sa; 1996 1997 if (sa->sa_master_obj) 1998 return (1); 1999 2000 sa->sa_master_obj = sa_object; 2001 2002 return (0); 2003} 2004 2005int 2006sa_hdrsize(void *arg) 2007{ 2008 sa_hdr_phys_t *hdr = arg; 2009 2010 return (SA_HDR_SIZE(hdr)); 2011} 2012 2013void 2014sa_handle_lock(sa_handle_t *hdl) 2015{ 2016 ASSERT(hdl); 2017 
mutex_enter(&hdl->sa_lock); 2018} 2019 2020void 2021sa_handle_unlock(sa_handle_t *hdl) 2022{ 2023 ASSERT(hdl); 2024 mutex_exit(&hdl->sa_lock); 2025} 2026