zap_leaf.c revision 247187
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23168404Spjd */ 24168404Spjd 25168404Spjd/* 26168404Spjd * The 512-byte leaf is broken into 32 16-byte chunks. 27168404Spjd * chunk number n means l_chunk[n], even though the header precedes it. 28168404Spjd * the names are stored null-terminated. 29168404Spjd */ 30168404Spjd 31219089Spjd#include <sys/zio.h> 32219089Spjd#include <sys/spa.h> 33219089Spjd#include <sys/dmu.h> 34168404Spjd#include <sys/zfs_context.h> 35219089Spjd#include <sys/fs/zfs.h> 36168404Spjd#include <sys/zap.h> 37168404Spjd#include <sys/zap_impl.h> 38168404Spjd#include <sys/zap_leaf.h> 39219089Spjd#include <sys/arc.h> 40168404Spjd 41185029Spjdstatic uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry); 42185029Spjd 43168404Spjd#define CHAIN_END 0xffff /* end of the chunk chain */ 44168404Spjd 45168404Spjd/* half the (current) minimum block size */ 46168404Spjd#define MAX_ARRAY_BYTES (8<<10) 47168404Spjd 48168404Spjd#define LEAF_HASH(l, h) \ 49168404Spjd ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \ 50168404Spjd ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(l)-(l)->l_phys->l_hdr.lh_prefix_len))) 51168404Spjd 52168404Spjd#define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)]) 53168404Spjd 54168404Spjd 55168404Spjdstatic void 56168404Spjdzap_memset(void *a, int c, size_t n) 57168404Spjd{ 58168404Spjd char *cp = a; 59168404Spjd char *cpend = cp + n; 60168404Spjd 61168404Spjd while (cp < cpend) 62168404Spjd *cp++ = c; 63168404Spjd} 64168404Spjd 65168404Spjdstatic void 66168404Spjdstv(int len, void *addr, uint64_t value) 67168404Spjd{ 68168404Spjd switch (len) { 69168404Spjd case 1: 70168404Spjd *(uint8_t *)addr = value; 71168404Spjd return; 72168404Spjd case 2: 73168404Spjd *(uint16_t *)addr = value; 74168404Spjd return; 75168404Spjd case 4: 76168404Spjd *(uint32_t *)addr = value; 77168404Spjd return; 78168404Spjd case 8: 79168404Spjd *(uint64_t *)addr = value; 80168404Spjd return; 81168404Spjd } 82168404Spjd ASSERT(!"bad int len"); 83168404Spjd} 84168404Spjd 85168404Spjdstatic uint64_t 86168404Spjdldv(int len, const void *addr) 87168404Spjd{ 88168404Spjd switch (len) { 89168404Spjd case 1: 90168404Spjd return (*(uint8_t *)addr); 91168404Spjd case 2: 92168404Spjd return (*(uint16_t *)addr); 93168404Spjd case 4: 94168404Spjd return (*(uint32_t *)addr); 95168404Spjd case 8: 96168404Spjd return (*(uint64_t *)addr); 97168404Spjd } 98168404Spjd ASSERT(!"bad int len"); 99168404Spjd return (0xFEEDFACEDEADBEEFULL); 100168404Spjd} 101168404Spjd 102168404Spjdvoid 103168404Spjdzap_leaf_byteswap(zap_leaf_phys_t *buf, int size) 104168404Spjd{ 105168404Spjd int i; 106168404Spjd zap_leaf_t l; 107168404Spjd l.l_bs = highbit(size)-1; 108168404Spjd l.l_phys = buf; 109168404Spjd 110168404Spjd buf->l_hdr.lh_block_type = BSWAP_64(buf->l_hdr.lh_block_type); 111168404Spjd buf->l_hdr.lh_prefix = BSWAP_64(buf->l_hdr.lh_prefix); 112168404Spjd buf->l_hdr.lh_magic = BSWAP_32(buf->l_hdr.lh_magic); 113168404Spjd buf->l_hdr.lh_nfree = BSWAP_16(buf->l_hdr.lh_nfree); 114168404Spjd buf->l_hdr.lh_nentries = BSWAP_16(buf->l_hdr.lh_nentries); 115168404Spjd buf->l_hdr.lh_prefix_len = BSWAP_16(buf->l_hdr.lh_prefix_len); 116168404Spjd buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist); 117168404Spjd 118168404Spjd for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++) 119168404Spjd buf->l_hash[i] = BSWAP_16(buf->l_hash[i]); 120168404Spjd 121168404Spjd for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) { 122168404Spjd zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i); 123168404Spjd struct zap_leaf_entry *le; 124168404Spjd 125168404Spjd switch (lc->l_free.lf_type) { 126168404Spjd case ZAP_CHUNK_ENTRY: 127168404Spjd le = &lc->l_entry; 128168404Spjd 129168404Spjd le->le_type = BSWAP_8(le->le_type); 130219089Spjd le->le_value_intlen = BSWAP_8(le->le_value_intlen); 131168404Spjd le->le_next = BSWAP_16(le->le_next); 132168404Spjd le->le_name_chunk = BSWAP_16(le->le_name_chunk); 133219089Spjd le->le_name_numints = BSWAP_16(le->le_name_numints); 134168404Spjd le->le_value_chunk = BSWAP_16(le->le_value_chunk); 135219089Spjd le->le_value_numints = BSWAP_16(le->le_value_numints); 136168404Spjd le->le_cd = BSWAP_32(le->le_cd); 137168404Spjd le->le_hash = BSWAP_64(le->le_hash); 138168404Spjd break; 139168404Spjd case ZAP_CHUNK_FREE: 140168404Spjd lc->l_free.lf_type = BSWAP_8(lc->l_free.lf_type); 141168404Spjd lc->l_free.lf_next = BSWAP_16(lc->l_free.lf_next); 142168404Spjd break; 143168404Spjd case ZAP_CHUNK_ARRAY: 144168404Spjd lc->l_array.la_type = BSWAP_8(lc->l_array.la_type); 145168404Spjd lc->l_array.la_next = BSWAP_16(lc->l_array.la_next); 146168404Spjd /* la_array doesn't need swapping */ 147168404Spjd break; 148168404Spjd default: 149168404Spjd ASSERT(!"bad leaf type"); 150168404Spjd } 151168404Spjd } 152168404Spjd} 153168404Spjd 154168404Spjdvoid 155185029Spjdzap_leaf_init(zap_leaf_t *l, boolean_t sort) 156168404Spjd{ 157168404Spjd int i; 158168404Spjd 159168404Spjd l->l_bs = highbit(l->l_dbuf->db_size)-1; 160168404Spjd zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header)); 161168404Spjd zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l)); 162168404Spjd for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { 163168404Spjd ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE; 164168404Spjd ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1; 165168404Spjd } 166168404Spjd ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END; 167168404Spjd l->l_phys->l_hdr.lh_block_type = ZBT_LEAF; 168168404Spjd l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC; 169168404Spjd l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l); 170185029Spjd if (sort) 171185029Spjd l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; 172168404Spjd} 173168404Spjd 174168404Spjd/* 175168404Spjd * Routines which manipulate leaf chunks (l_chunk[]). 176168404Spjd */ 177168404Spjd 178168404Spjdstatic uint16_t 179168404Spjdzap_leaf_chunk_alloc(zap_leaf_t *l) 180168404Spjd{ 181168404Spjd int chunk; 182168404Spjd 183168404Spjd ASSERT(l->l_phys->l_hdr.lh_nfree > 0); 184168404Spjd 185168404Spjd chunk = l->l_phys->l_hdr.lh_freelist; 186168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 187168404Spjd ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE); 188168404Spjd 189168404Spjd l->l_phys->l_hdr.lh_freelist = ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next; 190168404Spjd 191168404Spjd l->l_phys->l_hdr.lh_nfree--; 192168404Spjd 193168404Spjd return (chunk); 194168404Spjd} 195168404Spjd 196168404Spjdstatic void 197168404Spjdzap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk) 198168404Spjd{ 199168404Spjd struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free; 200168404Spjd ASSERT3U(l->l_phys->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l)); 201168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 202168404Spjd ASSERT(zlf->lf_type != ZAP_CHUNK_FREE); 203168404Spjd 204168404Spjd zlf->lf_type = ZAP_CHUNK_FREE; 205168404Spjd zlf->lf_next = l->l_phys->l_hdr.lh_freelist; 206168404Spjd bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */ 207168404Spjd l->l_phys->l_hdr.lh_freelist = chunk; 208168404Spjd 209168404Spjd l->l_phys->l_hdr.lh_nfree++; 210168404Spjd} 211168404Spjd 212168404Spjd/* 213168404Spjd * Routines which manipulate leaf arrays (zap_leaf_array type chunks). 214168404Spjd */ 215168404Spjd 216168404Spjdstatic uint16_t 217168404Spjdzap_leaf_array_create(zap_leaf_t *l, const char *buf, 218219089Spjd int integer_size, int num_integers) 219168404Spjd{ 220168404Spjd uint16_t chunk_head; 221168404Spjd uint16_t *chunkp = &chunk_head; 222168404Spjd int byten = 0; 223247187Smm uint64_t value = 0; 224168404Spjd int shift = (integer_size-1)*8; 225168404Spjd int len = num_integers; 226168404Spjd 227168404Spjd ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES); 228168404Spjd 229168404Spjd while (len > 0) { 230168404Spjd uint16_t chunk = zap_leaf_chunk_alloc(l); 231168404Spjd struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 232168404Spjd int i; 233168404Spjd 234168404Spjd la->la_type = ZAP_CHUNK_ARRAY; 235168404Spjd for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) { 236168404Spjd if (byten == 0) 237168404Spjd value = ldv(integer_size, buf); 238168404Spjd la->la_array[i] = value >> shift; 239168404Spjd value <<= 8; 240168404Spjd if (++byten == integer_size) { 241168404Spjd byten = 0; 242168404Spjd buf += integer_size; 243168404Spjd if (--len == 0) 244168404Spjd break; 245168404Spjd } 246168404Spjd } 247168404Spjd 248168404Spjd *chunkp = chunk; 249168404Spjd chunkp = &la->la_next; 250168404Spjd } 251168404Spjd *chunkp = CHAIN_END; 252168404Spjd 253168404Spjd return (chunk_head); 254168404Spjd} 255168404Spjd 256168404Spjdstatic void 257168404Spjdzap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp) 258168404Spjd{ 259168404Spjd uint16_t chunk = *chunkp; 260168404Spjd 261168404Spjd *chunkp = CHAIN_END; 262168404Spjd 263168404Spjd while (chunk != CHAIN_END) { 264168404Spjd int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next; 265168404Spjd ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==, 266168404Spjd ZAP_CHUNK_ARRAY); 267168404Spjd zap_leaf_chunk_free(l, chunk); 268168404Spjd chunk = nextchunk; 269168404Spjd } 270168404Spjd} 271168404Spjd 272168404Spjd/* array_len and buf_len are in integers, not bytes */ 273168404Spjdstatic void 274168404Spjdzap_leaf_array_read(zap_leaf_t *l, uint16_t chunk, 275168404Spjd int array_int_len, int array_len, int buf_int_len, uint64_t buf_len, 276219089Spjd void *buf) 277168404Spjd{ 278168404Spjd int len = MIN(array_len, buf_len); 279168404Spjd int byten = 0; 280168404Spjd uint64_t value = 0; 281219089Spjd char *p = buf; 282168404Spjd 283168404Spjd ASSERT3U(array_int_len, <=, buf_int_len); 284168404Spjd 285168404Spjd /* Fast path for one 8-byte integer */ 286168404Spjd if (array_int_len == 8 && buf_int_len == 8 && len == 1) { 287168404Spjd struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 288168404Spjd uint8_t *ip = la->la_array; 289219089Spjd uint64_t *buf64 = buf; 290168404Spjd 291168404Spjd *buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 | 292168404Spjd (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 | 293168404Spjd (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 | 294168404Spjd (uint64_t)ip[6] << 8 | (uint64_t)ip[7]; 295168404Spjd return; 296168404Spjd } 297168404Spjd 298168404Spjd /* Fast path for an array of 1-byte integers (eg. the entry name) */ 299168404Spjd if (array_int_len == 1 && buf_int_len == 1 && 300168404Spjd buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) { 301168404Spjd while (chunk != CHAIN_END) { 302168404Spjd struct zap_leaf_array *la = 303168404Spjd &ZAP_LEAF_CHUNK(l, chunk).l_array; 304219089Spjd bcopy(la->la_array, p, ZAP_LEAF_ARRAY_BYTES); 305219089Spjd p += ZAP_LEAF_ARRAY_BYTES; 306168404Spjd chunk = la->la_next; 307168404Spjd } 308168404Spjd return; 309168404Spjd } 310168404Spjd 311168404Spjd while (len > 0) { 312168404Spjd struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 313168404Spjd int i; 314168404Spjd 315168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 316168404Spjd for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) { 317168404Spjd value = (value << 8) | la->la_array[i]; 318168404Spjd byten++; 319168404Spjd if (byten == array_int_len) { 320219089Spjd stv(buf_int_len, p, value); 321168404Spjd byten = 0; 322168404Spjd len--; 323168404Spjd if (len == 0) 324168404Spjd return; 325219089Spjd p += buf_int_len; 326168404Spjd } 327168404Spjd } 328168404Spjd chunk = la->la_next; 329168404Spjd } 330168404Spjd} 331168404Spjd 332185029Spjdstatic boolean_t 333219089Spjdzap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, 334219089Spjd int chunk, int array_numints) 335168404Spjd{ 336168404Spjd int bseen = 0; 337168404Spjd 338219089Spjd if (zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY) { 339219089Spjd uint64_t *thiskey; 340219089Spjd boolean_t match; 341219089Spjd 342219089Spjd ASSERT(zn->zn_key_intlen == sizeof (*thiskey)); 343219089Spjd thiskey = kmem_alloc(array_numints * sizeof (*thiskey), 344219089Spjd KM_SLEEP); 345219089Spjd 346219089Spjd zap_leaf_array_read(l, chunk, sizeof (*thiskey), array_numints, 347219089Spjd sizeof (*thiskey), array_numints, thiskey); 348219089Spjd match = bcmp(thiskey, zn->zn_key_orig, 349219089Spjd array_numints * sizeof (*thiskey)) == 0; 350219089Spjd kmem_free(thiskey, array_numints * sizeof (*thiskey)); 351219089Spjd return (match); 352219089Spjd } 353219089Spjd 354219089Spjd ASSERT(zn->zn_key_intlen == 1); 355185029Spjd if (zn->zn_matchtype == MT_FIRST) { 356219089Spjd char *thisname = kmem_alloc(array_numints, KM_SLEEP); 357185029Spjd boolean_t match; 358185029Spjd 359219089Spjd zap_leaf_array_read(l, chunk, sizeof (char), array_numints, 360219089Spjd sizeof (char), array_numints, thisname); 361185029Spjd match = zap_match(zn, thisname); 362219089Spjd kmem_free(thisname, array_numints); 363185029Spjd return (match); 364185029Spjd } 365185029Spjd 366219089Spjd /* 367219089Spjd * Fast path for exact matching. 368219089Spjd * First check that the lengths match, so that we don't read 369219089Spjd * past the end of the zn_key_orig array. 370219089Spjd */ 371219089Spjd if (array_numints != zn->zn_key_orig_numints) 372219089Spjd return (B_FALSE); 373219089Spjd while (bseen < array_numints) { 374168404Spjd struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 375219089Spjd int toread = MIN(array_numints - bseen, ZAP_LEAF_ARRAY_BYTES); 376168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 377219089Spjd if (bcmp(la->la_array, (char *)zn->zn_key_orig + bseen, toread)) 378168404Spjd break; 379168404Spjd chunk = la->la_next; 380168404Spjd bseen += toread; 381168404Spjd } 382219089Spjd return (bseen == array_numints); 383168404Spjd} 384168404Spjd 385168404Spjd/* 386168404Spjd * Routines which manipulate leaf entries. 387168404Spjd */ 388168404Spjd 389168404Spjdint 390185029Spjdzap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh) 391168404Spjd{ 392168404Spjd uint16_t *chunkp; 393168404Spjd struct zap_leaf_entry *le; 394168404Spjd 395168404Spjd ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); 396168404Spjd 397185029Spjdagain: 398185029Spjd for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash); 399168404Spjd *chunkp != CHAIN_END; chunkp = &le->le_next) { 400168404Spjd uint16_t chunk = *chunkp; 401168404Spjd le = ZAP_LEAF_ENTRY(l, chunk); 402168404Spjd 403168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 404168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 405168404Spjd 406185029Spjd if (le->le_hash != zn->zn_hash) 407168404Spjd continue; 408168404Spjd 409185029Spjd /* 410185029Spjd * NB: the entry chain is always sorted by cd on 411185029Spjd * normalized zap objects, so this will find the 412185029Spjd * lowest-cd match for MT_FIRST. 413185029Spjd */ 414185029Spjd ASSERT(zn->zn_matchtype == MT_EXACT || 415185029Spjd (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED)); 416185029Spjd if (zap_leaf_array_match(l, zn, le->le_name_chunk, 417219089Spjd le->le_name_numints)) { 418219089Spjd zeh->zeh_num_integers = le->le_value_numints; 419219089Spjd zeh->zeh_integer_size = le->le_value_intlen; 420168404Spjd zeh->zeh_cd = le->le_cd; 421168404Spjd zeh->zeh_hash = le->le_hash; 422168404Spjd zeh->zeh_chunkp = chunkp; 423168404Spjd zeh->zeh_leaf = l; 424168404Spjd return (0); 425168404Spjd } 426168404Spjd } 427168404Spjd 428185029Spjd /* 429185029Spjd * NB: we could of course do this in one pass, but that would be 430185029Spjd * a pain. We'll see if MT_BEST is even used much. 431185029Spjd */ 432185029Spjd if (zn->zn_matchtype == MT_BEST) { 433185029Spjd zn->zn_matchtype = MT_FIRST; 434185029Spjd goto again; 435185029Spjd } 436185029Spjd 437168404Spjd return (ENOENT); 438168404Spjd} 439168404Spjd 440168404Spjd/* Return (h1,cd1 >= h2,cd2) */ 441168404Spjd#define HCD_GTEQ(h1, cd1, h2, cd2) \ 442168404Spjd ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE)) 443168404Spjd 444168404Spjdint 445168404Spjdzap_leaf_lookup_closest(zap_leaf_t *l, 446168404Spjd uint64_t h, uint32_t cd, zap_entry_handle_t *zeh) 447168404Spjd{ 448168404Spjd uint16_t chunk; 449168404Spjd uint64_t besth = -1ULL; 450219089Spjd uint32_t bestcd = -1U; 451168404Spjd uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1; 452168404Spjd uint16_t lh; 453168404Spjd struct zap_leaf_entry *le; 454168404Spjd 455168404Spjd ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); 456168404Spjd 457168404Spjd for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) { 458168404Spjd for (chunk = l->l_phys->l_hash[lh]; 459168404Spjd chunk != CHAIN_END; chunk = le->le_next) { 460168404Spjd le = ZAP_LEAF_ENTRY(l, chunk); 461168404Spjd 462168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 463168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 464168404Spjd 465168404Spjd if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) && 466168404Spjd HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) { 467168404Spjd ASSERT3U(bestlh, >=, lh); 468168404Spjd bestlh = lh; 469168404Spjd besth = le->le_hash; 470168404Spjd bestcd = le->le_cd; 471168404Spjd 472219089Spjd zeh->zeh_num_integers = le->le_value_numints; 473219089Spjd zeh->zeh_integer_size = le->le_value_intlen; 474168404Spjd zeh->zeh_cd = le->le_cd; 475168404Spjd zeh->zeh_hash = le->le_hash; 476168404Spjd zeh->zeh_fakechunk = chunk; 477168404Spjd zeh->zeh_chunkp = &zeh->zeh_fakechunk; 478168404Spjd zeh->zeh_leaf = l; 479168404Spjd } 480168404Spjd } 481168404Spjd } 482168404Spjd 483219089Spjd return (bestcd == -1U ? ENOENT : 0); 484168404Spjd} 485168404Spjd 486168404Spjdint 487168404Spjdzap_entry_read(const zap_entry_handle_t *zeh, 488168404Spjd uint8_t integer_size, uint64_t num_integers, void *buf) 489168404Spjd{ 490168404Spjd struct zap_leaf_entry *le = 491168404Spjd ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp); 492168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 493168404Spjd 494219089Spjd if (le->le_value_intlen > integer_size) 495168404Spjd return (EINVAL); 496168404Spjd 497219089Spjd zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk, 498219089Spjd le->le_value_intlen, le->le_value_numints, 499219089Spjd integer_size, num_integers, buf); 500168404Spjd 501168404Spjd if (zeh->zeh_num_integers > num_integers) 502168404Spjd return (EOVERFLOW); 503168404Spjd return (0); 504168404Spjd 505168404Spjd} 506168404Spjd 507168404Spjdint 508219089Spjdzap_entry_read_name(zap_t *zap, const zap_entry_handle_t *zeh, uint16_t buflen, 509219089Spjd char *buf) 510168404Spjd{ 511168404Spjd struct zap_leaf_entry *le = 512168404Spjd ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp); 513168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 514168404Spjd 515219089Spjd if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { 516219089Spjd zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 8, 517219089Spjd le->le_name_numints, 8, buflen / 8, buf); 518219089Spjd } else { 519219089Spjd zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1, 520219089Spjd le->le_name_numints, 1, buflen, buf); 521219089Spjd } 522219089Spjd if (le->le_name_numints > buflen) 523168404Spjd return (EOVERFLOW); 524168404Spjd return (0); 525168404Spjd} 526168404Spjd 527168404Spjdint 528168404Spjdzap_entry_update(zap_entry_handle_t *zeh, 529168404Spjd uint8_t integer_size, uint64_t num_integers, const void *buf) 530168404Spjd{ 531168404Spjd int delta_chunks; 532168404Spjd zap_leaf_t *l = zeh->zeh_leaf; 533168404Spjd struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp); 534168404Spjd 535168404Spjd delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) - 536219089Spjd ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen); 537168404Spjd 538168404Spjd if ((int)l->l_phys->l_hdr.lh_nfree < delta_chunks) 539168404Spjd return (EAGAIN); 540168404Spjd 541168404Spjd zap_leaf_array_free(l, &le->le_value_chunk); 542168404Spjd le->le_value_chunk = 543168404Spjd zap_leaf_array_create(l, buf, integer_size, num_integers); 544219089Spjd le->le_value_numints = num_integers; 545219089Spjd le->le_value_intlen = integer_size; 546168404Spjd return (0); 547168404Spjd} 548168404Spjd 549168404Spjdvoid 550168404Spjdzap_entry_remove(zap_entry_handle_t *zeh) 551168404Spjd{ 552168404Spjd uint16_t entry_chunk; 553168404Spjd struct zap_leaf_entry *le; 554168404Spjd zap_leaf_t *l = zeh->zeh_leaf; 555168404Spjd 556168404Spjd ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk); 557168404Spjd 558168404Spjd entry_chunk = *zeh->zeh_chunkp; 559168404Spjd le = ZAP_LEAF_ENTRY(l, entry_chunk); 560168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 561168404Spjd 562168404Spjd zap_leaf_array_free(l, &le->le_name_chunk); 563168404Spjd zap_leaf_array_free(l, &le->le_value_chunk); 564168404Spjd 565168404Spjd *zeh->zeh_chunkp = le->le_next; 566168404Spjd zap_leaf_chunk_free(l, entry_chunk); 567168404Spjd 568168404Spjd l->l_phys->l_hdr.lh_nentries--; 569168404Spjd} 570168404Spjd 571168404Spjdint 572219089Spjdzap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd, 573168404Spjd uint8_t integer_size, uint64_t num_integers, const void *buf, 574168404Spjd zap_entry_handle_t *zeh) 575168404Spjd{ 576168404Spjd uint16_t chunk; 577168404Spjd uint16_t *chunkp; 578168404Spjd struct zap_leaf_entry *le; 579219089Spjd uint64_t valuelen; 580168404Spjd int numchunks; 581219089Spjd uint64_t h = zn->zn_hash; 582168404Spjd 583168404Spjd valuelen = integer_size * num_integers; 584168404Spjd 585219089Spjd numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(zn->zn_key_orig_numints * 586219089Spjd zn->zn_key_intlen) + ZAP_LEAF_ARRAY_NCHUNKS(valuelen); 587168404Spjd if (numchunks > ZAP_LEAF_NUMCHUNKS(l)) 588168404Spjd return (E2BIG); 589168404Spjd 590219089Spjd if (cd == ZAP_NEED_CD) { 591185029Spjd /* find the lowest unused cd */ 592185029Spjd if (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) { 593185029Spjd cd = 0; 594185029Spjd 595168404Spjd for (chunk = *LEAF_HASH_ENTPTR(l, h); 596168404Spjd chunk != CHAIN_END; chunk = le->le_next) { 597168404Spjd le = ZAP_LEAF_ENTRY(l, chunk); 598185029Spjd if (le->le_cd > cd) 599168404Spjd break; 600185029Spjd if (le->le_hash == h) { 601185029Spjd ASSERT3U(cd, ==, le->le_cd); 602185029Spjd cd++; 603168404Spjd } 604168404Spjd } 605185029Spjd } else { 606185029Spjd /* old unsorted format; do it the O(n^2) way */ 607219089Spjd for (cd = 0; ; cd++) { 608185029Spjd for (chunk = *LEAF_HASH_ENTPTR(l, h); 609185029Spjd chunk != CHAIN_END; chunk = le->le_next) { 610185029Spjd le = ZAP_LEAF_ENTRY(l, chunk); 611185029Spjd if (le->le_hash == h && 612185029Spjd le->le_cd == cd) { 613185029Spjd break; 614185029Spjd } 615185029Spjd } 616185029Spjd /* If this cd is not in use, we are good. */ 617185029Spjd if (chunk == CHAIN_END) 618185029Spjd break; 619185029Spjd } 620168404Spjd } 621185029Spjd /* 622219089Spjd * We would run out of space in a block before we could 623219089Spjd * store enough entries to run out of CD values. 624185029Spjd */ 625219089Spjd ASSERT3U(cd, <, zap_maxcd(zn->zn_zap)); 626168404Spjd } 627168404Spjd 628168404Spjd if (l->l_phys->l_hdr.lh_nfree < numchunks) 629168404Spjd return (EAGAIN); 630168404Spjd 631168404Spjd /* make the entry */ 632168404Spjd chunk = zap_leaf_chunk_alloc(l); 633168404Spjd le = ZAP_LEAF_ENTRY(l, chunk); 634168404Spjd le->le_type = ZAP_CHUNK_ENTRY; 635219089Spjd le->le_name_chunk = zap_leaf_array_create(l, zn->zn_key_orig, 636219089Spjd zn->zn_key_intlen, zn->zn_key_orig_numints); 637219089Spjd le->le_name_numints = zn->zn_key_orig_numints; 638168404Spjd le->le_value_chunk = 639168404Spjd zap_leaf_array_create(l, buf, integer_size, num_integers); 640219089Spjd le->le_value_numints = num_integers; 641219089Spjd le->le_value_intlen = integer_size; 642168404Spjd le->le_hash = h; 643168404Spjd le->le_cd = cd; 644168404Spjd 645168404Spjd /* link it into the hash chain */ 646185029Spjd /* XXX if we did the search above, we could just use that */ 647185029Spjd chunkp = zap_leaf_rehash_entry(l, chunk); 648168404Spjd 649168404Spjd l->l_phys->l_hdr.lh_nentries++; 650168404Spjd 651168404Spjd zeh->zeh_leaf = l; 652168404Spjd zeh->zeh_num_integers = num_integers; 653219089Spjd zeh->zeh_integer_size = le->le_value_intlen; 654168404Spjd zeh->zeh_cd = le->le_cd; 655168404Spjd zeh->zeh_hash = le->le_hash; 656168404Spjd zeh->zeh_chunkp = chunkp; 657168404Spjd 658168404Spjd return (0); 659168404Spjd} 660168404Spjd 661168404Spjd/* 662185029Spjd * Determine if there is another entry with the same normalized form. 663185029Spjd * For performance purposes, either zn or name must be provided (the 664185029Spjd * other can be NULL). Note, there usually won't be any hash 665185029Spjd * conflicts, in which case we don't need the concatenated/normalized 666185029Spjd * form of the name. But all callers have one of these on hand anyway, 667185029Spjd * so might as well take advantage. A cleaner but slower interface 668185029Spjd * would accept neither argument, and compute the normalized name as 669185029Spjd * needed (using zap_name_alloc(zap_entry_read_name(zeh))). 670185029Spjd */ 671185029Spjdboolean_t 672185029Spjdzap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn, 673185029Spjd const char *name, zap_t *zap) 674185029Spjd{ 675185029Spjd uint64_t chunk; 676185029Spjd struct zap_leaf_entry *le; 677185029Spjd boolean_t allocdzn = B_FALSE; 678185029Spjd 679185029Spjd if (zap->zap_normflags == 0) 680185029Spjd return (B_FALSE); 681185029Spjd 682185029Spjd for (chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash); 683185029Spjd chunk != CHAIN_END; chunk = le->le_next) { 684185029Spjd le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk); 685185029Spjd if (le->le_hash != zeh->zeh_hash) 686185029Spjd continue; 687185029Spjd if (le->le_cd == zeh->zeh_cd) 688185029Spjd continue; 689185029Spjd 690185029Spjd if (zn == NULL) { 691185029Spjd zn = zap_name_alloc(zap, name, MT_FIRST); 692185029Spjd allocdzn = B_TRUE; 693185029Spjd } 694185029Spjd if (zap_leaf_array_match(zeh->zeh_leaf, zn, 695219089Spjd le->le_name_chunk, le->le_name_numints)) { 696185029Spjd if (allocdzn) 697185029Spjd zap_name_free(zn); 698185029Spjd return (B_TRUE); 699185029Spjd } 700185029Spjd } 701185029Spjd if (allocdzn) 702185029Spjd zap_name_free(zn); 703185029Spjd return (B_FALSE); 704185029Spjd} 705185029Spjd 706185029Spjd/* 707168404Spjd * Routines for transferring entries between leafs. 708168404Spjd */ 709168404Spjd 710185029Spjdstatic uint16_t * 711168404Spjdzap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry) 712168404Spjd{ 713168404Spjd struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry); 714185029Spjd struct zap_leaf_entry *le2; 715185029Spjd uint16_t *chunkp; 716185029Spjd 717185029Spjd /* 718185029Spjd * keep the entry chain sorted by cd 719185029Spjd * NB: this will not cause problems for unsorted leafs, though 720185029Spjd * it is unnecessary there. 721185029Spjd */ 722185029Spjd for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash); 723185029Spjd *chunkp != CHAIN_END; chunkp = &le2->le_next) { 724185029Spjd le2 = ZAP_LEAF_ENTRY(l, *chunkp); 725185029Spjd if (le2->le_cd > le->le_cd) 726185029Spjd break; 727185029Spjd } 728185029Spjd 729185029Spjd le->le_next = *chunkp; 730185029Spjd *chunkp = entry; 731185029Spjd return (chunkp); 732168404Spjd} 733168404Spjd 734168404Spjdstatic uint16_t 735168404Spjdzap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl) 736168404Spjd{ 737168404Spjd uint16_t new_chunk; 738168404Spjd uint16_t *nchunkp = &new_chunk; 739168404Spjd 740168404Spjd while (chunk != CHAIN_END) { 741168404Spjd uint16_t nchunk = zap_leaf_chunk_alloc(nl); 742168404Spjd struct zap_leaf_array *nla = 743168404Spjd &ZAP_LEAF_CHUNK(nl, nchunk).l_array; 744168404Spjd struct zap_leaf_array *la = 745168404Spjd &ZAP_LEAF_CHUNK(l, chunk).l_array; 746168404Spjd int nextchunk = la->la_next; 747168404Spjd 748168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 749168404Spjd ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l)); 750168404Spjd 751168404Spjd *nla = *la; /* structure assignment */ 752168404Spjd 753168404Spjd zap_leaf_chunk_free(l, chunk); 754168404Spjd chunk = nextchunk; 755168404Spjd *nchunkp = nchunk; 756168404Spjd nchunkp = &nla->la_next; 757168404Spjd } 758168404Spjd *nchunkp = CHAIN_END; 759168404Spjd return (new_chunk); 760168404Spjd} 761168404Spjd 762168404Spjdstatic void 763168404Spjdzap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl) 764168404Spjd{ 765168404Spjd struct zap_leaf_entry *le, *nle; 766168404Spjd uint16_t chunk; 767168404Spjd 768168404Spjd le = ZAP_LEAF_ENTRY(l, entry); 769168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 770168404Spjd 771168404Spjd chunk = zap_leaf_chunk_alloc(nl); 772168404Spjd nle = ZAP_LEAF_ENTRY(nl, chunk); 773168404Spjd *nle = *le; /* structure assignment */ 774168404Spjd 775185029Spjd (void) zap_leaf_rehash_entry(nl, chunk); 776168404Spjd 777168404Spjd nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl); 778168404Spjd nle->le_value_chunk = 779168404Spjd zap_leaf_transfer_array(l, le->le_value_chunk, nl); 780168404Spjd 781168404Spjd zap_leaf_chunk_free(l, entry); 782168404Spjd 783168404Spjd l->l_phys->l_hdr.lh_nentries--; 784168404Spjd nl->l_phys->l_hdr.lh_nentries++; 785168404Spjd} 786168404Spjd 787168404Spjd/* 788168404Spjd * Transfer the entries whose hash prefix ends in 1 to the new leaf. 789168404Spjd */ 790168404Spjdvoid 791185029Spjdzap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort) 792168404Spjd{ 793168404Spjd int i; 794168404Spjd int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len; 795168404Spjd 796168404Spjd /* set new prefix and prefix_len */ 797168404Spjd l->l_phys->l_hdr.lh_prefix <<= 1; 798168404Spjd l->l_phys->l_hdr.lh_prefix_len++; 799168404Spjd nl->l_phys->l_hdr.lh_prefix = l->l_phys->l_hdr.lh_prefix | 1; 800168404Spjd nl->l_phys->l_hdr.lh_prefix_len = l->l_phys->l_hdr.lh_prefix_len; 801168404Spjd 802168404Spjd /* break existing hash chains */ 803168404Spjd zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l)); 804168404Spjd 805185029Spjd if (sort) 806185029Spjd l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; 807185029Spjd 808168404Spjd /* 809168404Spjd * Transfer entries whose hash bit 'bit' is set to nl; rehash 810168404Spjd * the remaining entries 811168404Spjd * 812168404Spjd * NB: We could find entries via the hashtable instead. That 813168404Spjd * would be O(hashents+numents) rather than O(numblks+numents), 814168404Spjd * but this accesses memory more sequentially, and when we're 815168404Spjd * called, the block is usually pretty full. 816168404Spjd */ 817168404Spjd for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { 818168404Spjd struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i); 819168404Spjd if (le->le_type != ZAP_CHUNK_ENTRY) 820168404Spjd continue; 821168404Spjd 822168404Spjd if (le->le_hash & (1ULL << bit)) 823168404Spjd zap_leaf_transfer_entry(l, i, nl); 824168404Spjd else 825185029Spjd (void) zap_leaf_rehash_entry(l, i); 826168404Spjd } 827168404Spjd} 828168404Spjd 829168404Spjdvoid 830168404Spjdzap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs) 831168404Spjd{ 832168404Spjd int i, n; 833168404Spjd 834168404Spjd n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - 835168404Spjd l->l_phys->l_hdr.lh_prefix_len; 836168404Spjd n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 837168404Spjd zs->zs_leafs_with_2n_pointers[n]++; 838168404Spjd 839168404Spjd 840168404Spjd n = l->l_phys->l_hdr.lh_nentries/5; 841168404Spjd n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 842168404Spjd zs->zs_blocks_with_n5_entries[n]++; 843168404Spjd 844168404Spjd n = ((1<<FZAP_BLOCK_SHIFT(zap)) - 845168404Spjd l->l_phys->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 / 846168404Spjd (1<<FZAP_BLOCK_SHIFT(zap)); 847168404Spjd n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 848168404Spjd zs->zs_blocks_n_tenths_full[n]++; 849168404Spjd 850168404Spjd for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) { 851168404Spjd int nentries = 0; 852168404Spjd int chunk = l->l_phys->l_hash[i]; 853168404Spjd 854168404Spjd while (chunk != CHAIN_END) { 855168404Spjd struct zap_leaf_entry *le = 856168404Spjd ZAP_LEAF_ENTRY(l, chunk); 857168404Spjd 858219089Spjd n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_numints) + 859219089Spjd ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * 860219089Spjd le->le_value_intlen); 861168404Spjd n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 862168404Spjd zs->zs_entries_using_n_chunks[n]++; 863168404Spjd 864168404Spjd chunk = le->le_next; 865168404Spjd nentries++; 866168404Spjd } 867168404Spjd 868168404Spjd n = nentries; 869168404Spjd n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 870168404Spjd zs->zs_buckets_with_n_entries[n]++; 871168404Spjd } 872168404Spjd} 873