zap_leaf.c revision 249195
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23249195Smm * Copyright (c) 2013 by Delphix. All rights reserved. 24168404Spjd */ 25168404Spjd 26168404Spjd/* 27168404Spjd * The 512-byte leaf is broken into 32 16-byte chunks. 28168404Spjd * chunk number n means l_chunk[n], even though the header precedes it. 29168404Spjd * the names are stored null-terminated. 30168404Spjd */ 31168404Spjd 32219089Spjd#include <sys/zio.h> 33219089Spjd#include <sys/spa.h> 34219089Spjd#include <sys/dmu.h> 35168404Spjd#include <sys/zfs_context.h> 36219089Spjd#include <sys/fs/zfs.h> 37168404Spjd#include <sys/zap.h> 38168404Spjd#include <sys/zap_impl.h> 39168404Spjd#include <sys/zap_leaf.h> 40219089Spjd#include <sys/arc.h> 41168404Spjd 42185029Spjdstatic uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry); 43185029Spjd 44168404Spjd#define CHAIN_END 0xffff /* end of the chunk chain */ 45168404Spjd 46168404Spjd/* half the (current) minimum block size */ 47168404Spjd#define MAX_ARRAY_BYTES (8<<10) 48168404Spjd 49168404Spjd#define LEAF_HASH(l, h) \ 50168404Spjd ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \ 51168404Spjd ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(l)-(l)->l_phys->l_hdr.lh_prefix_len))) 52168404Spjd 53168404Spjd#define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)]) 54168404Spjd 55168404Spjd 56168404Spjdstatic void 57168404Spjdzap_memset(void *a, int c, size_t n) 58168404Spjd{ 59168404Spjd char *cp = a; 60168404Spjd char *cpend = cp + n; 61168404Spjd 62168404Spjd while (cp < cpend) 63168404Spjd *cp++ = c; 64168404Spjd} 65168404Spjd 66168404Spjdstatic void 67168404Spjdstv(int len, void *addr, uint64_t value) 68168404Spjd{ 69168404Spjd switch (len) { 70168404Spjd case 1: 71168404Spjd *(uint8_t *)addr = value; 72168404Spjd return; 73168404Spjd case 2: 74168404Spjd *(uint16_t *)addr = value; 75168404Spjd return; 76168404Spjd case 4: 77168404Spjd *(uint32_t *)addr = value; 78168404Spjd return; 79168404Spjd case 8: 80168404Spjd *(uint64_t *)addr = value; 81168404Spjd return; 82168404Spjd } 83168404Spjd ASSERT(!"bad int len"); 84168404Spjd} 85168404Spjd 86168404Spjdstatic uint64_t 87168404Spjdldv(int len, const void *addr) 88168404Spjd{ 89168404Spjd switch (len) { 90168404Spjd case 1: 91168404Spjd return (*(uint8_t *)addr); 92168404Spjd case 2: 93168404Spjd return (*(uint16_t *)addr); 94168404Spjd case 4: 95168404Spjd return (*(uint32_t *)addr); 96168404Spjd case 8: 97168404Spjd return (*(uint64_t *)addr); 98168404Spjd } 99168404Spjd ASSERT(!"bad int len"); 100168404Spjd return (0xFEEDFACEDEADBEEFULL); 101168404Spjd} 102168404Spjd 103168404Spjdvoid 104168404Spjdzap_leaf_byteswap(zap_leaf_phys_t *buf, int size) 105168404Spjd{ 106168404Spjd int i; 107168404Spjd zap_leaf_t l; 108168404Spjd l.l_bs = highbit(size)-1; 109168404Spjd l.l_phys = buf; 110168404Spjd 111168404Spjd buf->l_hdr.lh_block_type = BSWAP_64(buf->l_hdr.lh_block_type); 112168404Spjd buf->l_hdr.lh_prefix = BSWAP_64(buf->l_hdr.lh_prefix); 113168404Spjd buf->l_hdr.lh_magic = BSWAP_32(buf->l_hdr.lh_magic); 114168404Spjd buf->l_hdr.lh_nfree = BSWAP_16(buf->l_hdr.lh_nfree); 115168404Spjd buf->l_hdr.lh_nentries = BSWAP_16(buf->l_hdr.lh_nentries); 116168404Spjd buf->l_hdr.lh_prefix_len = BSWAP_16(buf->l_hdr.lh_prefix_len); 117168404Spjd buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist); 118168404Spjd 119168404Spjd for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++) 120168404Spjd buf->l_hash[i] = BSWAP_16(buf->l_hash[i]); 121168404Spjd 122168404Spjd for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) { 123168404Spjd zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i); 124168404Spjd struct zap_leaf_entry *le; 125168404Spjd 126168404Spjd switch (lc->l_free.lf_type) { 127168404Spjd case ZAP_CHUNK_ENTRY: 128168404Spjd le = &lc->l_entry; 129168404Spjd 130168404Spjd le->le_type = BSWAP_8(le->le_type); 131219089Spjd le->le_value_intlen = BSWAP_8(le->le_value_intlen); 132168404Spjd le->le_next = BSWAP_16(le->le_next); 133168404Spjd le->le_name_chunk = BSWAP_16(le->le_name_chunk); 134219089Spjd le->le_name_numints = BSWAP_16(le->le_name_numints); 135168404Spjd le->le_value_chunk = BSWAP_16(le->le_value_chunk); 136219089Spjd le->le_value_numints = BSWAP_16(le->le_value_numints); 137168404Spjd le->le_cd = BSWAP_32(le->le_cd); 138168404Spjd le->le_hash = BSWAP_64(le->le_hash); 139168404Spjd break; 140168404Spjd case ZAP_CHUNK_FREE: 141168404Spjd lc->l_free.lf_type = BSWAP_8(lc->l_free.lf_type); 142168404Spjd lc->l_free.lf_next = BSWAP_16(lc->l_free.lf_next); 143168404Spjd break; 144168404Spjd case ZAP_CHUNK_ARRAY: 145168404Spjd lc->l_array.la_type = BSWAP_8(lc->l_array.la_type); 146168404Spjd lc->l_array.la_next = BSWAP_16(lc->l_array.la_next); 147168404Spjd /* la_array doesn't need swapping */ 148168404Spjd break; 149168404Spjd default: 150168404Spjd ASSERT(!"bad leaf type"); 151168404Spjd } 152168404Spjd } 153168404Spjd} 154168404Spjd 155168404Spjdvoid 156185029Spjdzap_leaf_init(zap_leaf_t *l, boolean_t sort) 157168404Spjd{ 158168404Spjd int i; 159168404Spjd 160168404Spjd l->l_bs = highbit(l->l_dbuf->db_size)-1; 161168404Spjd zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header)); 162168404Spjd zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l)); 163168404Spjd for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { 164168404Spjd ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE; 165168404Spjd ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1; 166168404Spjd } 167168404Spjd ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END; 168168404Spjd l->l_phys->l_hdr.lh_block_type = ZBT_LEAF; 169168404Spjd l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC; 170168404Spjd l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l); 171185029Spjd if (sort) 172185029Spjd l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; 173168404Spjd} 174168404Spjd 175168404Spjd/* 176168404Spjd * Routines which manipulate leaf chunks (l_chunk[]). 177168404Spjd */ 178168404Spjd 179168404Spjdstatic uint16_t 180168404Spjdzap_leaf_chunk_alloc(zap_leaf_t *l) 181168404Spjd{ 182168404Spjd int chunk; 183168404Spjd 184168404Spjd ASSERT(l->l_phys->l_hdr.lh_nfree > 0); 185168404Spjd 186168404Spjd chunk = l->l_phys->l_hdr.lh_freelist; 187168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 188168404Spjd ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE); 189168404Spjd 190168404Spjd l->l_phys->l_hdr.lh_freelist = ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next; 191168404Spjd 192168404Spjd l->l_phys->l_hdr.lh_nfree--; 193168404Spjd 194168404Spjd return (chunk); 195168404Spjd} 196168404Spjd 197168404Spjdstatic void 198168404Spjdzap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk) 199168404Spjd{ 200168404Spjd struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free; 201168404Spjd ASSERT3U(l->l_phys->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l)); 202168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 203168404Spjd ASSERT(zlf->lf_type != ZAP_CHUNK_FREE); 204168404Spjd 205168404Spjd zlf->lf_type = ZAP_CHUNK_FREE; 206168404Spjd zlf->lf_next = l->l_phys->l_hdr.lh_freelist; 207168404Spjd bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */ 208168404Spjd l->l_phys->l_hdr.lh_freelist = chunk; 209168404Spjd 210168404Spjd l->l_phys->l_hdr.lh_nfree++; 211168404Spjd} 212168404Spjd 213168404Spjd/* 214168404Spjd * Routines which manipulate leaf arrays (zap_leaf_array type chunks). 215168404Spjd */ 216168404Spjd 217168404Spjdstatic uint16_t 218168404Spjdzap_leaf_array_create(zap_leaf_t *l, const char *buf, 219219089Spjd int integer_size, int num_integers) 220168404Spjd{ 221168404Spjd uint16_t chunk_head; 222168404Spjd uint16_t *chunkp = &chunk_head; 223168404Spjd int byten = 0; 224247187Smm uint64_t value = 0; 225168404Spjd int shift = (integer_size-1)*8; 226168404Spjd int len = num_integers; 227168404Spjd 228168404Spjd ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES); 229168404Spjd 230168404Spjd while (len > 0) { 231168404Spjd uint16_t chunk = zap_leaf_chunk_alloc(l); 232168404Spjd struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 233168404Spjd int i; 234168404Spjd 235168404Spjd la->la_type = ZAP_CHUNK_ARRAY; 236168404Spjd for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) { 237168404Spjd if (byten == 0) 238168404Spjd value = ldv(integer_size, buf); 239168404Spjd la->la_array[i] = value >> shift; 240168404Spjd value <<= 8; 241168404Spjd if (++byten == integer_size) { 242168404Spjd byten = 0; 243168404Spjd buf += integer_size; 244168404Spjd if (--len == 0) 245168404Spjd break; 246168404Spjd } 247168404Spjd } 248168404Spjd 249168404Spjd *chunkp = chunk; 250168404Spjd chunkp = &la->la_next; 251168404Spjd } 252168404Spjd *chunkp = CHAIN_END; 253168404Spjd 254168404Spjd return (chunk_head); 255168404Spjd} 256168404Spjd 257168404Spjdstatic void 258168404Spjdzap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp) 259168404Spjd{ 260168404Spjd uint16_t chunk = *chunkp; 261168404Spjd 262168404Spjd *chunkp = CHAIN_END; 263168404Spjd 264168404Spjd while (chunk != CHAIN_END) { 265168404Spjd int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next; 266168404Spjd ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==, 267168404Spjd ZAP_CHUNK_ARRAY); 268168404Spjd zap_leaf_chunk_free(l, chunk); 269168404Spjd chunk = nextchunk; 270168404Spjd } 271168404Spjd} 272168404Spjd 273168404Spjd/* array_len and buf_len are in integers, not bytes */ 274168404Spjdstatic void 275168404Spjdzap_leaf_array_read(zap_leaf_t *l, uint16_t chunk, 276168404Spjd int array_int_len, int array_len, int buf_int_len, uint64_t buf_len, 277219089Spjd void *buf) 278168404Spjd{ 279168404Spjd int len = MIN(array_len, buf_len); 280168404Spjd int byten = 0; 281168404Spjd uint64_t value = 0; 282219089Spjd char *p = buf; 283168404Spjd 284168404Spjd ASSERT3U(array_int_len, <=, buf_int_len); 285168404Spjd 286168404Spjd /* Fast path for one 8-byte integer */ 287168404Spjd if (array_int_len == 8 && buf_int_len == 8 && len == 1) { 288168404Spjd struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 289168404Spjd uint8_t *ip = la->la_array; 290219089Spjd uint64_t *buf64 = buf; 291168404Spjd 292168404Spjd *buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 | 293168404Spjd (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 | 294168404Spjd (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 | 295168404Spjd (uint64_t)ip[6] << 8 | (uint64_t)ip[7]; 296168404Spjd return; 297168404Spjd } 298168404Spjd 299168404Spjd /* Fast path for an array of 1-byte integers (eg. the entry name) */ 300168404Spjd if (array_int_len == 1 && buf_int_len == 1 && 301168404Spjd buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) { 302168404Spjd while (chunk != CHAIN_END) { 303168404Spjd struct zap_leaf_array *la = 304168404Spjd &ZAP_LEAF_CHUNK(l, chunk).l_array; 305219089Spjd bcopy(la->la_array, p, ZAP_LEAF_ARRAY_BYTES); 306219089Spjd p += ZAP_LEAF_ARRAY_BYTES; 307168404Spjd chunk = la->la_next; 308168404Spjd } 309168404Spjd return; 310168404Spjd } 311168404Spjd 312168404Spjd while (len > 0) { 313168404Spjd struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 314168404Spjd int i; 315168404Spjd 316168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 317168404Spjd for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) { 318168404Spjd value = (value << 8) | la->la_array[i]; 319168404Spjd byten++; 320168404Spjd if (byten == array_int_len) { 321219089Spjd stv(buf_int_len, p, value); 322168404Spjd byten = 0; 323168404Spjd len--; 324168404Spjd if (len == 0) 325168404Spjd return; 326219089Spjd p += buf_int_len; 327168404Spjd } 328168404Spjd } 329168404Spjd chunk = la->la_next; 330168404Spjd } 331168404Spjd} 332168404Spjd 333185029Spjdstatic boolean_t 334219089Spjdzap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, 335219089Spjd int chunk, int array_numints) 336168404Spjd{ 337168404Spjd int bseen = 0; 338168404Spjd 339219089Spjd if (zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY) { 340219089Spjd uint64_t *thiskey; 341219089Spjd boolean_t match; 342219089Spjd 343219089Spjd ASSERT(zn->zn_key_intlen == sizeof (*thiskey)); 344219089Spjd thiskey = kmem_alloc(array_numints * sizeof (*thiskey), 345219089Spjd KM_SLEEP); 346219089Spjd 347219089Spjd zap_leaf_array_read(l, chunk, sizeof (*thiskey), array_numints, 348219089Spjd sizeof (*thiskey), array_numints, thiskey); 349219089Spjd match = bcmp(thiskey, zn->zn_key_orig, 350219089Spjd array_numints * sizeof (*thiskey)) == 0; 351219089Spjd kmem_free(thiskey, array_numints * sizeof (*thiskey)); 352219089Spjd return (match); 353219089Spjd } 354219089Spjd 355219089Spjd ASSERT(zn->zn_key_intlen == 1); 356185029Spjd if (zn->zn_matchtype == MT_FIRST) { 357219089Spjd char *thisname = kmem_alloc(array_numints, KM_SLEEP); 358185029Spjd boolean_t match; 359185029Spjd 360219089Spjd zap_leaf_array_read(l, chunk, sizeof (char), array_numints, 361219089Spjd sizeof (char), array_numints, thisname); 362185029Spjd match = zap_match(zn, thisname); 363219089Spjd kmem_free(thisname, array_numints); 364185029Spjd return (match); 365185029Spjd } 366185029Spjd 367219089Spjd /* 368219089Spjd * Fast path for exact matching. 369219089Spjd * First check that the lengths match, so that we don't read 370219089Spjd * past the end of the zn_key_orig array. 371219089Spjd */ 372219089Spjd if (array_numints != zn->zn_key_orig_numints) 373219089Spjd return (B_FALSE); 374219089Spjd while (bseen < array_numints) { 375168404Spjd struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; 376219089Spjd int toread = MIN(array_numints - bseen, ZAP_LEAF_ARRAY_BYTES); 377168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 378219089Spjd if (bcmp(la->la_array, (char *)zn->zn_key_orig + bseen, toread)) 379168404Spjd break; 380168404Spjd chunk = la->la_next; 381168404Spjd bseen += toread; 382168404Spjd } 383219089Spjd return (bseen == array_numints); 384168404Spjd} 385168404Spjd 386168404Spjd/* 387168404Spjd * Routines which manipulate leaf entries. 388168404Spjd */ 389168404Spjd 390168404Spjdint 391185029Spjdzap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh) 392168404Spjd{ 393168404Spjd uint16_t *chunkp; 394168404Spjd struct zap_leaf_entry *le; 395168404Spjd 396168404Spjd ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); 397168404Spjd 398185029Spjdagain: 399185029Spjd for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash); 400168404Spjd *chunkp != CHAIN_END; chunkp = &le->le_next) { 401168404Spjd uint16_t chunk = *chunkp; 402168404Spjd le = ZAP_LEAF_ENTRY(l, chunk); 403168404Spjd 404168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 405168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 406168404Spjd 407185029Spjd if (le->le_hash != zn->zn_hash) 408168404Spjd continue; 409168404Spjd 410185029Spjd /* 411185029Spjd * NB: the entry chain is always sorted by cd on 412185029Spjd * normalized zap objects, so this will find the 413185029Spjd * lowest-cd match for MT_FIRST. 414185029Spjd */ 415185029Spjd ASSERT(zn->zn_matchtype == MT_EXACT || 416185029Spjd (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED)); 417185029Spjd if (zap_leaf_array_match(l, zn, le->le_name_chunk, 418219089Spjd le->le_name_numints)) { 419219089Spjd zeh->zeh_num_integers = le->le_value_numints; 420219089Spjd zeh->zeh_integer_size = le->le_value_intlen; 421168404Spjd zeh->zeh_cd = le->le_cd; 422168404Spjd zeh->zeh_hash = le->le_hash; 423168404Spjd zeh->zeh_chunkp = chunkp; 424168404Spjd zeh->zeh_leaf = l; 425168404Spjd return (0); 426168404Spjd } 427168404Spjd } 428168404Spjd 429185029Spjd /* 430185029Spjd * NB: we could of course do this in one pass, but that would be 431185029Spjd * a pain. We'll see if MT_BEST is even used much. 432185029Spjd */ 433185029Spjd if (zn->zn_matchtype == MT_BEST) { 434185029Spjd zn->zn_matchtype = MT_FIRST; 435185029Spjd goto again; 436185029Spjd } 437185029Spjd 438249195Smm return (SET_ERROR(ENOENT)); 439168404Spjd} 440168404Spjd 441168404Spjd/* Return (h1,cd1 >= h2,cd2) */ 442168404Spjd#define HCD_GTEQ(h1, cd1, h2, cd2) \ 443168404Spjd ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE)) 444168404Spjd 445168404Spjdint 446168404Spjdzap_leaf_lookup_closest(zap_leaf_t *l, 447168404Spjd uint64_t h, uint32_t cd, zap_entry_handle_t *zeh) 448168404Spjd{ 449168404Spjd uint16_t chunk; 450168404Spjd uint64_t besth = -1ULL; 451219089Spjd uint32_t bestcd = -1U; 452168404Spjd uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1; 453168404Spjd uint16_t lh; 454168404Spjd struct zap_leaf_entry *le; 455168404Spjd 456168404Spjd ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); 457168404Spjd 458168404Spjd for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) { 459168404Spjd for (chunk = l->l_phys->l_hash[lh]; 460168404Spjd chunk != CHAIN_END; chunk = le->le_next) { 461168404Spjd le = ZAP_LEAF_ENTRY(l, chunk); 462168404Spjd 463168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 464168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 465168404Spjd 466168404Spjd if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) && 467168404Spjd HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) { 468168404Spjd ASSERT3U(bestlh, >=, lh); 469168404Spjd bestlh = lh; 470168404Spjd besth = le->le_hash; 471168404Spjd bestcd = le->le_cd; 472168404Spjd 473219089Spjd zeh->zeh_num_integers = le->le_value_numints; 474219089Spjd zeh->zeh_integer_size = le->le_value_intlen; 475168404Spjd zeh->zeh_cd = le->le_cd; 476168404Spjd zeh->zeh_hash = le->le_hash; 477168404Spjd zeh->zeh_fakechunk = chunk; 478168404Spjd zeh->zeh_chunkp = &zeh->zeh_fakechunk; 479168404Spjd zeh->zeh_leaf = l; 480168404Spjd } 481168404Spjd } 482168404Spjd } 483168404Spjd 484219089Spjd return (bestcd == -1U ? ENOENT : 0); 485168404Spjd} 486168404Spjd 487168404Spjdint 488168404Spjdzap_entry_read(const zap_entry_handle_t *zeh, 489168404Spjd uint8_t integer_size, uint64_t num_integers, void *buf) 490168404Spjd{ 491168404Spjd struct zap_leaf_entry *le = 492168404Spjd ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp); 493168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 494168404Spjd 495219089Spjd if (le->le_value_intlen > integer_size) 496249195Smm return (SET_ERROR(EINVAL)); 497168404Spjd 498219089Spjd zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk, 499219089Spjd le->le_value_intlen, le->le_value_numints, 500219089Spjd integer_size, num_integers, buf); 501168404Spjd 502168404Spjd if (zeh->zeh_num_integers > num_integers) 503249195Smm return (SET_ERROR(EOVERFLOW)); 504168404Spjd return (0); 505168404Spjd 506168404Spjd} 507168404Spjd 508168404Spjdint 509219089Spjdzap_entry_read_name(zap_t *zap, const zap_entry_handle_t *zeh, uint16_t buflen, 510219089Spjd char *buf) 511168404Spjd{ 512168404Spjd struct zap_leaf_entry *le = 513168404Spjd ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp); 514168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 515168404Spjd 516219089Spjd if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { 517219089Spjd zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 8, 518219089Spjd le->le_name_numints, 8, buflen / 8, buf); 519219089Spjd } else { 520219089Spjd zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1, 521219089Spjd le->le_name_numints, 1, buflen, buf); 522219089Spjd } 523219089Spjd if (le->le_name_numints > buflen) 524249195Smm return (SET_ERROR(EOVERFLOW)); 525168404Spjd return (0); 526168404Spjd} 527168404Spjd 528168404Spjdint 529168404Spjdzap_entry_update(zap_entry_handle_t *zeh, 530168404Spjd uint8_t integer_size, uint64_t num_integers, const void *buf) 531168404Spjd{ 532168404Spjd int delta_chunks; 533168404Spjd zap_leaf_t *l = zeh->zeh_leaf; 534168404Spjd struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp); 535168404Spjd 536168404Spjd delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) - 537219089Spjd ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen); 538168404Spjd 539168404Spjd if ((int)l->l_phys->l_hdr.lh_nfree < delta_chunks) 540249195Smm return (SET_ERROR(EAGAIN)); 541168404Spjd 542168404Spjd zap_leaf_array_free(l, &le->le_value_chunk); 543168404Spjd le->le_value_chunk = 544168404Spjd zap_leaf_array_create(l, buf, integer_size, num_integers); 545219089Spjd le->le_value_numints = num_integers; 546219089Spjd le->le_value_intlen = integer_size; 547168404Spjd return (0); 548168404Spjd} 549168404Spjd 550168404Spjdvoid 551168404Spjdzap_entry_remove(zap_entry_handle_t *zeh) 552168404Spjd{ 553168404Spjd uint16_t entry_chunk; 554168404Spjd struct zap_leaf_entry *le; 555168404Spjd zap_leaf_t *l = zeh->zeh_leaf; 556168404Spjd 557168404Spjd ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk); 558168404Spjd 559168404Spjd entry_chunk = *zeh->zeh_chunkp; 560168404Spjd le = ZAP_LEAF_ENTRY(l, entry_chunk); 561168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 562168404Spjd 563168404Spjd zap_leaf_array_free(l, &le->le_name_chunk); 564168404Spjd zap_leaf_array_free(l, &le->le_value_chunk); 565168404Spjd 566168404Spjd *zeh->zeh_chunkp = le->le_next; 567168404Spjd zap_leaf_chunk_free(l, entry_chunk); 568168404Spjd 569168404Spjd l->l_phys->l_hdr.lh_nentries--; 570168404Spjd} 571168404Spjd 572168404Spjdint 573219089Spjdzap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd, 574168404Spjd uint8_t integer_size, uint64_t num_integers, const void *buf, 575168404Spjd zap_entry_handle_t *zeh) 576168404Spjd{ 577168404Spjd uint16_t chunk; 578168404Spjd uint16_t *chunkp; 579168404Spjd struct zap_leaf_entry *le; 580219089Spjd uint64_t valuelen; 581168404Spjd int numchunks; 582219089Spjd uint64_t h = zn->zn_hash; 583168404Spjd 584168404Spjd valuelen = integer_size * num_integers; 585168404Spjd 586219089Spjd numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(zn->zn_key_orig_numints * 587219089Spjd zn->zn_key_intlen) + ZAP_LEAF_ARRAY_NCHUNKS(valuelen); 588168404Spjd if (numchunks > ZAP_LEAF_NUMCHUNKS(l)) 589168404Spjd return (E2BIG); 590168404Spjd 591219089Spjd if (cd == ZAP_NEED_CD) { 592185029Spjd /* find the lowest unused cd */ 593185029Spjd if (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) { 594185029Spjd cd = 0; 595185029Spjd 596168404Spjd for (chunk = *LEAF_HASH_ENTPTR(l, h); 597168404Spjd chunk != CHAIN_END; chunk = le->le_next) { 598168404Spjd le = ZAP_LEAF_ENTRY(l, chunk); 599185029Spjd if (le->le_cd > cd) 600168404Spjd break; 601185029Spjd if (le->le_hash == h) { 602185029Spjd ASSERT3U(cd, ==, le->le_cd); 603185029Spjd cd++; 604168404Spjd } 605168404Spjd } 606185029Spjd } else { 607185029Spjd /* old unsorted format; do it the O(n^2) way */ 608219089Spjd for (cd = 0; ; cd++) { 609185029Spjd for (chunk = *LEAF_HASH_ENTPTR(l, h); 610185029Spjd chunk != CHAIN_END; chunk = le->le_next) { 611185029Spjd le = ZAP_LEAF_ENTRY(l, chunk); 612185029Spjd if (le->le_hash == h && 613185029Spjd le->le_cd == cd) { 614185029Spjd break; 615185029Spjd } 616185029Spjd } 617185029Spjd /* If this cd is not in use, we are good. */ 618185029Spjd if (chunk == CHAIN_END) 619185029Spjd break; 620185029Spjd } 621168404Spjd } 622185029Spjd /* 623219089Spjd * We would run out of space in a block before we could 624219089Spjd * store enough entries to run out of CD values. 625185029Spjd */ 626219089Spjd ASSERT3U(cd, <, zap_maxcd(zn->zn_zap)); 627168404Spjd } 628168404Spjd 629168404Spjd if (l->l_phys->l_hdr.lh_nfree < numchunks) 630249195Smm return (SET_ERROR(EAGAIN)); 631168404Spjd 632168404Spjd /* make the entry */ 633168404Spjd chunk = zap_leaf_chunk_alloc(l); 634168404Spjd le = ZAP_LEAF_ENTRY(l, chunk); 635168404Spjd le->le_type = ZAP_CHUNK_ENTRY; 636219089Spjd le->le_name_chunk = zap_leaf_array_create(l, zn->zn_key_orig, 637219089Spjd zn->zn_key_intlen, zn->zn_key_orig_numints); 638219089Spjd le->le_name_numints = zn->zn_key_orig_numints; 639168404Spjd le->le_value_chunk = 640168404Spjd zap_leaf_array_create(l, buf, integer_size, num_integers); 641219089Spjd le->le_value_numints = num_integers; 642219089Spjd le->le_value_intlen = integer_size; 643168404Spjd le->le_hash = h; 644168404Spjd le->le_cd = cd; 645168404Spjd 646168404Spjd /* link it into the hash chain */ 647185029Spjd /* XXX if we did the search above, we could just use that */ 648185029Spjd chunkp = zap_leaf_rehash_entry(l, chunk); 649168404Spjd 650168404Spjd l->l_phys->l_hdr.lh_nentries++; 651168404Spjd 652168404Spjd zeh->zeh_leaf = l; 653168404Spjd zeh->zeh_num_integers = num_integers; 654219089Spjd zeh->zeh_integer_size = le->le_value_intlen; 655168404Spjd zeh->zeh_cd = le->le_cd; 656168404Spjd zeh->zeh_hash = le->le_hash; 657168404Spjd zeh->zeh_chunkp = chunkp; 658168404Spjd 659168404Spjd return (0); 660168404Spjd} 661168404Spjd 662168404Spjd/* 663185029Spjd * Determine if there is another entry with the same normalized form. 664185029Spjd * For performance purposes, either zn or name must be provided (the 665185029Spjd * other can be NULL). Note, there usually won't be any hash 666185029Spjd * conflicts, in which case we don't need the concatenated/normalized 667185029Spjd * form of the name. But all callers have one of these on hand anyway, 668185029Spjd * so might as well take advantage. A cleaner but slower interface 669185029Spjd * would accept neither argument, and compute the normalized name as 670185029Spjd * needed (using zap_name_alloc(zap_entry_read_name(zeh))). 671185029Spjd */ 672185029Spjdboolean_t 673185029Spjdzap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn, 674185029Spjd const char *name, zap_t *zap) 675185029Spjd{ 676185029Spjd uint64_t chunk; 677185029Spjd struct zap_leaf_entry *le; 678185029Spjd boolean_t allocdzn = B_FALSE; 679185029Spjd 680185029Spjd if (zap->zap_normflags == 0) 681185029Spjd return (B_FALSE); 682185029Spjd 683185029Spjd for (chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash); 684185029Spjd chunk != CHAIN_END; chunk = le->le_next) { 685185029Spjd le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk); 686185029Spjd if (le->le_hash != zeh->zeh_hash) 687185029Spjd continue; 688185029Spjd if (le->le_cd == zeh->zeh_cd) 689185029Spjd continue; 690185029Spjd 691185029Spjd if (zn == NULL) { 692185029Spjd zn = zap_name_alloc(zap, name, MT_FIRST); 693185029Spjd allocdzn = B_TRUE; 694185029Spjd } 695185029Spjd if (zap_leaf_array_match(zeh->zeh_leaf, zn, 696219089Spjd le->le_name_chunk, le->le_name_numints)) { 697185029Spjd if (allocdzn) 698185029Spjd zap_name_free(zn); 699185029Spjd return (B_TRUE); 700185029Spjd } 701185029Spjd } 702185029Spjd if (allocdzn) 703185029Spjd zap_name_free(zn); 704185029Spjd return (B_FALSE); 705185029Spjd} 706185029Spjd 707185029Spjd/* 708168404Spjd * Routines for transferring entries between leafs. 709168404Spjd */ 710168404Spjd 711185029Spjdstatic uint16_t * 712168404Spjdzap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry) 713168404Spjd{ 714168404Spjd struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry); 715185029Spjd struct zap_leaf_entry *le2; 716185029Spjd uint16_t *chunkp; 717185029Spjd 718185029Spjd /* 719185029Spjd * keep the entry chain sorted by cd 720185029Spjd * NB: this will not cause problems for unsorted leafs, though 721185029Spjd * it is unnecessary there. 722185029Spjd */ 723185029Spjd for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash); 724185029Spjd *chunkp != CHAIN_END; chunkp = &le2->le_next) { 725185029Spjd le2 = ZAP_LEAF_ENTRY(l, *chunkp); 726185029Spjd if (le2->le_cd > le->le_cd) 727185029Spjd break; 728185029Spjd } 729185029Spjd 730185029Spjd le->le_next = *chunkp; 731185029Spjd *chunkp = entry; 732185029Spjd return (chunkp); 733168404Spjd} 734168404Spjd 735168404Spjdstatic uint16_t 736168404Spjdzap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl) 737168404Spjd{ 738168404Spjd uint16_t new_chunk; 739168404Spjd uint16_t *nchunkp = &new_chunk; 740168404Spjd 741168404Spjd while (chunk != CHAIN_END) { 742168404Spjd uint16_t nchunk = zap_leaf_chunk_alloc(nl); 743168404Spjd struct zap_leaf_array *nla = 744168404Spjd &ZAP_LEAF_CHUNK(nl, nchunk).l_array; 745168404Spjd struct zap_leaf_array *la = 746168404Spjd &ZAP_LEAF_CHUNK(l, chunk).l_array; 747168404Spjd int nextchunk = la->la_next; 748168404Spjd 749168404Spjd ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); 750168404Spjd ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l)); 751168404Spjd 752168404Spjd *nla = *la; /* structure assignment */ 753168404Spjd 754168404Spjd zap_leaf_chunk_free(l, chunk); 755168404Spjd chunk = nextchunk; 756168404Spjd *nchunkp = nchunk; 757168404Spjd nchunkp = &nla->la_next; 758168404Spjd } 759168404Spjd *nchunkp = CHAIN_END; 760168404Spjd return (new_chunk); 761168404Spjd} 762168404Spjd 763168404Spjdstatic void 764168404Spjdzap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl) 765168404Spjd{ 766168404Spjd struct zap_leaf_entry *le, *nle; 767168404Spjd uint16_t chunk; 768168404Spjd 769168404Spjd le = ZAP_LEAF_ENTRY(l, entry); 770168404Spjd ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); 771168404Spjd 772168404Spjd chunk = zap_leaf_chunk_alloc(nl); 773168404Spjd nle = ZAP_LEAF_ENTRY(nl, chunk); 774168404Spjd *nle = *le; /* structure assignment */ 775168404Spjd 776185029Spjd (void) zap_leaf_rehash_entry(nl, chunk); 777168404Spjd 778168404Spjd nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl); 779168404Spjd nle->le_value_chunk = 780168404Spjd zap_leaf_transfer_array(l, le->le_value_chunk, nl); 781168404Spjd 782168404Spjd zap_leaf_chunk_free(l, entry); 783168404Spjd 784168404Spjd l->l_phys->l_hdr.lh_nentries--; 785168404Spjd nl->l_phys->l_hdr.lh_nentries++; 786168404Spjd} 787168404Spjd 788168404Spjd/* 789168404Spjd * Transfer the entries whose hash prefix ends in 1 to the new leaf. 790168404Spjd */ 791168404Spjdvoid 792185029Spjdzap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort) 793168404Spjd{ 794168404Spjd int i; 795168404Spjd int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len; 796168404Spjd 797168404Spjd /* set new prefix and prefix_len */ 798168404Spjd l->l_phys->l_hdr.lh_prefix <<= 1; 799168404Spjd l->l_phys->l_hdr.lh_prefix_len++; 800168404Spjd nl->l_phys->l_hdr.lh_prefix = l->l_phys->l_hdr.lh_prefix | 1; 801168404Spjd nl->l_phys->l_hdr.lh_prefix_len = l->l_phys->l_hdr.lh_prefix_len; 802168404Spjd 803168404Spjd /* break existing hash chains */ 804168404Spjd zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l)); 805168404Spjd 806185029Spjd if (sort) 807185029Spjd l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; 808185029Spjd 809168404Spjd /* 810168404Spjd * Transfer entries whose hash bit 'bit' is set to nl; rehash 811168404Spjd * the remaining entries 812168404Spjd * 813168404Spjd * NB: We could find entries via the hashtable instead. That 814168404Spjd * would be O(hashents+numents) rather than O(numblks+numents), 815168404Spjd * but this accesses memory more sequentially, and when we're 816168404Spjd * called, the block is usually pretty full. 817168404Spjd */ 818168404Spjd for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { 819168404Spjd struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i); 820168404Spjd if (le->le_type != ZAP_CHUNK_ENTRY) 821168404Spjd continue; 822168404Spjd 823168404Spjd if (le->le_hash & (1ULL << bit)) 824168404Spjd zap_leaf_transfer_entry(l, i, nl); 825168404Spjd else 826185029Spjd (void) zap_leaf_rehash_entry(l, i); 827168404Spjd } 828168404Spjd} 829168404Spjd 830168404Spjdvoid 831168404Spjdzap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs) 832168404Spjd{ 833168404Spjd int i, n; 834168404Spjd 835168404Spjd n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - 836168404Spjd l->l_phys->l_hdr.lh_prefix_len; 837168404Spjd n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 838168404Spjd zs->zs_leafs_with_2n_pointers[n]++; 839168404Spjd 840168404Spjd 841168404Spjd n = l->l_phys->l_hdr.lh_nentries/5; 842168404Spjd n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 843168404Spjd zs->zs_blocks_with_n5_entries[n]++; 844168404Spjd 845168404Spjd n = ((1<<FZAP_BLOCK_SHIFT(zap)) - 846168404Spjd l->l_phys->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 / 847168404Spjd (1<<FZAP_BLOCK_SHIFT(zap)); 848168404Spjd n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 849168404Spjd zs->zs_blocks_n_tenths_full[n]++; 850168404Spjd 851168404Spjd for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) { 852168404Spjd int nentries = 0; 853168404Spjd int chunk = l->l_phys->l_hash[i]; 854168404Spjd 855168404Spjd while (chunk != CHAIN_END) { 856168404Spjd struct zap_leaf_entry *le = 857168404Spjd ZAP_LEAF_ENTRY(l, chunk); 858168404Spjd 859219089Spjd n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_numints) + 860219089Spjd ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * 861219089Spjd le->le_value_intlen); 862168404Spjd n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 863168404Spjd zs->zs_entries_using_n_chunks[n]++; 864168404Spjd 865168404Spjd chunk = le->le_next; 866168404Spjd nentries++; 867168404Spjd } 868168404Spjd 869168404Spjd n = nentries; 870168404Spjd n = MIN(n, ZAP_HISTOGRAM_SIZE-1); 871168404Spjd zs->zs_buckets_with_n_entries[n]++; 872168404Spjd } 873168404Spjd} 874