hash_bigkey.c revision 189327
1/*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Margo Seltzer. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33#if defined(LIBC_SCCS) && !defined(lint) 34static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94"; 35#endif /* LIBC_SCCS and not lint */ 36#include <sys/cdefs.h> 37__FBSDID("$FreeBSD: head/lib/libc/db/hash/hash_bigkey.c 189327 2009-03-04 00:58:04Z delphij $"); 38 39/* 40 * PACKAGE: hash 41 * DESCRIPTION: 42 * Big key/data handling for the hashing package. 43 * 44 * ROUTINES: 45 * External 46 * __big_keydata 47 * __big_split 48 * __big_insert 49 * __big_return 50 * __big_delete 51 * __find_last_page 52 * Internal 53 * collect_key 54 * collect_data 55 */ 56 57#include <sys/param.h> 58 59#include <errno.h> 60#include <stdio.h> 61#include <stdlib.h> 62#include <string.h> 63 64#ifdef DEBUG 65#include <assert.h> 66#endif 67 68#include <db.h> 69#include "hash.h" 70#include "page.h" 71#include "extern.h" 72 73static int collect_key(HTAB *, BUFHEAD *, int, DBT *, int); 74static int collect_data(HTAB *, BUFHEAD *, int, int); 75 76/* 77 * Big_insert 78 * 79 * You need to do an insert and the key/data pair is too big 80 * 81 * Returns: 82 * 0 ==> OK 83 *-1 ==> ERROR 84 */ 85int 86__big_insert(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val) 87{ 88 u_int16_t *p; 89 int key_size, n, val_size; 90 u_int16_t space, move_bytes, off; 91 char *cp, *key_data, *val_data; 92 93 cp = bufp->page; /* Character pointer of p. */ 94 p = (u_int16_t *)cp; 95 96 key_data = (char *)key->data; 97 key_size = key->size; 98 val_data = (char *)val->data; 99 val_size = val->size; 100 101 /* First move the Key */ 102 for (space = FREESPACE(p) - BIGOVERHEAD; key_size; 103 space = FREESPACE(p) - BIGOVERHEAD) { 104 move_bytes = MIN(space, key_size); 105 off = OFFSET(p) - move_bytes; 106 memmove(cp + off, key_data, move_bytes); 107 key_size -= move_bytes; 108 key_data += move_bytes; 109 n = p[0]; 110 p[++n] = off; 111 p[0] = ++n; 112 FREESPACE(p) = off - PAGE_META(n); 113 OFFSET(p) = off; 114 p[n] = PARTIAL_KEY; 115 bufp = __add_ovflpage(hashp, bufp); 116 if (!bufp) 117 return (-1); 118 n = p[0]; 119 if (!key_size) { 120 if (FREESPACE(p)) { 121 move_bytes = MIN(FREESPACE(p), val_size); 122 off = OFFSET(p) - move_bytes; 123 p[n] = off; 124 memmove(cp + off, val_data, move_bytes); 125 val_data += move_bytes; 126 val_size -= move_bytes; 127 p[n - 2] = FULL_KEY_DATA; 128 FREESPACE(p) = FREESPACE(p) - move_bytes; 129 OFFSET(p) = off; 130 } else 131 p[n - 2] = FULL_KEY; 132 } 133 p = (u_int16_t *)bufp->page; 134 cp = bufp->page; 135 bufp->flags |= BUF_MOD; 136 } 137 138 /* Now move the data */ 139 for (space = FREESPACE(p) - BIGOVERHEAD; val_size; 140 space = FREESPACE(p) - BIGOVERHEAD) { 141 move_bytes = MIN(space, val_size); 142 /* 143 * Here's the hack to make sure that if the data ends on the 144 * same page as the key ends, FREESPACE is at least one. 145 */ 146 if (space == val_size && val_size == val->size) 147 move_bytes--; 148 off = OFFSET(p) - move_bytes; 149 memmove(cp + off, val_data, move_bytes); 150 val_size -= move_bytes; 151 val_data += move_bytes; 152 n = p[0]; 153 p[++n] = off; 154 p[0] = ++n; 155 FREESPACE(p) = off - PAGE_META(n); 156 OFFSET(p) = off; 157 if (val_size) { 158 p[n] = FULL_KEY; 159 bufp = __add_ovflpage(hashp, bufp); 160 if (!bufp) 161 return (-1); 162 cp = bufp->page; 163 p = (u_int16_t *)cp; 164 } else 165 p[n] = FULL_KEY_DATA; 166 bufp->flags |= BUF_MOD; 167 } 168 return (0); 169} 170 171/* 172 * Called when bufp's page contains a partial key (index should be 1) 173 * 174 * All pages in the big key/data pair except bufp are freed. We cannot 175 * free bufp because the page pointing to it is lost and we can't get rid 176 * of its pointer. 177 * 178 * Returns: 179 * 0 => OK 180 *-1 => ERROR 181 */ 182int 183__big_delete(HTAB *hashp, BUFHEAD *bufp) 184{ 185 BUFHEAD *last_bfp, *rbufp; 186 u_int16_t *bp, pageno; 187 int key_done, n; 188 189 rbufp = bufp; 190 last_bfp = NULL; 191 bp = (u_int16_t *)bufp->page; 192 pageno = 0; 193 key_done = 0; 194 195 while (!key_done || (bp[2] != FULL_KEY_DATA)) { 196 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) 197 key_done = 1; 198 199 /* 200 * If there is freespace left on a FULL_KEY_DATA page, then 201 * the data is short and fits entirely on this page, and this 202 * is the last page. 203 */ 204 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) 205 break; 206 pageno = bp[bp[0] - 1]; 207 rbufp->flags |= BUF_MOD; 208 rbufp = __get_buf(hashp, pageno, rbufp, 0); 209 if (last_bfp) 210 __free_ovflpage(hashp, last_bfp); 211 last_bfp = rbufp; 212 if (!rbufp) 213 return (-1); /* Error. */ 214 bp = (u_int16_t *)rbufp->page; 215 } 216 217 /* 218 * If we get here then rbufp points to the last page of the big 219 * key/data pair. Bufp points to the first one -- it should now be 220 * empty pointing to the next page after this pair. Can't free it 221 * because we don't have the page pointing to it. 222 */ 223 224 /* This is information from the last page of the pair. */ 225 n = bp[0]; 226 pageno = bp[n - 1]; 227 228 /* Now, bp is the first page of the pair. */ 229 bp = (u_int16_t *)bufp->page; 230 if (n > 2) { 231 /* There is an overflow page. */ 232 bp[1] = pageno; 233 bp[2] = OVFLPAGE; 234 bufp->ovfl = rbufp->ovfl; 235 } else 236 /* This is the last page. */ 237 bufp->ovfl = NULL; 238 n -= 2; 239 bp[0] = n; 240 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); 241 OFFSET(bp) = hashp->BSIZE - 1; 242 243 bufp->flags |= BUF_MOD; 244 if (rbufp) 245 __free_ovflpage(hashp, rbufp); 246 if (last_bfp != rbufp) 247 __free_ovflpage(hashp, last_bfp); 248 249 hashp->NKEYS--; 250 return (0); 251} 252/* 253 * Returns: 254 * 0 = key not found 255 * -1 = get next overflow page 256 * -2 means key not found and this is big key/data 257 * -3 error 258 */ 259int 260__find_bigpair(HTAB *hashp, BUFHEAD *bufp, int ndx, char *key, int size) 261{ 262 u_int16_t *bp; 263 char *p; 264 int ksize; 265 u_int16_t bytes; 266 char *kkey; 267 268 bp = (u_int16_t *)bufp->page; 269 p = bufp->page; 270 ksize = size; 271 kkey = key; 272 273 for (bytes = hashp->BSIZE - bp[ndx]; 274 bytes <= size && bp[ndx + 1] == PARTIAL_KEY; 275 bytes = hashp->BSIZE - bp[ndx]) { 276 if (memcmp(p + bp[ndx], kkey, bytes)) 277 return (-2); 278 kkey += bytes; 279 ksize -= bytes; 280 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0); 281 if (!bufp) 282 return (-3); 283 p = bufp->page; 284 bp = (u_int16_t *)p; 285 ndx = 1; 286 } 287 288 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) { 289#ifdef HASH_STATISTICS 290 ++hash_collisions; 291#endif 292 return (-2); 293 } else 294 return (ndx); 295} 296 297/* 298 * Given the buffer pointer of the first overflow page of a big pair, 299 * find the end of the big pair 300 * 301 * This will set bpp to the buffer header of the last page of the big pair. 302 * It will return the pageno of the overflow page following the last page 303 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the 304 * bucket) 305 */ 306u_int16_t 307__find_last_page(HTAB *hashp, BUFHEAD **bpp) 308{ 309 BUFHEAD *bufp; 310 u_int16_t *bp, pageno; 311 int n; 312 313 bufp = *bpp; 314 bp = (u_int16_t *)bufp->page; 315 for (;;) { 316 n = bp[0]; 317 318 /* 319 * This is the last page if: the tag is FULL_KEY_DATA and 320 * either only 2 entries OVFLPAGE marker is explicit there 321 * is freespace on the page. 322 */ 323 if (bp[2] == FULL_KEY_DATA && 324 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) 325 break; 326 327 pageno = bp[n - 1]; 328 bufp = __get_buf(hashp, pageno, bufp, 0); 329 if (!bufp) 330 return (0); /* Need to indicate an error! */ 331 bp = (u_int16_t *)bufp->page; 332 } 333 334 *bpp = bufp; 335 if (bp[0] > 2) 336 return (bp[3]); 337 else 338 return (0); 339} 340 341/* 342 * Return the data for the key/data pair that begins on this page at this 343 * index (index should always be 1). 344 */ 345int 346__big_return(HTAB *hashp, BUFHEAD *bufp, int ndx, DBT *val, int set_current) 347{ 348 BUFHEAD *save_p; 349 u_int16_t *bp, len, off, save_addr; 350 char *tp; 351 352 bp = (u_int16_t *)bufp->page; 353 while (bp[ndx + 1] == PARTIAL_KEY) { 354 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 355 if (!bufp) 356 return (-1); 357 bp = (u_int16_t *)bufp->page; 358 ndx = 1; 359 } 360 361 if (bp[ndx + 1] == FULL_KEY) { 362 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 363 if (!bufp) 364 return (-1); 365 bp = (u_int16_t *)bufp->page; 366 save_p = bufp; 367 save_addr = save_p->addr; 368 off = bp[1]; 369 len = 0; 370 } else 371 if (!FREESPACE(bp)) { 372 /* 373 * This is a hack. We can't distinguish between 374 * FULL_KEY_DATA that contains complete data or 375 * incomplete data, so we require that if the data 376 * is complete, there is at least 1 byte of free 377 * space left. 378 */ 379 off = bp[bp[0]]; 380 len = bp[1] - off; 381 save_p = bufp; 382 save_addr = bufp->addr; 383 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 384 if (!bufp) 385 return (-1); 386 bp = (u_int16_t *)bufp->page; 387 } else { 388 /* The data is all on one page. */ 389 tp = (char *)bp; 390 off = bp[bp[0]]; 391 val->data = (u_char *)tp + off; 392 val->size = bp[1] - off; 393 if (set_current) { 394 if (bp[0] == 2) { /* No more buckets in 395 * chain */ 396 hashp->cpage = NULL; 397 hashp->cbucket++; 398 hashp->cndx = 1; 399 } else { 400 hashp->cpage = __get_buf(hashp, 401 bp[bp[0] - 1], bufp, 0); 402 if (!hashp->cpage) 403 return (-1); 404 hashp->cndx = 1; 405 if (!((u_int16_t *) 406 hashp->cpage->page)[0]) { 407 hashp->cbucket++; 408 hashp->cpage = NULL; 409 } 410 } 411 } 412 return (0); 413 } 414 415 val->size = (size_t)collect_data(hashp, bufp, (int)len, set_current); 416 if (val->size == (size_t)-1) 417 return (-1); 418 if (save_p->addr != save_addr) { 419 /* We are pretty short on buffers. */ 420 errno = EINVAL; /* OUT OF BUFFERS */ 421 return (-1); 422 } 423 memmove(hashp->tmp_buf, (save_p->page) + off, len); 424 val->data = (u_char *)hashp->tmp_buf; 425 return (0); 426} 427/* 428 * Count how big the total datasize is by recursing through the pages. Then 429 * allocate a buffer and copy the data as you recurse up. 430 */ 431static int 432collect_data(HTAB *hashp, BUFHEAD *bufp, int len, int set) 433{ 434 u_int16_t *bp; 435 char *p; 436 BUFHEAD *xbp; 437 u_int16_t save_addr; 438 int mylen, totlen; 439 440 p = bufp->page; 441 bp = (u_int16_t *)p; 442 mylen = hashp->BSIZE - bp[1]; 443 save_addr = bufp->addr; 444 445 if (bp[2] == FULL_KEY_DATA) { /* End of Data */ 446 totlen = len + mylen; 447 if (hashp->tmp_buf) 448 free(hashp->tmp_buf); 449 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL) 450 return (-1); 451 if (set) { 452 hashp->cndx = 1; 453 if (bp[0] == 2) { /* No more buckets in chain */ 454 hashp->cpage = NULL; 455 hashp->cbucket++; 456 } else { 457 hashp->cpage = 458 __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 459 if (!hashp->cpage) 460 return (-1); 461 else if (!((u_int16_t *)hashp->cpage->page)[0]) { 462 hashp->cbucket++; 463 hashp->cpage = NULL; 464 } 465 } 466 } 467 } else { 468 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 469 if (!xbp || ((totlen = 470 collect_data(hashp, xbp, len + mylen, set)) < 1)) 471 return (-1); 472 } 473 if (bufp->addr != save_addr) { 474 errno = EINVAL; /* Out of buffers. */ 475 return (-1); 476 } 477 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen); 478 return (totlen); 479} 480 481/* 482 * Fill in the key and data for this big pair. 483 */ 484int 485__big_keydata(HTAB *hashp, BUFHEAD *bufp, DBT *key, DBT *val, int set) 486{ 487 key->size = (size_t)collect_key(hashp, bufp, 0, val, set); 488 if (key->size == (size_t)-1) 489 return (-1); 490 key->data = (u_char *)hashp->tmp_key; 491 return (0); 492} 493 494/* 495 * Count how big the total key size is by recursing through the pages. Then 496 * collect the data, allocate a buffer and copy the key as you recurse up. 497 */ 498static int 499collect_key(HTAB *hashp, BUFHEAD *bufp, int len, DBT *val, int set) 500{ 501 BUFHEAD *xbp; 502 char *p; 503 int mylen, totlen; 504 u_int16_t *bp, save_addr; 505 506 p = bufp->page; 507 bp = (u_int16_t *)p; 508 mylen = hashp->BSIZE - bp[1]; 509 510 save_addr = bufp->addr; 511 totlen = len + mylen; 512 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ 513 if (hashp->tmp_key != NULL) 514 free(hashp->tmp_key); 515 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL) 516 return (-1); 517 if (__big_return(hashp, bufp, 1, val, set)) 518 return (-1); 519 } else { 520 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 521 if (!xbp || ((totlen = 522 collect_key(hashp, xbp, totlen, val, set)) < 1)) 523 return (-1); 524 } 525 if (bufp->addr != save_addr) { 526 errno = EINVAL; /* MIS -- OUT OF BUFFERS */ 527 return (-1); 528 } 529 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen); 530 return (totlen); 531} 532 533/* 534 * Returns: 535 * 0 => OK 536 * -1 => error 537 */ 538int 539__big_split(HTAB *hashp, 540 BUFHEAD *op, /* Pointer to where to put keys that go in old bucket */ 541 BUFHEAD *np, /* Pointer to new bucket page */ 542 BUFHEAD *big_keyp, /* Pointer to first page containing the big key/data */ 543 int addr, /* Address of big_keyp */ 544 u_int32_t obucket, /* Old Bucket */ 545 SPLIT_RETURN *ret) 546{ 547 BUFHEAD *bp, *tmpp; 548 DBT key, val; 549 u_int32_t change; 550 u_int16_t free_space, n, off, *tp; 551 552 bp = big_keyp; 553 554 /* Now figure out where the big key/data goes */ 555 if (__big_keydata(hashp, big_keyp, &key, &val, 0)) 556 return (-1); 557 change = (__call_hash(hashp, key.data, key.size) != obucket); 558 559 if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) { 560 if (!(ret->nextp = 561 __get_buf(hashp, ret->next_addr, big_keyp, 0))) 562 return (-1); 563 } else 564 ret->nextp = NULL; 565 566 /* Now make one of np/op point to the big key/data pair */ 567#ifdef DEBUG 568 assert(np->ovfl == NULL); 569#endif 570 if (change) 571 tmpp = np; 572 else 573 tmpp = op; 574 575 tmpp->flags |= BUF_MOD; 576#ifdef DEBUG1 577 (void)fprintf(stderr, 578 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, 579 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); 580#endif 581 tmpp->ovfl = bp; /* one of op/np point to big_keyp */ 582 tp = (u_int16_t *)tmpp->page; 583#ifdef DEBUG 584 assert(FREESPACE(tp) >= OVFLSIZE); 585#endif 586 n = tp[0]; 587 off = OFFSET(tp); 588 free_space = FREESPACE(tp); 589 tp[++n] = (u_int16_t)addr; 590 tp[++n] = OVFLPAGE; 591 tp[0] = n; 592 OFFSET(tp) = off; 593 FREESPACE(tp) = free_space - OVFLSIZE; 594 595 /* 596 * Finally, set the new and old return values. BIG_KEYP contains a 597 * pointer to the last page of the big key_data pair. Make sure that 598 * big_keyp has no following page (2 elements) or create an empty 599 * following page. 600 */ 601 602 ret->newp = np; 603 ret->oldp = op; 604 605 tp = (u_int16_t *)big_keyp->page; 606 big_keyp->flags |= BUF_MOD; 607 if (tp[0] > 2) { 608 /* 609 * There may be either one or two offsets on this page. If 610 * there is one, then the overflow page is linked on normally 611 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains 612 * the second offset and needs to get stuffed in after the 613 * next overflow page is added. 614 */ 615 n = tp[4]; 616 free_space = FREESPACE(tp); 617 off = OFFSET(tp); 618 tp[0] -= 2; 619 FREESPACE(tp) = free_space + OVFLSIZE; 620 OFFSET(tp) = off; 621 tmpp = __add_ovflpage(hashp, big_keyp); 622 if (!tmpp) 623 return (-1); 624 tp[4] = n; 625 } else 626 tmpp = big_keyp; 627 628 if (change) 629 ret->newp = tmpp; 630 else 631 ret->oldp = tmpp; 632 return (0); 633} 634