hash_page.c revision 331722
165205Scg/*- 265205Scg * Copyright (c) 1990, 1993, 1994 365205Scg * The Regents of the University of California. All rights reserved. 465205Scg * 565205Scg * This code is derived from software contributed to Berkeley by 665205Scg * Margo Seltzer. 765205Scg * 865205Scg * Redistribution and use in source and binary forms, with or without 965205Scg * modification, are permitted provided that the following conditions 1065205Scg * are met: 1165205Scg * 1. Redistributions of source code must retain the above copyright 1265205Scg * notice, this list of conditions and the following disclaimer. 1365205Scg * 2. Redistributions in binary form must reproduce the above copyright 1465205Scg * notice, this list of conditions and the following disclaimer in the 1565205Scg * documentation and/or other materials provided with the distribution. 1665205Scg * 4. Neither the name of the University nor the names of its contributors 1765205Scg * may be used to endorse or promote products derived from this software 1865205Scg * without specific prior written permission. 1965205Scg * 2065205Scg * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2165205Scg * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2265205Scg * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2365205Scg * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2465205Scg * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2565205Scg * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2665205Scg * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2765205Scg * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2865205Scg * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29119287Simp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30119287Simp * SUCH DAMAGE. 3165205Scg */ 3282180Scg 3382180Scg#if defined(LIBC_SCCS) && !defined(lint) 3465205Scgstatic char sccsid[] = "@(#)hash_page.c 8.7 (Berkeley) 8/16/94"; 3565205Scg#endif /* LIBC_SCCS and not lint */ 3665205Scg#include <sys/cdefs.h> 3765205Scg__FBSDID("$FreeBSD: stable/11/lib/libc/db/hash/hash_page.c 331722 2018-03-29 02:50:57Z eadler $"); 3865205Scg 3965205Scg/* 4065205Scg * PACKAGE: hashing 4165205Scg * 4265205Scg * DESCRIPTION: 4365205Scg * Page manipulation for hashing package. 4465205Scg * 4565205Scg * ROUTINES: 4665205Scg * 4765205Scg * External 4865205Scg * __get_page 4965205Scg * __add_ovflpage 5065205Scg * Internal 5165205Scg * overflow_page 5265205Scg * open_temp 5365205Scg */ 5465205Scg 5565205Scg#include "namespace.h" 5665205Scg#include <sys/param.h> 5765205Scg 5865205Scg#include <errno.h> 5965205Scg#include <fcntl.h> 6065205Scg#include <signal.h> 6165205Scg#include <stdio.h> 6265205Scg#include <stdlib.h> 6365205Scg#include <string.h> 6465205Scg#include <unistd.h> 6565205Scg#ifdef DEBUG 6665205Scg#include <assert.h> 6765205Scg#endif 6865205Scg#include "un-namespace.h" 6965205Scg#include "libc_private.h" 7065205Scg 7165205Scg#include <db.h> 7265205Scg#include "hash.h" 7365205Scg#include "page.h" 7465205Scg#include "extern.h" 7565205Scg 7665205Scgstatic u_int32_t *fetch_bitmap(HTAB *, int); 7765205Scgstatic u_int32_t first_free(u_int32_t); 7865205Scgstatic int open_temp(HTAB *); 7965205Scgstatic u_int16_t overflow_page(HTAB *); 8065205Scgstatic void putpair(char *, const DBT *, const DBT *); 8165205Scgstatic void squeeze_key(u_int16_t *, const DBT *, const DBT *); 8265205Scgstatic int ugly_split(HTAB *, u_int32_t, BUFHEAD *, BUFHEAD *, int, int); 8365205Scg 8465205Scg#define PAGE_INIT(P) { \ 8565205Scg ((u_int16_t *)(P))[0] = 0; \ 8665205Scg ((u_int16_t *)(P))[1] = hashp->BSIZE - 3 * sizeof(u_int16_t); \ 8765205Scg ((u_int16_t *)(P))[2] = hashp->BSIZE; \ 8865205Scg} 8965205Scg 9065205Scg/* 9165205Scg * This is called AFTER we have verified that there is room on the page for 9265205Scg * the pair (PAIRFITS has returned true) so we go right ahead and start moving 9365205Scg * stuff on. 9465205Scg */ 9565205Scgstatic void 9665205Scgputpair(char *p, const DBT *key, const DBT *val) 9765205Scg{ 9865205Scg u_int16_t *bp, n, off; 9984658Scg 10065205Scg bp = (u_int16_t *)p; 10165205Scg 10265205Scg /* Enter the key first. */ 10365205Scg n = bp[0]; 10465205Scg 10574763Scg off = OFFSET(bp) - key->size; 10665205Scg memmove(p + off, key->data, key->size); 10765205Scg bp[++n] = off; 10865205Scg 10965205Scg /* Now the data. */ 11065205Scg off -= val->size; 11165205Scg memmove(p + off, val->data, val->size); 11265644Scg bp[++n] = off; 11365205Scg 11465205Scg /* Adjust page info. */ 11565205Scg bp[0] = n; 11674763Scg bp[n + 1] = off - ((n + 3) * sizeof(u_int16_t)); 11765205Scg bp[n + 2] = off; 11865205Scg} 11965205Scg 12065205Scg/* 12165205Scg * Returns: 12265205Scg * 0 OK 12365205Scg * -1 error 124102620Ssobomax */ 125102620Ssobomaxint 126102620Ssobomax__delpair(HTAB *hashp, BUFHEAD *bufp, int ndx) 127102620Ssobomax{ 12865205Scg u_int16_t *bp, newoff, pairlen; 12965205Scg int n; 13065205Scg 13165205Scg bp = (u_int16_t *)bufp->page; 132102620Ssobomax n = bp[0]; 133102620Ssobomax 134102620Ssobomax if (bp[ndx + 1] < REAL_KEY) 135102620Ssobomax return (__big_delete(hashp, bufp)); 13665205Scg if (ndx != 1) 137102620Ssobomax newoff = bp[ndx - 1]; 138102620Ssobomax else 139102620Ssobomax newoff = hashp->BSIZE; 14065205Scg pairlen = newoff - bp[ndx + 1]; 141102620Ssobomax 142102620Ssobomax if (ndx != (n - 1)) { 143102620Ssobomax /* Hard Case -- need to shuffle keys */ 14465205Scg int i; 14565205Scg char *src = bufp->page + (int)OFFSET(bp); 14665205Scg char *dst = src + (int)pairlen; 14765205Scg memmove(dst, src, bp[ndx + 1] - OFFSET(bp)); 14865205Scg 14965205Scg /* Now adjust the pointers */ 15065205Scg for (i = ndx + 2; i <= n; i += 2) { 15165205Scg if (bp[i + 1] == OVFLPAGE) { 15265205Scg bp[i - 2] = bp[i]; 15365205Scg bp[i - 1] = bp[i + 1]; 15465205Scg } else { 15565205Scg bp[i - 2] = bp[i] + pairlen; 15665205Scg bp[i - 1] = bp[i + 1] + pairlen; 15765205Scg } 15865205Scg } 15965205Scg if (ndx == hashp->cndx) { 16065205Scg /* 161102620Ssobomax * We just removed pair we were "pointing" to. 16284658Scg * By moving back the cndx we ensure subsequent 163102620Ssobomax * hash_seq() calls won't skip over any entries. 164102889Ssobomax */ 165102889Ssobomax hashp->cndx -= 2; 16665205Scg } 16765205Scg } 16865205Scg /* Finally adjust the page data */ 16965205Scg bp[n] = OFFSET(bp) + pairlen; 17065205Scg bp[n - 1] = bp[n + 1] + pairlen + 2 * sizeof(u_int16_t); 17165205Scg bp[0] = n - 2; 17265205Scg hashp->NKEYS--; 17365205Scg 17465205Scg bufp->flags |= BUF_MOD; 17565205Scg return (0); 17665205Scg} 17765205Scg/* 17865205Scg * Returns: 17965205Scg * 0 ==> OK 18065205Scg * -1 ==> Error 18165205Scg */ 18265205Scgint 18365205Scg__split_page(HTAB *hashp, u_int32_t obucket, u_int32_t nbucket) 18465205Scg{ 18565205Scg BUFHEAD *new_bufp, *old_bufp; 18665205Scg u_int16_t *ino; 187108064Ssemenu char *np; 18865205Scg DBT key, val; 18965205Scg int n, ndx, retval; 190108064Ssemenu u_int16_t copyto, diff, off, moved; 191108064Ssemenu char *op; 19265205Scg 193108064Ssemenu copyto = (u_int16_t)hashp->BSIZE; 194108064Ssemenu off = (u_int16_t)hashp->BSIZE; 19565205Scg old_bufp = __get_buf(hashp, obucket, NULL, 0); 196108064Ssemenu if (old_bufp == NULL) 197108064Ssemenu return (-1); 19865205Scg new_bufp = __get_buf(hashp, nbucket, NULL, 0); 19965205Scg if (new_bufp == NULL) 20065205Scg return (-1); 20170134Scg 20265205Scg old_bufp->flags |= (BUF_MOD | BUF_PIN); 20365205Scg new_bufp->flags |= (BUF_MOD | BUF_PIN); 20465205Scg 20565205Scg ino = (u_int16_t *)(op = old_bufp->page); 20670134Scg np = new_bufp->page; 20770134Scg 20865205Scg moved = 0; 20965205Scg 21065205Scg for (n = 1, ndx = 1; n < ino[0]; n += 2) { 21165340Scg if (ino[n + 1] < REAL_KEY) { 21265205Scg retval = ugly_split(hashp, obucket, old_bufp, new_bufp, 21365205Scg (int)copyto, (int)moved); 21465205Scg old_bufp->flags &= ~BUF_PIN; 21565205Scg new_bufp->flags &= ~BUF_PIN; 21665205Scg return (retval); 21765205Scg 21865205Scg } 21965205Scg key.data = (u_char *)op + ino[n]; 22065340Scg key.size = off - ino[n]; 22165205Scg 22265205Scg if (__call_hash(hashp, key.data, key.size) == obucket) { 22365205Scg /* Don't switch page */ 22465205Scg diff = copyto - off; 22565205Scg if (diff) { 22665205Scg copyto = ino[n + 1] + diff; 22765205Scg memmove(op + copyto, op + ino[n + 1], 22865205Scg off - ino[n + 1]); 22965205Scg ino[ndx] = copyto + ino[n] - ino[n + 1]; 230102889Ssobomax ino[ndx + 1] = copyto; 23165205Scg } else 23265340Scg copyto = ino[n + 1]; 23365205Scg ndx += 2; 23465205Scg } else { 23565205Scg /* Switch page */ 23670134Scg val.data = (u_char *)op + ino[n + 1]; 23770134Scg val.size = ino[n] - ino[n + 1]; 23865205Scg putpair(np, &key, &val); 23965205Scg moved += 2; 24065205Scg } 24165340Scg 24265205Scg off = ino[n + 1]; 24365340Scg } 24465205Scg 24565340Scg /* Now clean up the page */ 24665205Scg ino[0] -= moved; 24765205Scg FREESPACE(ino) = copyto - sizeof(u_int16_t) * (ino[0] + 3); 24865205Scg OFFSET(ino) = copyto; 24965205Scg 25065205Scg#ifdef DEBUG3 25165205Scg (void)fprintf(stderr, "split %d/%d\n", 25265205Scg ((u_int16_t *)np)[0] / 2, 25370134Scg ((u_int16_t *)op)[0] / 2); 25465205Scg#endif 25565340Scg /* unpin both pages */ 25665205Scg old_bufp->flags &= ~BUF_PIN; 25765205Scg new_bufp->flags &= ~BUF_PIN; 25865340Scg return (0); 25965205Scg} 26065205Scg 26165205Scg/* 26265205Scg * Called when we encounter an overflow or big key/data page during split 26365205Scg * handling. This is special cased since we have to begin checking whether 26465205Scg * the key/data pairs fit on their respective pages and because we may need 26565205Scg * overflow pages for both the old and new pages. 26670134Scg * 26765205Scg * The first page might be a page with regular key/data pairs in which case 26865205Scg * we have a regular overflow condition and just need to go on to the next 26970134Scg * page or it might be a big key/data pair in which case we need to fix the 27065205Scg * big key/data pair. 27165205Scg * 27270134Scg * Returns: 27370134Scg * 0 ==> success 27470134Scg * -1 ==> failure 27570134Scg */ 27670134Scgstatic int 27770134Scgugly_split(HTAB *hashp, 27870134Scg u_int32_t obucket, /* Same as __split_page. */ 27970134Scg BUFHEAD *old_bufp, 28070134Scg BUFHEAD *new_bufp, 28165340Scg int copyto, /* First byte on page which contains key/data values. */ 28265340Scg int moved) /* Number of pairs moved to new page. */ 28365205Scg{ 28465205Scg BUFHEAD *bufp; /* Buffer header for ino */ 28565205Scg u_int16_t *ino; /* Page keys come off of */ 28665205Scg u_int16_t *np; /* New page */ 28765205Scg u_int16_t *op; /* Page keys go on to if they aren't moving */ 28865205Scg 28965340Scg BUFHEAD *last_bfp; /* Last buf header OVFL needing to be freed */ 29065205Scg DBT key, val; 29165340Scg SPLIT_RETURN ret; 29265205Scg u_int16_t n, off, ov_addr, scopyto; 29365205Scg char *cino; /* Character value of ino */ 29465205Scg 29565205Scg bufp = old_bufp; 29665205Scg ino = (u_int16_t *)old_bufp->page; 29765205Scg np = (u_int16_t *)new_bufp->page; 29865205Scg op = (u_int16_t *)old_bufp->page; 29965205Scg last_bfp = NULL; 30065340Scg scopyto = (u_int16_t)copyto; /* ANSI */ 30165205Scg 30265205Scg n = ino[0] - 1; 30365205Scg while (n < ino[0]) { 30465205Scg if (ino[2] < REAL_KEY && ino[2] != OVFLPAGE) { 30565205Scg if (__big_split(hashp, old_bufp, 30665205Scg new_bufp, bufp, bufp->addr, obucket, &ret)) 30765205Scg return (-1); 30865205Scg old_bufp = ret.oldp; 30965340Scg if (!old_bufp) 31065205Scg return (-1); 31165205Scg op = (u_int16_t *)old_bufp->page; 31265205Scg new_bufp = ret.newp; 31365205Scg if (!new_bufp) 31465340Scg return (-1); 31565205Scg np = (u_int16_t *)new_bufp->page; 31665205Scg bufp = ret.nextp; 31765205Scg if (!bufp) 31865205Scg return (0); 31965340Scg cino = (char *)bufp->page; 32065205Scg ino = (u_int16_t *)cino; 32165205Scg last_bfp = ret.nextp; 32265205Scg } else if (ino[n + 1] == OVFLPAGE) { 32365205Scg ov_addr = ino[n]; 32470134Scg /* 32565205Scg * Fix up the old page -- the extra 2 are the fields 32665205Scg * which contained the overflow information. 32774763Scg */ 32865205Scg ino[0] -= (moved + 2); 32965205Scg FREESPACE(ino) = 33065205Scg scopyto - sizeof(u_int16_t) * (ino[0] + 3); 33165340Scg OFFSET(ino) = scopyto; 33265205Scg 33365205Scg bufp = __get_buf(hashp, ov_addr, bufp, 0); 33465205Scg if (!bufp) 33565205Scg return (-1); 33665205Scg 33784658Scg ino = (u_int16_t *)bufp->page; 33865205Scg n = 1; 33965205Scg scopyto = hashp->BSIZE; 34065205Scg moved = 0; 34165205Scg 34270134Scg if (last_bfp) 34365205Scg __free_ovflpage(hashp, last_bfp); 34465205Scg last_bfp = bufp; 34565205Scg } 34665340Scg /* Move regular sized pairs of there are any */ 34765205Scg off = hashp->BSIZE; 34865205Scg for (n = 1; (n < ino[0]) && (ino[n + 1] >= REAL_KEY); n += 2) { 34965205Scg cino = (char *)ino; 35065205Scg key.data = (u_char *)cino + ino[n]; 35165205Scg key.size = off - ino[n]; 35265340Scg val.data = (u_char *)cino + ino[n + 1]; 35365205Scg val.size = ino[n] - ino[n + 1]; 35465205Scg off = ino[n + 1]; 35565205Scg 35665205Scg if (__call_hash(hashp, key.data, key.size) == obucket) { 35765205Scg /* Keep on old page */ 35865340Scg if (PAIRFITS(op, (&key), (&val))) 35965205Scg putpair((char *)op, &key, &val); 36065205Scg else { 36165205Scg old_bufp = 36265205Scg __add_ovflpage(hashp, old_bufp); 36365205Scg if (!old_bufp) 36465340Scg return (-1); 36565205Scg op = (u_int16_t *)old_bufp->page; 36665205Scg putpair((char *)op, &key, &val); 36765205Scg } 36865205Scg old_bufp->flags |= BUF_MOD; 36965205Scg } else { 37065205Scg /* Move to new page */ 37165205Scg if (PAIRFITS(np, (&key), (&val))) 37265340Scg putpair((char *)np, &key, &val); 37365340Scg else { 37465340Scg new_bufp = 37565340Scg __add_ovflpage(hashp, new_bufp); 37665340Scg if (!new_bufp) 37765340Scg return (-1); 37865340Scg np = (u_int16_t *)new_bufp->page; 37965340Scg putpair((char *)np, &key, &val); 38065340Scg } 38165340Scg new_bufp->flags |= BUF_MOD; 38265340Scg } 38365205Scg } 38465205Scg } 38565205Scg if (last_bfp) 38665205Scg __free_ovflpage(hashp, last_bfp); 38770134Scg return (0); 38865205Scg} 38965205Scg 39065205Scg/* 39165205Scg * Add the given pair to the page 39265340Scg * 39365340Scg * Returns: 39465205Scg * 0 ==> OK 39565340Scg * 1 ==> failure 39665205Scg */ 39765205Scgint 39865205Scg__addel(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val) 39965205Scg{ 40065205Scg u_int16_t *bp, *sop; 40165340Scg int do_expand; 40265205Scg 40365205Scg bp = (u_int16_t *)bufp->page; 40465205Scg do_expand = 0; 40565205Scg while (bp[0] && (bp[2] < REAL_KEY || bp[bp[0]] < REAL_KEY)) 40665205Scg /* Exception case */ 40765340Scg if (bp[2] == FULL_KEY_DATA && bp[0] == 2) 40865205Scg /* This is the last page of a big key/data pair 40965340Scg and we need to add another page */ 41065205Scg break; 41165205Scg else if (bp[2] < REAL_KEY && bp[bp[0]] != OVFLPAGE) { 41265205Scg bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 41365205Scg if (!bufp) 41470134Scg return (-1); 41565205Scg bp = (u_int16_t *)bufp->page; 41665205Scg } else if (bp[bp[0]] != OVFLPAGE) { 41765205Scg /* Short key/data pairs, no more pages */ 41865340Scg break; 41965205Scg } else { 42065205Scg /* Try to squeeze key on this page */ 42165205Scg if (bp[2] >= REAL_KEY && 42265205Scg FREESPACE(bp) >= PAIRSIZE(key, val)) { 42365340Scg squeeze_key(bp, key, val); 42465205Scg goto stats; 42565205Scg } else { 42665205Scg bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 42765205Scg if (!bufp) 42865340Scg return (-1); 42965205Scg bp = (u_int16_t *)bufp->page; 43065205Scg } 43165205Scg } 43265205Scg 43365205Scg if (PAIRFITS(bp, key, val)) 43465205Scg putpair(bufp->page, key, val); 43570134Scg else { 43665205Scg do_expand = 1; 43765205Scg bufp = __add_ovflpage(hashp, bufp); 43865205Scg if (!bufp) 439111183Scognet return (-1); 44065205Scg sop = (u_int16_t *)bufp->page; 44165340Scg 44265205Scg if (PAIRFITS(sop, key, val)) 44365340Scg putpair((char *)sop, key, val); 44465205Scg else 44565205Scg if (__big_insert(hashp, bufp, key, val)) 44665205Scg return (-1); 44765340Scg } 44865205Scgstats: 44965205Scg bufp->flags |= BUF_MOD; 45065340Scg /* 45165205Scg * If the average number of keys per bucket exceeds the fill factor, 45265205Scg * expand the table. 45365205Scg */ 45465205Scg hashp->NKEYS++; 45565205Scg if (do_expand || 45665205Scg (hashp->NKEYS / (hashp->MAX_BUCKET + 1) > hashp->FFACTOR)) 45765205Scg return (__expand_table(hashp)); 45865340Scg return (0); 45965205Scg} 46065205Scg 46165205Scg/* 46265205Scg * 46365205Scg * Returns: 46465205Scg * pointer on success 46565205Scg * NULL on error 46665205Scg */ 46765205ScgBUFHEAD * 46865205Scg__add_ovflpage(HTAB *hashp, BUFHEAD *bufp) 46965205Scg{ 47065205Scg u_int16_t *sp, ndx, ovfl_num; 47165205Scg#ifdef DEBUG1 47265205Scg int tmp1, tmp2; 47365205Scg#endif 47465205Scg sp = (u_int16_t *)bufp->page; 47565340Scg 47665340Scg /* Check if we are dynamically determining the fill factor */ 47765205Scg if (hashp->FFACTOR == DEF_FFACTOR) { 47865205Scg hashp->FFACTOR = sp[0] >> 1; 47965205Scg if (hashp->FFACTOR < MIN_FFACTOR) 48065205Scg hashp->FFACTOR = MIN_FFACTOR; 48165205Scg } 48265205Scg bufp->flags |= BUF_MOD; 48365205Scg ovfl_num = overflow_page(hashp); 48465205Scg#ifdef DEBUG1 48565205Scg tmp1 = bufp->addr; 48665340Scg tmp2 = bufp->ovfl ? bufp->ovfl->addr : 0; 48765205Scg#endif 48865205Scg if (!ovfl_num || !(bufp->ovfl = __get_buf(hashp, ovfl_num, bufp, 1))) 48965205Scg return (NULL); 49065205Scg bufp->ovfl->flags |= BUF_MOD; 49165205Scg#ifdef DEBUG1 49270134Scg (void)fprintf(stderr, "ADDOVFLPAGE: %d->ovfl was %d is now %d\n", 49365205Scg tmp1, tmp2, bufp->ovfl->addr); 49465205Scg#endif 49565205Scg ndx = sp[0]; 49665205Scg /* 49765340Scg * Since a pair is allocated on a page only if there's room to add 49865205Scg * an overflow page, we know that the OVFL information will fit on 49965340Scg * the page. 50065340Scg */ 50165205Scg sp[ndx + 4] = OFFSET(sp); 50265205Scg sp[ndx + 3] = FREESPACE(sp) - OVFLSIZE; 50365340Scg sp[ndx + 1] = ovfl_num; 50465205Scg sp[ndx + 2] = OVFLPAGE; 50565205Scg sp[0] = ndx + 2; 50665205Scg#ifdef HASH_STATISTICS 50765205Scg hash_overflows++; 50865340Scg#endif 50965340Scg return (bufp->ovfl); 51065205Scg} 51165205Scg 51265205Scg/* 51374763Scg * Returns: 51470134Scg * 0 indicates SUCCESS 51565205Scg * -1 indicates FAILURE 51665205Scg */ 51765205Scgint 51865205Scg__get_page(HTAB *hashp, char *p, u_int32_t bucket, int is_bucket, int is_disk, 51970134Scg int is_bitmap) 52070134Scg{ 52170134Scg int fd, page, size, rsize; 52270134Scg u_int16_t *bp; 52370134Scg 52470134Scg fd = hashp->fp; 52570134Scg size = hashp->BSIZE; 52670134Scg 52770134Scg if ((fd == -1) || !is_disk) { 52870134Scg PAGE_INIT(p); 52970134Scg return (0); 53070134Scg } 53170134Scg if (is_bucket) 53270134Scg page = BUCKET_TO_PAGE(bucket); 53370134Scg else 53470134Scg page = OADDR_TO_PAGE(bucket); 53570134Scg if ((rsize = pread(fd, p, size, (off_t)page << hashp->BSHIFT)) == -1) 53670134Scg return (-1); 53770134Scg bp = (u_int16_t *)p; 53870134Scg if (!rsize) 53970134Scg bp[0] = 0; /* We hit the EOF, so initialize a new page */ 54070134Scg else 54170134Scg if (rsize != size) { 54270134Scg errno = EFTYPE; 54370134Scg return (-1); 54470134Scg } 54570134Scg if (!is_bitmap && !bp[0]) { 54670134Scg PAGE_INIT(p); 54770134Scg } else 54870134Scg if (hashp->LORDER != BYTE_ORDER) { 54970134Scg int i, max; 55070134Scg 55170134Scg if (is_bitmap) { 55270134Scg max = hashp->BSIZE >> 2; /* divide by 4 */ 55370134Scg for (i = 0; i < max; i++) 55470134Scg M_32_SWAP(((int *)p)[i]); 55570134Scg } else { 55670134Scg M_16_SWAP(bp[0]); 55770134Scg max = bp[0] + 2; 55870134Scg for (i = 1; i <= max; i++) 55970134Scg M_16_SWAP(bp[i]); 56070134Scg } 56170134Scg } 56270134Scg return (0); 56370134Scg} 56470134Scg 56570134Scg/* 56670134Scg * Write page p to disk 56770134Scg * 56870134Scg * Returns: 56970134Scg * 0 ==> OK 57070134Scg * -1 ==>failure 57170134Scg */ 57270134Scgint 57370134Scg__put_page(HTAB *hashp, char *p, u_int32_t bucket, int is_bucket, int is_bitmap) 57470134Scg{ 57570134Scg int fd, page, size; 57670134Scg ssize_t wsize; 57778564Sgreid char pbuf[MAX_BSIZE]; 57870134Scg 57970134Scg size = hashp->BSIZE; 58070134Scg if ((hashp->fp == -1) && open_temp(hashp)) 58170134Scg return (-1); 58270134Scg fd = hashp->fp; 58370134Scg 58470134Scg if (hashp->LORDER != BYTE_ORDER) { 58570134Scg int i, max; 58670134Scg 58770134Scg memcpy(pbuf, p, size); 58870134Scg if (is_bitmap) { 58970134Scg max = hashp->BSIZE >> 2; /* divide by 4 */ 59070134Scg for (i = 0; i < max; i++) 59170134Scg M_32_SWAP(((int *)pbuf)[i]); 59270134Scg } else { 59370134Scg uint16_t *bp = (uint16_t *)(void *)pbuf; 59470134Scg max = bp[0] + 2; 59570134Scg for (i = 0; i <= max; i++) 59670134Scg M_16_SWAP(bp[i]); 59770134Scg } 59870134Scg p = pbuf; 59970134Scg } 60070134Scg if (is_bucket) 60170134Scg page = BUCKET_TO_PAGE(bucket); 60270134Scg else 60370134Scg page = OADDR_TO_PAGE(bucket); 60470134Scg if ((wsize = pwrite(fd, p, size, (off_t)page << hashp->BSHIFT)) == -1) 60570134Scg /* Errno is set */ 60670134Scg return (-1); 60770134Scg if (wsize != size) { 60870134Scg errno = EFTYPE; 60970134Scg return (-1); 61070134Scg } 61170134Scg return (0); 61270134Scg} 61384658Scg 61484658Scg#define BYTE_MASK ((1 << INT_BYTE_SHIFT) -1) 61570134Scg/* 61670134Scg * Initialize a new bitmap page. Bitmap pages are left in memory 61770134Scg * once they are read in. 61870134Scg */ 61970134Scgint 62070134Scg__ibitmap(HTAB *hashp, int pnum, int nbits, int ndx) 62170134Scg{ 62270134Scg u_int32_t *ip; 62370134Scg int clearbytes, clearints; 62470134Scg 62574763Scg if ((ip = (u_int32_t *)malloc(hashp->BSIZE)) == NULL) 62670134Scg return (1); 62770134Scg hashp->nmaps++; 62870134Scg clearints = ((nbits - 1) >> INT_BYTE_SHIFT) + 1; 62970134Scg clearbytes = clearints << INT_TO_BYTE; 63070134Scg (void)memset((char *)ip, 0, clearbytes); 63170134Scg (void)memset(((char *)ip) + clearbytes, 0xFF, 63270134Scg hashp->BSIZE - clearbytes); 63370134Scg ip[clearints - 1] = ALL_SET << (nbits & BYTE_MASK); 63484658Scg SETBIT(ip, 0); 635117126Sscottl hashp->BITMAPS[ndx] = (u_int16_t)pnum; 636117126Sscottl hashp->mapp[ndx] = ip; 63770134Scg return (0); 63870134Scg} 63970134Scg 64070134Scgstatic u_int32_t 64170134Scgfirst_free(u_int32_t map) 64270134Scg{ 64370134Scg u_int32_t i, mask; 64470134Scg 64570134Scg mask = 0x1; 64670134Scg for (i = 0; i < BITS_PER_MAP; i++) { 64770134Scg if (!(mask & map)) 64870134Scg return (i); 64970134Scg mask = mask << 1; 65070134Scg } 651102889Ssobomax return (i); 652102889Ssobomax} 653102889Ssobomax 65470134Scgstatic u_int16_t 65570134Scgoverflow_page(HTAB *hashp) 65670134Scg{ 65770134Scg u_int32_t *freep; 65870134Scg int max_free, offset, splitnum; 65970134Scg u_int16_t addr; 66070134Scg int bit, first_page, free_bit, free_page, i, in_use_bits, j; 66170134Scg#ifdef DEBUG2 66270134Scg int tmp1, tmp2; 66370134Scg#endif 66470134Scg splitnum = hashp->OVFL_POINT; 66570134Scg max_free = hashp->SPARES[splitnum]; 66670134Scg 66770134Scg free_page = (max_free - 1) >> (hashp->BSHIFT + BYTE_SHIFT); 66870134Scg free_bit = (max_free - 1) & ((hashp->BSIZE << BYTE_SHIFT) - 1); 66970134Scg 67070134Scg /* Look through all the free maps to find the first free block */ 67170134Scg first_page = hashp->LAST_FREED >>(hashp->BSHIFT + BYTE_SHIFT); 67270134Scg for ( i = first_page; i <= free_page; i++ ) { 67370134Scg if (!(freep = (u_int32_t *)hashp->mapp[i]) && 674102889Ssobomax !(freep = fetch_bitmap(hashp, i))) 675102889Ssobomax return (0); 676102889Ssobomax if (i == free_page) 677102889Ssobomax in_use_bits = free_bit; 678102889Ssobomax else 679102889Ssobomax in_use_bits = (hashp->BSIZE << BYTE_SHIFT) - 1; 680102889Ssobomax 681102889Ssobomax if (i == first_page) { 682102889Ssobomax bit = hashp->LAST_FREED & 683102889Ssobomax ((hashp->BSIZE << BYTE_SHIFT) - 1); 684102889Ssobomax j = bit / BITS_PER_MAP; 685102889Ssobomax bit = rounddown2(bit, BITS_PER_MAP); 68670134Scg } else { 68770134Scg bit = 0; 68870134Scg j = 0; 68970134Scg } 69070134Scg for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP) 69170134Scg if (freep[j] != ALL_SET) 69270134Scg goto found; 69370134Scg } 69470134Scg 69570134Scg /* No Free Page Found */ 69670134Scg hashp->LAST_FREED = hashp->SPARES[splitnum]; 69770134Scg hashp->SPARES[splitnum]++; 69870134Scg offset = hashp->SPARES[splitnum] - 69970134Scg (splitnum ? hashp->SPARES[splitnum - 1] : 0); 70070134Scg 701102889Ssobomax#define OVMSG "HASH: Out of overflow pages. Increase page size\n" 702102889Ssobomax if (offset > SPLITMASK) { 703102889Ssobomax if (++splitnum >= NCACHED) { 704102889Ssobomax (void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); 705102889Ssobomax errno = EFBIG; 706102889Ssobomax return (0); 707102889Ssobomax } 708102889Ssobomax hashp->OVFL_POINT = splitnum; 70970134Scg hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1]; 710102889Ssobomax hashp->SPARES[splitnum-1]--; 711102889Ssobomax offset = 1; 712102889Ssobomax } 713102889Ssobomax 714102889Ssobomax /* Check if we need to allocate a new bitmap page */ 715102889Ssobomax if (free_bit == (hashp->BSIZE << BYTE_SHIFT) - 1) { 716102889Ssobomax free_page++; 717102889Ssobomax if (free_page >= NCACHED) { 718102889Ssobomax (void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); 719102889Ssobomax errno = EFBIG; 720102889Ssobomax return (0); 721102889Ssobomax } 722102889Ssobomax /* 723102889Ssobomax * This is tricky. The 1 indicates that you want the new page 724102889Ssobomax * allocated with 1 clear bit. Actually, you are going to 725102889Ssobomax * allocate 2 pages from this map. The first is going to be 726102889Ssobomax * the map page, the second is the overflow page we were 727102889Ssobomax * looking for. The init_bitmap routine automatically, sets 728102889Ssobomax * the first bit of itself to indicate that the bitmap itself 729102889Ssobomax * is in use. We would explicitly set the second bit, but 730102889Ssobomax * don't have to if we tell init_bitmap not to leave it clear 731102889Ssobomax * in the first place. 732102889Ssobomax */ 733102889Ssobomax if (__ibitmap(hashp, 734102889Ssobomax (int)OADDR_OF(splitnum, offset), 1, free_page)) 735102889Ssobomax return (0); 736102889Ssobomax hashp->SPARES[splitnum]++; 737102889Ssobomax#ifdef DEBUG2 738102889Ssobomax free_bit = 2; 739102889Ssobomax#endif 740102889Ssobomax offset++; 741102889Ssobomax if (offset > SPLITMASK) { 742102889Ssobomax if (++splitnum >= NCACHED) { 743102889Ssobomax (void)_write(STDERR_FILENO, OVMSG, 744102889Ssobomax sizeof(OVMSG) - 1); 74570134Scg errno = EFBIG; 74670134Scg return (0); 74770134Scg } 74870134Scg hashp->OVFL_POINT = splitnum; 74970134Scg hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1]; 75070134Scg hashp->SPARES[splitnum-1]--; 75170134Scg offset = 0; 752102889Ssobomax } 75370134Scg } else { 75470134Scg /* 755102889Ssobomax * Free_bit addresses the last used bit. Bump it to address 756102889Ssobomax * the first available bit. 757102889Ssobomax */ 758102889Ssobomax free_bit++; 759102889Ssobomax SETBIT(freep, free_bit); 760102889Ssobomax } 761102889Ssobomax 762102889Ssobomax /* Calculate address of the new overflow page */ 763102889Ssobomax addr = OADDR_OF(splitnum, offset); 764102889Ssobomax#ifdef DEBUG2 765102889Ssobomax (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n", 766102889Ssobomax addr, free_bit, free_page); 767102889Ssobomax#endif 768102889Ssobomax return (addr); 769102889Ssobomax 770102889Ssobomaxfound: 771102889Ssobomax bit = bit + first_free(freep[j]); 772102889Ssobomax SETBIT(freep, bit); 773102889Ssobomax#ifdef DEBUG2 774102889Ssobomax tmp1 = bit; 775102889Ssobomax tmp2 = i; 77665205Scg#endif 77765205Scg /* 77865205Scg * Bits are addressed starting with 0, but overflow pages are addressed 77965205Scg * beginning at 1. Bit is a bit addressnumber, so we need to increment 78065205Scg * it to convert it to a page number. 781102889Ssobomax */ 782102889Ssobomax bit = 1 + bit + (i * (hashp->BSIZE << BYTE_SHIFT)); 783102889Ssobomax if (bit >= hashp->LAST_FREED) 784102889Ssobomax hashp->LAST_FREED = bit - 1; 785102889Ssobomax 786102889Ssobomax /* Calculate the split number for this page */ 787102889Ssobomax for (i = 0; (i < splitnum) && (bit > hashp->SPARES[i]); i++); 788102889Ssobomax offset = (i ? bit - hashp->SPARES[i - 1] : bit); 789102889Ssobomax if (offset >= SPLITMASK) { 790102889Ssobomax (void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); 79165205Scg errno = EFBIG; 79265205Scg return (0); /* Out of overflow pages */ 79365205Scg } 79465205Scg addr = OADDR_OF(i, offset); 79565205Scg#ifdef DEBUG2 79665205Scg (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n", 79782180Scg addr, tmp1, tmp2); 79865205Scg#endif 79965205Scg 80079046Scg /* Allocate and return the overflow page */ 80179046Scg return (addr); 80279046Scg} 803 804/* 805 * Mark this overflow page as free. 806 */ 807void 808__free_ovflpage(HTAB *hashp, BUFHEAD *obufp) 809{ 810 u_int16_t addr; 811 u_int32_t *freep; 812 int bit_address, free_page, free_bit; 813 u_int16_t ndx; 814 815 addr = obufp->addr; 816#ifdef DEBUG1 817 (void)fprintf(stderr, "Freeing %d\n", addr); 818#endif 819 ndx = (((u_int16_t)addr) >> SPLITSHIFT); 820 bit_address = 821 (ndx ? hashp->SPARES[ndx - 1] : 0) + (addr & SPLITMASK) - 1; 822 if (bit_address < hashp->LAST_FREED) 823 hashp->LAST_FREED = bit_address; 824 free_page = (bit_address >> (hashp->BSHIFT + BYTE_SHIFT)); 825 free_bit = bit_address & ((hashp->BSIZE << BYTE_SHIFT) - 1); 826 827 if (!(freep = hashp->mapp[free_page])) 828 freep = fetch_bitmap(hashp, free_page); 829#ifdef DEBUG 830 /* 831 * This had better never happen. It means we tried to read a bitmap 832 * that has already had overflow pages allocated off it, and we 833 * failed to read it from the file. 834 */ 835 if (!freep) 836 assert(0); 837#endif 838 CLRBIT(freep, free_bit); 839#ifdef DEBUG2 840 (void)fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n", 841 obufp->addr, free_bit, free_page); 842#endif 843 __reclaim_buf(hashp, obufp); 844} 845 846/* 847 * Returns: 848 * 0 success 849 * -1 failure 850 */ 851static int 852open_temp(HTAB *hashp) 853{ 854 sigset_t set, oset; 855 int len; 856 char *envtmp = NULL; 857 char path[MAXPATHLEN]; 858 859 if (issetugid() == 0) 860 envtmp = getenv("TMPDIR"); 861 len = snprintf(path, 862 sizeof(path), "%s/_hash.XXXXXX", envtmp ? envtmp : "/tmp"); 863 if (len < 0 || len >= (int)sizeof(path)) { 864 errno = ENAMETOOLONG; 865 return (-1); 866 } 867 868 /* Block signals; make sure file goes away at process exit. */ 869 (void)sigfillset(&set); 870 (void)__libc_sigprocmask(SIG_BLOCK, &set, &oset); 871 if ((hashp->fp = mkostemp(path, O_CLOEXEC)) != -1) 872 (void)unlink(path); 873 (void)__libc_sigprocmask(SIG_SETMASK, &oset, (sigset_t *)NULL); 874 return (hashp->fp != -1 ? 0 : -1); 875} 876 877/* 878 * We have to know that the key will fit, but the last entry on the page is 879 * an overflow pair, so we need to shift things. 880 */ 881static void 882squeeze_key(u_int16_t *sp, const DBT *key, const DBT *val) 883{ 884 char *p; 885 u_int16_t free_space, n, off, pageno; 886 887 p = (char *)sp; 888 n = sp[0]; 889 free_space = FREESPACE(sp); 890 off = OFFSET(sp); 891 892 pageno = sp[n - 1]; 893 off -= key->size; 894 sp[n - 1] = off; 895 memmove(p + off, key->data, key->size); 896 off -= val->size; 897 sp[n] = off; 898 memmove(p + off, val->data, val->size); 899 sp[0] = n + 2; 900 sp[n + 1] = pageno; 901 sp[n + 2] = OVFLPAGE; 902 FREESPACE(sp) = free_space - PAIRSIZE(key, val); 903 OFFSET(sp) = off; 904} 905 906static u_int32_t * 907fetch_bitmap(HTAB *hashp, int ndx) 908{ 909 if (ndx >= hashp->nmaps) 910 return (NULL); 911 if ((hashp->mapp[ndx] = (u_int32_t *)malloc(hashp->BSIZE)) == NULL) 912 return (NULL); 913 if (__get_page(hashp, 914 (char *)hashp->mapp[ndx], hashp->BITMAPS[ndx], 0, 1, 1)) { 915 free(hashp->mapp[ndx]); 916 return (NULL); 917 } 918 return (hashp->mapp[ndx]); 919} 920 921#ifdef DEBUG4 922int 923print_chain(int addr) 924{ 925 BUFHEAD *bufp; 926 short *bp, oaddr; 927 928 (void)fprintf(stderr, "%d ", addr); 929 bufp = __get_buf(hashp, addr, NULL, 0); 930 bp = (short *)bufp->page; 931 while (bp[0] && ((bp[bp[0]] == OVFLPAGE) || 932 ((bp[0] > 2) && bp[2] < REAL_KEY))) { 933 oaddr = bp[bp[0] - 1]; 934 (void)fprintf(stderr, "%d ", (int)oaddr); 935 bufp = __get_buf(hashp, (int)oaddr, bufp, 0); 936 bp = (short *)bufp->page; 937 } 938 (void)fprintf(stderr, "\n"); 939} 940#endif 941