hash.h revision 14287
11573Srgrimes/*- 214287Spst * Copyright (c) 1990, 1993, 1994 31573Srgrimes * The Regents of the University of California. All rights reserved. 41573Srgrimes * 51573Srgrimes * This code is derived from software contributed to Berkeley by 61573Srgrimes * Margo Seltzer. 71573Srgrimes * 81573Srgrimes * Redistribution and use in source and binary forms, with or without 91573Srgrimes * modification, are permitted provided that the following conditions 101573Srgrimes * are met: 111573Srgrimes * 1. Redistributions of source code must retain the above copyright 121573Srgrimes * notice, this list of conditions and the following disclaimer. 131573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141573Srgrimes * notice, this list of conditions and the following disclaimer in the 151573Srgrimes * documentation and/or other materials provided with the distribution. 161573Srgrimes * 3. All advertising materials mentioning features or use of this software 171573Srgrimes * must display the following acknowledgement: 181573Srgrimes * This product includes software developed by the University of 191573Srgrimes * California, Berkeley and its contributors. 201573Srgrimes * 4. Neither the name of the University nor the names of its contributors 211573Srgrimes * may be used to endorse or promote products derived from this software 221573Srgrimes * without specific prior written permission. 231573Srgrimes * 241573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 251573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 261573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 271573Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 281573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 291573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 301573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 311573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 321573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 331573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 341573Srgrimes * SUCH DAMAGE. 351573Srgrimes * 3614287Spst * @(#)hash.h 8.3 (Berkeley) 5/31/94 371573Srgrimes */ 381573Srgrimes 391573Srgrimes/* Operations */ 401573Srgrimestypedef enum { 411573Srgrimes HASH_GET, HASH_PUT, HASH_PUTNEW, HASH_DELETE, HASH_FIRST, HASH_NEXT 421573Srgrimes} ACTION; 431573Srgrimes 441573Srgrimes/* Buffer Management structures */ 451573Srgrimestypedef struct _bufhead BUFHEAD; 461573Srgrimes 471573Srgrimesstruct _bufhead { 4814287Spst BUFHEAD *prev; /* LRU links */ 4914287Spst BUFHEAD *next; /* LRU links */ 5014287Spst BUFHEAD *ovfl; /* Overflow page buffer header */ 5114287Spst u_int32_t addr; /* Address of this page */ 5214287Spst char *page; /* Actual page data */ 5314287Spst char flags; 541573Srgrimes#define BUF_MOD 0x0001 551573Srgrimes#define BUF_DISK 0x0002 561573Srgrimes#define BUF_BUCKET 0x0004 571573Srgrimes#define BUF_PIN 0x0008 581573Srgrimes}; 591573Srgrimes 601573Srgrimes#define IS_BUCKET(X) ((X) & BUF_BUCKET) 611573Srgrimes 621573Srgrimestypedef BUFHEAD **SEGMENT; 631573Srgrimes 641573Srgrimes/* Hash Table Information */ 6514287Spsttypedef struct hashhdr { /* Disk resident portion */ 6614287Spst int magic; /* Magic NO for hash tables */ 6714287Spst int version; /* Version ID */ 6814287Spst u_int32_t lorder; /* Byte Order */ 6914287Spst int bsize; /* Bucket/Page Size */ 7014287Spst int bshift; /* Bucket shift */ 7114287Spst int dsize; /* Directory Size */ 7214287Spst int ssize; /* Segment Size */ 7314287Spst int sshift; /* Segment shift */ 7414287Spst int ovfl_point; /* Where overflow pages are being 7514287Spst * allocated */ 7614287Spst int last_freed; /* Last overflow page freed */ 7714287Spst int max_bucket; /* ID of Maximum bucket in use */ 7814287Spst int high_mask; /* Mask to modulo into entire table */ 7914287Spst int low_mask; /* Mask to modulo into lower half of 8014287Spst * table */ 8114287Spst int ffactor; /* Fill factor */ 8214287Spst int nkeys; /* Number of keys in hash table */ 8314287Spst int hdrpages; /* Size of table header */ 8414287Spst int h_charkey; /* value of hash(CHARKEY) */ 8514287Spst#define NCACHED 32 /* number of bit maps and spare 8614287Spst * points */ 8714287Spst int spares[NCACHED];/* spare pages for overflow */ 8814287Spst u_int16_t bitmaps[NCACHED]; /* address of overflow page 8914287Spst * bitmaps */ 901573Srgrimes} HASHHDR; 911573Srgrimes 9214287Spsttypedef struct htab { /* Memory resident data structure */ 9314287Spst HASHHDR hdr; /* Header */ 9414287Spst int nsegs; /* Number of allocated segments */ 9514287Spst int exsegs; /* Number of extra allocated 9614287Spst * segments */ 9714287Spst u_int32_t /* Hash function */ 981573Srgrimes (*hash)__P((const void *, size_t)); 9914287Spst int flags; /* Flag values */ 10014287Spst int fp; /* File pointer */ 10114287Spst char *tmp_buf; /* Temporary Buffer for BIG data */ 10214287Spst char *tmp_key; /* Temporary Buffer for BIG keys */ 10314287Spst BUFHEAD *cpage; /* Current page */ 10414287Spst int cbucket; /* Current bucket */ 10514287Spst int cndx; /* Index of next item on cpage */ 10614287Spst int error; /* Error Number -- for DBM 10714287Spst * compatability */ 10814287Spst int new_file; /* Indicates if fd is backing store 10914287Spst * or no */ 11014287Spst int save_file; /* Indicates whether we need to flush 11114287Spst * file at 11214287Spst * exit */ 11314287Spst u_int32_t *mapp[NCACHED]; /* Pointers to page maps */ 11414287Spst int nmaps; /* Initial number of bitmaps */ 11514287Spst int nbufs; /* Number of buffers left to 11614287Spst * allocate */ 11714287Spst BUFHEAD bufhead; /* Header of buffer lru list */ 11814287Spst SEGMENT *dir; /* Hash Bucket directory */ 1191573Srgrimes} HTAB; 1201573Srgrimes 1211573Srgrimes/* 1221573Srgrimes * Constants 1231573Srgrimes */ 1241573Srgrimes#define MAX_BSIZE 65536 /* 2^16 */ 1251573Srgrimes#define MIN_BUFFERS 6 1261573Srgrimes#define MINHDRSIZE 512 1271573Srgrimes#define DEF_BUFSIZE 65536 /* 64 K */ 1281573Srgrimes#define DEF_BUCKET_SIZE 4096 1291573Srgrimes#define DEF_BUCKET_SHIFT 12 /* log2(BUCKET) */ 1301573Srgrimes#define DEF_SEGSIZE 256 1311573Srgrimes#define DEF_SEGSIZE_SHIFT 8 /* log2(SEGSIZE) */ 1321573Srgrimes#define DEF_DIRSIZE 256 1331573Srgrimes#define DEF_FFACTOR 65536 1341573Srgrimes#define MIN_FFACTOR 4 1351573Srgrimes#define SPLTMAX 8 1361573Srgrimes#define CHARKEY "%$sniglet^&" 1371573Srgrimes#define NUMKEY 1038583 1381573Srgrimes#define BYTE_SHIFT 3 1391573Srgrimes#define INT_TO_BYTE 2 1401573Srgrimes#define INT_BYTE_SHIFT 5 14114287Spst#define ALL_SET ((u_int32_t)0xFFFFFFFF) 1421573Srgrimes#define ALL_CLEAR 0 1431573Srgrimes 14414287Spst#define PTROF(X) ((BUFHEAD *)((ptrdiff_t)(X)&~0x3)) 14514287Spst#define ISMOD(X) ((u_int32_t)(ptrdiff_t)(X)&0x1) 14614287Spst#define DOMOD(X) ((X) = (char *)((ptrdiff_t)(X)|0x1)) 14714287Spst#define ISDISK(X) ((u_int32_t)(ptrdiff_t)(X)&0x2) 14814287Spst#define DODISK(X) ((X) = (char *)((ptrdiff_t)(X)|0x2)) 1491573Srgrimes 1501573Srgrimes#define BITS_PER_MAP 32 1511573Srgrimes 1521573Srgrimes/* Given the address of the beginning of a big map, clear/set the nth bit */ 1531573Srgrimes#define CLRBIT(A, N) ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP))) 1541573Srgrimes#define SETBIT(A, N) ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP))) 1551573Srgrimes#define ISSET(A, N) ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP))) 1561573Srgrimes 1571573Srgrimes/* Overflow management */ 1581573Srgrimes/* 1591573Srgrimes * Overflow page numbers are allocated per split point. At each doubling of 1601573Srgrimes * the table, we can allocate extra pages. So, an overflow page number has 1611573Srgrimes * the top 5 bits indicate which split point and the lower 11 bits indicate 1621573Srgrimes * which page at that split point is indicated (pages within split points are 1631573Srgrimes * numberered starting with 1). 1641573Srgrimes */ 1651573Srgrimes 1661573Srgrimes#define SPLITSHIFT 11 1671573Srgrimes#define SPLITMASK 0x7FF 16814287Spst#define SPLITNUM(N) (((u_int32_t)(N)) >> SPLITSHIFT) 1691573Srgrimes#define OPAGENUM(N) ((N) & SPLITMASK) 17014287Spst#define OADDR_OF(S,O) ((u_int32_t)((u_int32_t)(S) << SPLITSHIFT) + (O)) 1711573Srgrimes 1721573Srgrimes#define BUCKET_TO_PAGE(B) \ 1731573Srgrimes (B) + hashp->HDRPAGES + ((B) ? hashp->SPARES[__log2((B)+1)-1] : 0) 1741573Srgrimes#define OADDR_TO_PAGE(B) \ 1751573Srgrimes BUCKET_TO_PAGE ( (1 << SPLITNUM((B))) -1 ) + OPAGENUM((B)); 1761573Srgrimes 1771573Srgrimes/* 1781573Srgrimes * page.h contains a detailed description of the page format. 1791573Srgrimes * 1801573Srgrimes * Normally, keys and data are accessed from offset tables in the top of 1811573Srgrimes * each page which point to the beginning of the key and data. There are 1821573Srgrimes * four flag values which may be stored in these offset tables which indicate 1831573Srgrimes * the following: 1841573Srgrimes * 1851573Srgrimes * 1861573Srgrimes * OVFLPAGE Rather than a key data pair, this pair contains 1871573Srgrimes * the address of an overflow page. The format of 1881573Srgrimes * the pair is: 1891573Srgrimes * OVERFLOW_PAGE_NUMBER OVFLPAGE 1901573Srgrimes * 1911573Srgrimes * PARTIAL_KEY This must be the first key/data pair on a page 1921573Srgrimes * and implies that page contains only a partial key. 1931573Srgrimes * That is, the key is too big to fit on a single page 1941573Srgrimes * so it starts on this page and continues on the next. 1951573Srgrimes * The format of the page is: 1961573Srgrimes * KEY_OFF PARTIAL_KEY OVFL_PAGENO OVFLPAGE 1978870Srgrimes * 1981573Srgrimes * KEY_OFF -- offset of the beginning of the key 1991573Srgrimes * PARTIAL_KEY -- 1 2001573Srgrimes * OVFL_PAGENO - page number of the next overflow page 2011573Srgrimes * OVFLPAGE -- 0 2021573Srgrimes * 2031573Srgrimes * FULL_KEY This must be the first key/data pair on the page. It 2041573Srgrimes * is used in two cases. 2051573Srgrimes * 2061573Srgrimes * Case 1: 2071573Srgrimes * There is a complete key on the page but no data 2081573Srgrimes * (because it wouldn't fit). The next page contains 2091573Srgrimes * the data. 2101573Srgrimes * 2111573Srgrimes * Page format it: 2121573Srgrimes * KEY_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE 2131573Srgrimes * 2141573Srgrimes * KEY_OFF -- offset of the beginning of the key 2151573Srgrimes * FULL_KEY -- 2 2161573Srgrimes * OVFL_PAGENO - page number of the next overflow page 2171573Srgrimes * OVFLPAGE -- 0 2181573Srgrimes * 2191573Srgrimes * Case 2: 2201573Srgrimes * This page contains no key, but part of a large 2211573Srgrimes * data field, which is continued on the next page. 2221573Srgrimes * 2231573Srgrimes * Page format it: 2241573Srgrimes * DATA_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE 2251573Srgrimes * 2261573Srgrimes * KEY_OFF -- offset of the beginning of the data on 2271573Srgrimes * this page 2281573Srgrimes * FULL_KEY -- 2 2291573Srgrimes * OVFL_PAGENO - page number of the next overflow page 2301573Srgrimes * OVFLPAGE -- 0 2311573Srgrimes * 2328870Srgrimes * FULL_KEY_DATA 2331573Srgrimes * This must be the first key/data pair on the page. 2341573Srgrimes * There are two cases: 2351573Srgrimes * 2361573Srgrimes * Case 1: 2371573Srgrimes * This page contains a key and the beginning of the 2381573Srgrimes * data field, but the data field is continued on the 2391573Srgrimes * next page. 2401573Srgrimes * 2411573Srgrimes * Page format is: 2421573Srgrimes * KEY_OFF FULL_KEY_DATA OVFL_PAGENO DATA_OFF 2431573Srgrimes * 2441573Srgrimes * KEY_OFF -- offset of the beginning of the key 2451573Srgrimes * FULL_KEY_DATA -- 3 2461573Srgrimes * OVFL_PAGENO - page number of the next overflow page 2471573Srgrimes * DATA_OFF -- offset of the beginning of the data 2481573Srgrimes * 2491573Srgrimes * Case 2: 2501573Srgrimes * This page contains the last page of a big data pair. 2511573Srgrimes * There is no key, only the tail end of the data 2521573Srgrimes * on this page. 2531573Srgrimes * 2541573Srgrimes * Page format is: 2551573Srgrimes * DATA_OFF FULL_KEY_DATA <OVFL_PAGENO> <OVFLPAGE> 2561573Srgrimes * 2571573Srgrimes * DATA_OFF -- offset of the beginning of the data on 2581573Srgrimes * this page 2591573Srgrimes * FULL_KEY_DATA -- 3 2601573Srgrimes * OVFL_PAGENO - page number of the next overflow page 2611573Srgrimes * OVFLPAGE -- 0 2621573Srgrimes * 2631573Srgrimes * OVFL_PAGENO and OVFLPAGE are optional (they are 2641573Srgrimes * not present if there is no next page). 2651573Srgrimes */ 2661573Srgrimes 2671573Srgrimes#define OVFLPAGE 0 2681573Srgrimes#define PARTIAL_KEY 1 2691573Srgrimes#define FULL_KEY 2 2701573Srgrimes#define FULL_KEY_DATA 3 2711573Srgrimes#define REAL_KEY 4 2721573Srgrimes 2731573Srgrimes/* Short hands for accessing structure */ 2741573Srgrimes#define BSIZE hdr.bsize 2751573Srgrimes#define BSHIFT hdr.bshift 2761573Srgrimes#define DSIZE hdr.dsize 2771573Srgrimes#define SGSIZE hdr.ssize 2781573Srgrimes#define SSHIFT hdr.sshift 2791573Srgrimes#define LORDER hdr.lorder 2801573Srgrimes#define OVFL_POINT hdr.ovfl_point 2811573Srgrimes#define LAST_FREED hdr.last_freed 2821573Srgrimes#define MAX_BUCKET hdr.max_bucket 2831573Srgrimes#define FFACTOR hdr.ffactor 2841573Srgrimes#define HIGH_MASK hdr.high_mask 2851573Srgrimes#define LOW_MASK hdr.low_mask 2861573Srgrimes#define NKEYS hdr.nkeys 2871573Srgrimes#define HDRPAGES hdr.hdrpages 2881573Srgrimes#define SPARES hdr.spares 2891573Srgrimes#define BITMAPS hdr.bitmaps 2901573Srgrimes#define VERSION hdr.version 2911573Srgrimes#define MAGIC hdr.magic 2921573Srgrimes#define NEXT_FREE hdr.next_free 2931573Srgrimes#define H_CHARKEY hdr.h_charkey 294