hash_page.c revision 56698
1/*-
2 * Copyright (c) 1990, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Margo Seltzer.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * $FreeBSD: head/lib/libc/db/hash/hash_page.c 56698 2000-01-27 23:07:25Z jasone $
37 */
38
39#if defined(LIBC_SCCS) && !defined(lint)
40static char sccsid[] = "@(#)hash_page.c	8.7 (Berkeley) 8/16/94";
41#endif /* LIBC_SCCS and not lint */
42
43/*
44 * PACKAGE:  hashing
45 *
46 * DESCRIPTION:
47 *	Page manipulation for hashing package.
48 *
49 * ROUTINES:
50 *
51 * External
52 *	__get_page
53 *	__add_ovflpage
54 * Internal
55 *	overflow_page
56 *	open_temp
57 */
58
59#include <sys/types.h>
60
61#include <errno.h>
62#include <fcntl.h>
63#include <signal.h>
64#include <stdio.h>
65#include <stdlib.h>
66#include <string.h>
67#include <unistd.h>
68#ifdef DEBUG
69#include <assert.h>
70#endif
71
72#include <db.h>
73#include "hash.h"
74#include "page.h"
75#include "extern.h"
76
77static u_int32_t	*fetch_bitmap __P((HTAB *, int));
78static u_int32_t	 first_free __P((u_int32_t));
79static int	 open_temp __P((HTAB *));
80static u_int16_t	 overflow_page __P((HTAB *));
81static void	 putpair __P((char *, const DBT *, const DBT *));
82static void	 squeeze_key __P((u_int16_t *, const DBT *, const DBT *));
83static int	 ugly_split
84		    __P((HTAB *, u_int32_t, BUFHEAD *, BUFHEAD *, int, int));
85
86#define	PAGE_INIT(P) { \
87	((u_int16_t *)(P))[0] = 0; \
88	((u_int16_t *)(P))[1] = hashp->BSIZE - 3 * sizeof(u_int16_t); \
89	((u_int16_t *)(P))[2] = hashp->BSIZE; \
90}
91
92/*
93 * This is called AFTER we have verified that there is room on the page for
94 * the pair (PAIRFITS has returned true) so we go right ahead and start moving
95 * stuff on.
96 */
97static void
98putpair(p, key, val)
99	char *p;
100	const DBT *key, *val;
101{
102	register u_int16_t *bp, n, off;
103
104	bp = (u_int16_t *)p;
105
106	/* Enter the key first. */
107	n = bp[0];
108
109	off = OFFSET(bp) - key->size;
110	memmove(p + off, key->data, key->size);
111	bp[++n] = off;
112
113	/* Now the data. */
114	off -= val->size;
115	memmove(p + off, val->data, val->size);
116	bp[++n] = off;
117
118	/* Adjust page info. */
119	bp[0] = n;
120	bp[n + 1] = off - ((n + 3) * sizeof(u_int16_t));
121	bp[n + 2] = off;
122}
123
124/*
125 * Returns:
126 *	 0 OK
127 *	-1 error
128 */
129extern int
130__delpair(hashp, bufp, ndx)
131	HTAB *hashp;
132	BUFHEAD *bufp;
133	register int ndx;
134{
135	register u_int16_t *bp, newoff;
136	register int n;
137	u_int16_t pairlen;
138
139	bp = (u_int16_t *)bufp->page;
140	n = bp[0];
141
142	if (bp[ndx + 1] < REAL_KEY)
143		return (__big_delete(hashp, bufp));
144	if (ndx != 1)
145		newoff = bp[ndx - 1];
146	else
147		newoff = hashp->BSIZE;
148	pairlen = newoff - bp[ndx + 1];
149
150	if (ndx != (n - 1)) {
151		/* Hard Case -- need to shuffle keys */
152		register int i;
153		register char *src = bufp->page + (int)OFFSET(bp);
154		register char *dst = src + (int)pairlen;
155		memmove(dst, src, bp[ndx + 1] - OFFSET(bp));
156
157		/* Now adjust the pointers */
158		for (i = ndx + 2; i <= n; i += 2) {
159			if (bp[i + 1] == OVFLPAGE) {
160				bp[i - 2] = bp[i];
161				bp[i - 1] = bp[i + 1];
162			} else {
163				bp[i - 2] = bp[i] + pairlen;
164				bp[i - 1] = bp[i + 1] + pairlen;
165			}
166		}
167	}
168	/* Finally adjust the page data */
169	bp[n] = OFFSET(bp) + pairlen;
170	bp[n - 1] = bp[n + 1] + pairlen + 2 * sizeof(u_int16_t);
171	bp[0] = n - 2;
172	hashp->NKEYS--;
173
174	bufp->flags |= BUF_MOD;
175	return (0);
176}
177/*
178 * Returns:
179 *	 0 ==> OK
180 *	-1 ==> Error
181 */
182extern int
183__split_page(hashp, obucket, nbucket)
184	HTAB *hashp;
185	u_int32_t obucket, nbucket;
186{
187	register BUFHEAD *new_bufp, *old_bufp;
188	register u_int16_t *ino;
189	register char *np;
190	DBT key, val;
191	int n, ndx, retval;
192	u_int16_t copyto, diff, off, moved;
193	char *op;
194
195	copyto = (u_int16_t)hashp->BSIZE;
196	off = (u_int16_t)hashp->BSIZE;
197	old_bufp = __get_buf(hashp, obucket, NULL, 0);
198	if (old_bufp == NULL)
199		return (-1);
200	new_bufp = __get_buf(hashp, nbucket, NULL, 0);
201	if (new_bufp == NULL)
202		return (-1);
203
204	old_bufp->flags |= (BUF_MOD | BUF_PIN);
205	new_bufp->flags |= (BUF_MOD | BUF_PIN);
206
207	ino = (u_int16_t *)(op = old_bufp->page);
208	np = new_bufp->page;
209
210	moved = 0;
211
212	for (n = 1, ndx = 1; n < ino[0]; n += 2) {
213		if (ino[n + 1] < REAL_KEY) {
214			retval = ugly_split(hashp, obucket, old_bufp, new_bufp,
215			    (int)copyto, (int)moved);
216			old_bufp->flags &= ~BUF_PIN;
217			new_bufp->flags &= ~BUF_PIN;
218			return (retval);
219
220		}
221		key.data = (u_char *)op + ino[n];
222		key.size = off - ino[n];
223
224		if (__call_hash(hashp, key.data, key.size) == obucket) {
225			/* Don't switch page */
226			diff = copyto - off;
227			if (diff) {
228				copyto = ino[n + 1] + diff;
229				memmove(op + copyto, op + ino[n + 1],
230				    off - ino[n + 1]);
231				ino[ndx] = copyto + ino[n] - ino[n + 1];
232				ino[ndx + 1] = copyto;
233			} else
234				copyto = ino[n + 1];
235			ndx += 2;
236		} else {
237			/* Switch page */
238			val.data = (u_char *)op + ino[n + 1];
239			val.size = ino[n] - ino[n + 1];
240			putpair(np, &key, &val);
241			moved += 2;
242		}
243
244		off = ino[n + 1];
245	}
246
247	/* Now clean up the page */
248	ino[0] -= moved;
249	FREESPACE(ino) = copyto - sizeof(u_int16_t) * (ino[0] + 3);
250	OFFSET(ino) = copyto;
251
252#ifdef DEBUG3
253	(void)fprintf(stderr, "split %d/%d\n",
254	    ((u_int16_t *)np)[0] / 2,
255	    ((u_int16_t *)op)[0] / 2);
256#endif
257	/* unpin both pages */
258	old_bufp->flags &= ~BUF_PIN;
259	new_bufp->flags &= ~BUF_PIN;
260	return (0);
261}
262
263/*
264 * Called when we encounter an overflow or big key/data page during split
265 * handling.  This is special cased since we have to begin checking whether
266 * the key/data pairs fit on their respective pages and because we may need
267 * overflow pages for both the old and new pages.
268 *
269 * The first page might be a page with regular key/data pairs in which case
270 * we have a regular overflow condition and just need to go on to the next
271 * page or it might be a big key/data pair in which case we need to fix the
272 * big key/data pair.
273 *
274 * Returns:
275 *	 0 ==> success
276 *	-1 ==> failure
277 */
278static int
279ugly_split(hashp, obucket, old_bufp, new_bufp, copyto, moved)
280	HTAB *hashp;
281	u_int32_t obucket;	/* Same as __split_page. */
282	BUFHEAD *old_bufp, *new_bufp;
283	int copyto;	/* First byte on page which contains key/data values. */
284	int moved;	/* Number of pairs moved to new page. */
285{
286	register BUFHEAD *bufp;	/* Buffer header for ino */
287	register u_int16_t *ino;	/* Page keys come off of */
288	register u_int16_t *np;	/* New page */
289	register u_int16_t *op;	/* Page keys go on to if they aren't moving */
290
291	BUFHEAD *last_bfp;	/* Last buf header OVFL needing to be freed */
292	DBT key, val;
293	SPLIT_RETURN ret;
294	u_int16_t n, off, ov_addr, scopyto;
295	char *cino;		/* Character value of ino */
296
297	bufp = old_bufp;
298	ino = (u_int16_t *)old_bufp->page;
299	np = (u_int16_t *)new_bufp->page;
300	op = (u_int16_t *)old_bufp->page;
301	last_bfp = NULL;
302	scopyto = (u_int16_t)copyto;	/* ANSI */
303
304	n = ino[0] - 1;
305	while (n < ino[0]) {
306		if (ino[2] < REAL_KEY && ino[2] != OVFLPAGE) {
307			if (__big_split(hashp, old_bufp,
308			    new_bufp, bufp, bufp->addr, obucket, &ret))
309				return (-1);
310			old_bufp = ret.oldp;
311			if (!old_bufp)
312				return (-1);
313			op = (u_int16_t *)old_bufp->page;
314			new_bufp = ret.newp;
315			if (!new_bufp)
316				return (-1);
317			np = (u_int16_t *)new_bufp->page;
318			bufp = ret.nextp;
319			if (!bufp)
320				return (0);
321			cino = (char *)bufp->page;
322			ino = (u_int16_t *)cino;
323			last_bfp = ret.nextp;
324		} else if (ino[n + 1] == OVFLPAGE) {
325			ov_addr = ino[n];
326			/*
327			 * Fix up the old page -- the extra 2 are the fields
328			 * which contained the overflow information.
329			 */
330			ino[0] -= (moved + 2);
331			FREESPACE(ino) =
332			    scopyto - sizeof(u_int16_t) * (ino[0] + 3);
333			OFFSET(ino) = scopyto;
334
335			bufp = __get_buf(hashp, ov_addr, bufp, 0);
336			if (!bufp)
337				return (-1);
338
339			ino = (u_int16_t *)bufp->page;
340			n = 1;
341			scopyto = hashp->BSIZE;
342			moved = 0;
343
344			if (last_bfp)
345				__free_ovflpage(hashp, last_bfp);
346			last_bfp = bufp;
347		}
348		/* Move regular sized pairs of there are any */
349		off = hashp->BSIZE;
350		for (n = 1; (n < ino[0]) && (ino[n + 1] >= REAL_KEY); n += 2) {
351			cino = (char *)ino;
352			key.data = (u_char *)cino + ino[n];
353			key.size = off - ino[n];
354			val.data = (u_char *)cino + ino[n + 1];
355			val.size = ino[n] - ino[n + 1];
356			off = ino[n + 1];
357
358			if (__call_hash(hashp, key.data, key.size) == obucket) {
359				/* Keep on old page */
360				if (PAIRFITS(op, (&key), (&val)))
361					putpair((char *)op, &key, &val);
362				else {
363					old_bufp =
364					    __add_ovflpage(hashp, old_bufp);
365					if (!old_bufp)
366						return (-1);
367					op = (u_int16_t *)old_bufp->page;
368					putpair((char *)op, &key, &val);
369				}
370				old_bufp->flags |= BUF_MOD;
371			} else {
372				/* Move to new page */
373				if (PAIRFITS(np, (&key), (&val)))
374					putpair((char *)np, &key, &val);
375				else {
376					new_bufp =
377					    __add_ovflpage(hashp, new_bufp);
378					if (!new_bufp)
379						return (-1);
380					np = (u_int16_t *)new_bufp->page;
381					putpair((char *)np, &key, &val);
382				}
383				new_bufp->flags |= BUF_MOD;
384			}
385		}
386	}
387	if (last_bfp)
388		__free_ovflpage(hashp, last_bfp);
389	return (0);
390}
391
392/*
393 * Add the given pair to the page
394 *
395 * Returns:
396 *	0 ==> OK
397 *	1 ==> failure
398 */
399extern int
400__addel(hashp, bufp, key, val)
401	HTAB *hashp;
402	BUFHEAD *bufp;
403	const DBT *key, *val;
404{
405	register u_int16_t *bp, *sop;
406	int do_expand;
407
408	bp = (u_int16_t *)bufp->page;
409	do_expand = 0;
410	while (bp[0] && (bp[2] < REAL_KEY || bp[bp[0]] < REAL_KEY))
411		/* Exception case */
412		if (bp[2] == FULL_KEY_DATA && bp[0] == 2)
413			/* This is the last page of a big key/data pair
414			   and we need to add another page */
415			break;
416		else if (bp[2] < REAL_KEY && bp[bp[0]] != OVFLPAGE) {
417			bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
418			if (!bufp)
419				return (-1);
420			bp = (u_int16_t *)bufp->page;
421		} else
422			/* Try to squeeze key on this page */
423			if (FREESPACE(bp) > PAIRSIZE(key, val)) {
424				squeeze_key(bp, key, val);
425				return (0);
426			} else {
427				bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
428				if (!bufp)
429					return (-1);
430				bp = (u_int16_t *)bufp->page;
431			}
432
433	if (PAIRFITS(bp, key, val))
434		putpair(bufp->page, key, val);
435	else {
436		do_expand = 1;
437		bufp = __add_ovflpage(hashp, bufp);
438		if (!bufp)
439			return (-1);
440		sop = (u_int16_t *)bufp->page;
441
442		if (PAIRFITS(sop, key, val))
443			putpair((char *)sop, key, val);
444		else
445			if (__big_insert(hashp, bufp, key, val))
446				return (-1);
447	}
448	bufp->flags |= BUF_MOD;
449	/*
450	 * If the average number of keys per bucket exceeds the fill factor,
451	 * expand the table.
452	 */
453	hashp->NKEYS++;
454	if (do_expand ||
455	    (hashp->NKEYS / (hashp->MAX_BUCKET + 1) > hashp->FFACTOR))
456		return (__expand_table(hashp));
457	return (0);
458}
459
460/*
461 *
462 * Returns:
463 *	pointer on success
464 *	NULL on error
465 */
466extern BUFHEAD *
467__add_ovflpage(hashp, bufp)
468	HTAB *hashp;
469	BUFHEAD *bufp;
470{
471	register u_int16_t *sp;
472	u_int16_t ndx, ovfl_num;
473#ifdef DEBUG1
474	int tmp1, tmp2;
475#endif
476	sp = (u_int16_t *)bufp->page;
477
478	/* Check if we are dynamically determining the fill factor */
479	if (hashp->FFACTOR == DEF_FFACTOR) {
480		hashp->FFACTOR = sp[0] >> 1;
481		if (hashp->FFACTOR < MIN_FFACTOR)
482			hashp->FFACTOR = MIN_FFACTOR;
483	}
484	bufp->flags |= BUF_MOD;
485	ovfl_num = overflow_page(hashp);
486#ifdef DEBUG1
487	tmp1 = bufp->addr;
488	tmp2 = bufp->ovfl ? bufp->ovfl->addr : 0;
489#endif
490	if (!ovfl_num || !(bufp->ovfl = __get_buf(hashp, ovfl_num, bufp, 1)))
491		return (NULL);
492	bufp->ovfl->flags |= BUF_MOD;
493#ifdef DEBUG1
494	(void)fprintf(stderr, "ADDOVFLPAGE: %d->ovfl was %d is now %d\n",
495	    tmp1, tmp2, bufp->ovfl->addr);
496#endif
497	ndx = sp[0];
498	/*
499	 * Since a pair is allocated on a page only if there's room to add
500	 * an overflow page, we know that the OVFL information will fit on
501	 * the page.
502	 */
503	sp[ndx + 4] = OFFSET(sp);
504	sp[ndx + 3] = FREESPACE(sp) - OVFLSIZE;
505	sp[ndx + 1] = ovfl_num;
506	sp[ndx + 2] = OVFLPAGE;
507	sp[0] = ndx + 2;
508#ifdef HASH_STATISTICS
509	hash_overflows++;
510#endif
511	return (bufp->ovfl);
512}
513
514/*
515 * Returns:
516 *	 0 indicates SUCCESS
517 *	-1 indicates FAILURE
518 */
519extern int
520__get_page(hashp, p, bucket, is_bucket, is_disk, is_bitmap)
521	HTAB *hashp;
522	char *p;
523	u_int32_t bucket;
524	int is_bucket, is_disk, is_bitmap;
525{
526	register int fd, page, size;
527	int rsize;
528	u_int16_t *bp;
529
530	fd = hashp->fp;
531	size = hashp->BSIZE;
532
533	if ((fd == -1) || !is_disk) {
534		PAGE_INIT(p);
535		return (0);
536	}
537	if (is_bucket)
538		page = BUCKET_TO_PAGE(bucket);
539	else
540		page = OADDR_TO_PAGE(bucket);
541	if ((lseek(fd, (off_t)page << hashp->BSHIFT, SEEK_SET) == -1) ||
542	    ((rsize = _read(fd, p, size)) == -1))
543		return (-1);
544	bp = (u_int16_t *)p;
545	if (!rsize)
546		bp[0] = 0;	/* We hit the EOF, so initialize a new page */
547	else
548		if (rsize != size) {
549			errno = EFTYPE;
550			return (-1);
551		}
552	if (!is_bitmap && !bp[0]) {
553		PAGE_INIT(p);
554	} else
555		if (hashp->LORDER != BYTE_ORDER) {
556			register int i, max;
557
558			if (is_bitmap) {
559				max = hashp->BSIZE >> 2; /* divide by 4 */
560				for (i = 0; i < max; i++)
561					M_32_SWAP(((int *)p)[i]);
562			} else {
563				M_16_SWAP(bp[0]);
564				max = bp[0] + 2;
565				for (i = 1; i <= max; i++)
566					M_16_SWAP(bp[i]);
567			}
568		}
569	return (0);
570}
571
572/*
573 * Write page p to disk
574 *
575 * Returns:
576 *	 0 ==> OK
577 *	-1 ==>failure
578 */
579extern int
580__put_page(hashp, p, bucket, is_bucket, is_bitmap)
581	HTAB *hashp;
582	char *p;
583	u_int32_t bucket;
584	int is_bucket, is_bitmap;
585{
586	register int fd, page, size;
587	int wsize;
588
589	size = hashp->BSIZE;
590	if ((hashp->fp == -1) && open_temp(hashp))
591		return (-1);
592	fd = hashp->fp;
593
594	if (hashp->LORDER != BYTE_ORDER) {
595		register int i;
596		register int max;
597
598		if (is_bitmap) {
599			max = hashp->BSIZE >> 2;	/* divide by 4 */
600			for (i = 0; i < max; i++)
601				M_32_SWAP(((int *)p)[i]);
602		} else {
603			max = ((u_int16_t *)p)[0] + 2;
604			for (i = 0; i <= max; i++)
605				M_16_SWAP(((u_int16_t *)p)[i]);
606		}
607	}
608	if (is_bucket)
609		page = BUCKET_TO_PAGE(bucket);
610	else
611		page = OADDR_TO_PAGE(bucket);
612	if ((lseek(fd, (off_t)page << hashp->BSHIFT, SEEK_SET) == -1) ||
613	    ((wsize = _write(fd, p, size)) == -1))
614		/* Errno is set */
615		return (-1);
616	if (wsize != size) {
617		errno = EFTYPE;
618		return (-1);
619	}
620	return (0);
621}
622
623#define BYTE_MASK	((1 << INT_BYTE_SHIFT) -1)
624/*
625 * Initialize a new bitmap page.  Bitmap pages are left in memory
626 * once they are read in.
627 */
628extern int
629__ibitmap(hashp, pnum, nbits, ndx)
630	HTAB *hashp;
631	int pnum, nbits, ndx;
632{
633	u_int32_t *ip;
634	int clearbytes, clearints;
635
636	if ((ip = (u_int32_t *)malloc(hashp->BSIZE)) == NULL)
637		return (1);
638	hashp->nmaps++;
639	clearints = ((nbits - 1) >> INT_BYTE_SHIFT) + 1;
640	clearbytes = clearints << INT_TO_BYTE;
641	(void)memset((char *)ip, 0, clearbytes);
642	(void)memset(((char *)ip) + clearbytes, 0xFF,
643	    hashp->BSIZE - clearbytes);
644	ip[clearints - 1] = ALL_SET << (nbits & BYTE_MASK);
645	SETBIT(ip, 0);
646	hashp->BITMAPS[ndx] = (u_int16_t)pnum;
647	hashp->mapp[ndx] = ip;
648	return (0);
649}
650
651static u_int32_t
652first_free(map)
653	u_int32_t map;
654{
655	register u_int32_t i, mask;
656
657	mask = 0x1;
658	for (i = 0; i < BITS_PER_MAP; i++) {
659		if (!(mask & map))
660			return (i);
661		mask = mask << 1;
662	}
663	return (i);
664}
665
666static u_int16_t
667overflow_page(hashp)
668	HTAB *hashp;
669{
670	register u_int32_t *freep;
671	register int max_free, offset, splitnum;
672	u_int16_t addr;
673	int bit, first_page, free_bit, free_page, i, in_use_bits, j;
674#ifdef DEBUG2
675	int tmp1, tmp2;
676#endif
677	splitnum = hashp->OVFL_POINT;
678	max_free = hashp->SPARES[splitnum];
679
680	free_page = (max_free - 1) >> (hashp->BSHIFT + BYTE_SHIFT);
681	free_bit = (max_free - 1) & ((hashp->BSIZE << BYTE_SHIFT) - 1);
682
683	/* Look through all the free maps to find the first free block */
684	first_page = hashp->LAST_FREED >>(hashp->BSHIFT + BYTE_SHIFT);
685	for ( i = first_page; i <= free_page; i++ ) {
686		if (!(freep = (u_int32_t *)hashp->mapp[i]) &&
687		    !(freep = fetch_bitmap(hashp, i)))
688			return (0);
689		if (i == free_page)
690			in_use_bits = free_bit;
691		else
692			in_use_bits = (hashp->BSIZE << BYTE_SHIFT) - 1;
693
694		if (i == first_page) {
695			bit = hashp->LAST_FREED &
696			    ((hashp->BSIZE << BYTE_SHIFT) - 1);
697			j = bit / BITS_PER_MAP;
698			bit = bit & ~(BITS_PER_MAP - 1);
699		} else {
700			bit = 0;
701			j = 0;
702		}
703		for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP)
704			if (freep[j] != ALL_SET)
705				goto found;
706	}
707
708	/* No Free Page Found */
709	hashp->LAST_FREED = hashp->SPARES[splitnum];
710	hashp->SPARES[splitnum]++;
711	offset = hashp->SPARES[splitnum] -
712	    (splitnum ? hashp->SPARES[splitnum - 1] : 0);
713
714#define	OVMSG	"HASH: Out of overflow pages.  Increase page size\n"
715	if (offset > SPLITMASK) {
716		if (++splitnum >= NCACHED) {
717			(void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
718			return (0);
719		}
720		hashp->OVFL_POINT = splitnum;
721		hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
722		hashp->SPARES[splitnum-1]--;
723		offset = 1;
724	}
725
726	/* Check if we need to allocate a new bitmap page */
727	if (free_bit == (hashp->BSIZE << BYTE_SHIFT) - 1) {
728		free_page++;
729		if (free_page >= NCACHED) {
730			(void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
731			return (0);
732		}
733		/*
734		 * This is tricky.  The 1 indicates that you want the new page
735		 * allocated with 1 clear bit.  Actually, you are going to
736		 * allocate 2 pages from this map.  The first is going to be
737		 * the map page, the second is the overflow page we were
738		 * looking for.  The init_bitmap routine automatically, sets
739		 * the first bit of itself to indicate that the bitmap itself
740		 * is in use.  We would explicitly set the second bit, but
741		 * don't have to if we tell init_bitmap not to leave it clear
742		 * in the first place.
743		 */
744		if (__ibitmap(hashp,
745		    (int)OADDR_OF(splitnum, offset), 1, free_page))
746			return (0);
747		hashp->SPARES[splitnum]++;
748#ifdef DEBUG2
749		free_bit = 2;
750#endif
751		offset++;
752		if (offset > SPLITMASK) {
753			if (++splitnum >= NCACHED) {
754				(void)_write(STDERR_FILENO, OVMSG,
755				    sizeof(OVMSG) - 1);
756				return (0);
757			}
758			hashp->OVFL_POINT = splitnum;
759			hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
760			hashp->SPARES[splitnum-1]--;
761			offset = 0;
762		}
763	} else {
764		/*
765		 * Free_bit addresses the last used bit.  Bump it to address
766		 * the first available bit.
767		 */
768		free_bit++;
769		SETBIT(freep, free_bit);
770	}
771
772	/* Calculate address of the new overflow page */
773	addr = OADDR_OF(splitnum, offset);
774#ifdef DEBUG2
775	(void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
776	    addr, free_bit, free_page);
777#endif
778	return (addr);
779
780found:
781	bit = bit + first_free(freep[j]);
782	SETBIT(freep, bit);
783#ifdef DEBUG2
784	tmp1 = bit;
785	tmp2 = i;
786#endif
787	/*
788	 * Bits are addressed starting with 0, but overflow pages are addressed
789	 * beginning at 1. Bit is a bit addressnumber, so we need to increment
790	 * it to convert it to a page number.
791	 */
792	bit = 1 + bit + (i * (hashp->BSIZE << BYTE_SHIFT));
793	if (bit >= hashp->LAST_FREED)
794		hashp->LAST_FREED = bit - 1;
795
796	/* Calculate the split number for this page */
797	for (i = 0; (i < splitnum) && (bit > hashp->SPARES[i]); i++);
798	offset = (i ? bit - hashp->SPARES[i - 1] : bit);
799	if (offset >= SPLITMASK)
800		return (0);	/* Out of overflow pages */
801	addr = OADDR_OF(i, offset);
802#ifdef DEBUG2
803	(void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
804	    addr, tmp1, tmp2);
805#endif
806
807	/* Allocate and return the overflow page */
808	return (addr);
809}
810
811/*
812 * Mark this overflow page as free.
813 */
814extern void
815__free_ovflpage(hashp, obufp)
816	HTAB *hashp;
817	BUFHEAD *obufp;
818{
819	register u_int16_t addr;
820	u_int32_t *freep;
821	int bit_address, free_page, free_bit;
822	u_int16_t ndx;
823
824	addr = obufp->addr;
825#ifdef DEBUG1
826	(void)fprintf(stderr, "Freeing %d\n", addr);
827#endif
828	ndx = (((u_int16_t)addr) >> SPLITSHIFT);
829	bit_address =
830	    (ndx ? hashp->SPARES[ndx - 1] : 0) + (addr & SPLITMASK) - 1;
831	 if (bit_address < hashp->LAST_FREED)
832		hashp->LAST_FREED = bit_address;
833	free_page = (bit_address >> (hashp->BSHIFT + BYTE_SHIFT));
834	free_bit = bit_address & ((hashp->BSIZE << BYTE_SHIFT) - 1);
835
836	if (!(freep = hashp->mapp[free_page]))
837		freep = fetch_bitmap(hashp, free_page);
838#ifdef DEBUG
839	/*
840	 * This had better never happen.  It means we tried to read a bitmap
841	 * that has already had overflow pages allocated off it, and we
842	 * failed to read it from the file.
843	 */
844	if (!freep)
845		assert(0);
846#endif
847	CLRBIT(freep, free_bit);
848#ifdef DEBUG2
849	(void)fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n",
850	    obufp->addr, free_bit, free_page);
851#endif
852	__reclaim_buf(hashp, obufp);
853}
854
855/*
856 * Returns:
857 *	 0 success
858 *	-1 failure
859 */
860static int
861open_temp(hashp)
862	HTAB *hashp;
863{
864	sigset_t set, oset;
865	static char namestr[] = "_hashXXXXXX";
866
867	/* Block signals; make sure file goes away at process exit. */
868	(void)sigfillset(&set);
869	(void)sigprocmask(SIG_BLOCK, &set, &oset);
870	if ((hashp->fp = mkstemp(namestr)) != -1) {
871		(void)unlink(namestr);
872		(void)_fcntl(hashp->fp, F_SETFD, 1);
873	}
874	(void)sigprocmask(SIG_SETMASK, &oset, (sigset_t *)NULL);
875	return (hashp->fp != -1 ? 0 : -1);
876}
877
878/*
879 * We have to know that the key will fit, but the last entry on the page is
880 * an overflow pair, so we need to shift things.
881 */
882static void
883squeeze_key(sp, key, val)
884	u_int16_t *sp;
885	const DBT *key, *val;
886{
887	register char *p;
888	u_int16_t free_space, n, off, pageno;
889
890	p = (char *)sp;
891	n = sp[0];
892	free_space = FREESPACE(sp);
893	off = OFFSET(sp);
894
895	pageno = sp[n - 1];
896	off -= key->size;
897	sp[n - 1] = off;
898	memmove(p + off, key->data, key->size);
899	off -= val->size;
900	sp[n] = off;
901	memmove(p + off, val->data, val->size);
902	sp[0] = n + 2;
903	sp[n + 1] = pageno;
904	sp[n + 2] = OVFLPAGE;
905	FREESPACE(sp) = free_space - PAIRSIZE(key, val);
906	OFFSET(sp) = off;
907}
908
909static u_int32_t *
910fetch_bitmap(hashp, ndx)
911	HTAB *hashp;
912	int ndx;
913{
914	if (ndx >= hashp->nmaps)
915		return (NULL);
916	if ((hashp->mapp[ndx] = (u_int32_t *)malloc(hashp->BSIZE)) == NULL)
917		return (NULL);
918	if (__get_page(hashp,
919	    (char *)hashp->mapp[ndx], hashp->BITMAPS[ndx], 0, 1, 1)) {
920		free(hashp->mapp[ndx]);
921		return (NULL);
922	}
923	return (hashp->mapp[ndx]);
924}
925
926#ifdef DEBUG4
927int
928print_chain(addr)
929	int addr;
930{
931	BUFHEAD *bufp;
932	short *bp, oaddr;
933
934	(void)fprintf(stderr, "%d ", addr);
935	bufp = __get_buf(hashp, addr, NULL, 0);
936	bp = (short *)bufp->page;
937	while (bp[0] && ((bp[bp[0]] == OVFLPAGE) ||
938		((bp[0] > 2) && bp[2] < REAL_KEY))) {
939		oaddr = bp[bp[0] - 1];
940		(void)fprintf(stderr, "%d ", (int)oaddr);
941		bufp = __get_buf(hashp, (int)oaddr, bufp, 0);
942		bp = (short *)bufp->page;
943	}
944	(void)fprintf(stderr, "\n");
945}
946#endif
947