hash_page.c revision 71579
1/*-
2 * Copyright (c) 1990, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Margo Seltzer.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * $FreeBSD: head/lib/libc/db/hash/hash_page.c 71579 2001-01-24 13:01:12Z deischen $
37 */
38
39#if defined(LIBC_SCCS) && !defined(lint)
40static char sccsid[] = "@(#)hash_page.c	8.7 (Berkeley) 8/16/94";
41#endif /* LIBC_SCCS and not lint */
42
43/*
44 * PACKAGE:  hashing
45 *
46 * DESCRIPTION:
47 *	Page manipulation for hashing package.
48 *
49 * ROUTINES:
50 *
51 * External
52 *	__get_page
53 *	__add_ovflpage
54 * Internal
55 *	overflow_page
56 *	open_temp
57 */
58
59#include "namespace.h"
60#include <sys/types.h>
61
62#include <errno.h>
63#include <fcntl.h>
64#include <signal.h>
65#include <stdio.h>
66#include <stdlib.h>
67#include <string.h>
68#include <unistd.h>
69#ifdef DEBUG
70#include <assert.h>
71#endif
72#include "un-namespace.h"
73
74#include <db.h>
75#include "hash.h"
76#include "page.h"
77#include "extern.h"
78
79static u_int32_t	*fetch_bitmap __P((HTAB *, int));
80static u_int32_t	 first_free __P((u_int32_t));
81static int	 open_temp __P((HTAB *));
82static u_int16_t	 overflow_page __P((HTAB *));
83static void	 putpair __P((char *, const DBT *, const DBT *));
84static void	 squeeze_key __P((u_int16_t *, const DBT *, const DBT *));
85static int	 ugly_split
86		    __P((HTAB *, u_int32_t, BUFHEAD *, BUFHEAD *, int, int));
87
/*
 * Initialize the header of an empty page P: an entry count of zero,
 * followed by the free-space and data-offset slots (the same two trailing
 * slots that putpair() and __split_page() maintain after the last entry).
 * The macro reads `hashp' from the scope in which it is expanded.
 *
 * Wrapped in do { } while (0) so that `PAGE_INIT(p);' is a single statement
 * and stays safe inside an unbraced if/else.
 */
#define	PAGE_INIT(P) do { \
	((u_int16_t *)(P))[0] = 0; \
	((u_int16_t *)(P))[1] = hashp->BSIZE - 3 * sizeof(u_int16_t); \
	((u_int16_t *)(P))[2] = hashp->BSIZE; \
} while (0)
93
94/*
95 * This is called AFTER we have verified that there is room on the page for
96 * the pair (PAIRFITS has returned true) so we go right ahead and start moving
97 * stuff on.
98 */
99static void
100putpair(p, key, val)
101	char *p;
102	const DBT *key, *val;
103{
104	register u_int16_t *bp, n, off;
105
106	bp = (u_int16_t *)p;
107
108	/* Enter the key first. */
109	n = bp[0];
110
111	off = OFFSET(bp) - key->size;
112	memmove(p + off, key->data, key->size);
113	bp[++n] = off;
114
115	/* Now the data. */
116	off -= val->size;
117	memmove(p + off, val->data, val->size);
118	bp[++n] = off;
119
120	/* Adjust page info. */
121	bp[0] = n;
122	bp[n + 1] = off - ((n + 3) * sizeof(u_int16_t));
123	bp[n + 2] = off;
124}
125
126/*
127 * Returns:
128 *	 0 OK
129 *	-1 error
130 */
131extern int
132__delpair(hashp, bufp, ndx)
133	HTAB *hashp;
134	BUFHEAD *bufp;
135	register int ndx;
136{
137	register u_int16_t *bp, newoff;
138	register int n;
139	u_int16_t pairlen;
140
141	bp = (u_int16_t *)bufp->page;
142	n = bp[0];
143
144	if (bp[ndx + 1] < REAL_KEY)
145		return (__big_delete(hashp, bufp));
146	if (ndx != 1)
147		newoff = bp[ndx - 1];
148	else
149		newoff = hashp->BSIZE;
150	pairlen = newoff - bp[ndx + 1];
151
152	if (ndx != (n - 1)) {
153		/* Hard Case -- need to shuffle keys */
154		register int i;
155		register char *src = bufp->page + (int)OFFSET(bp);
156		register char *dst = src + (int)pairlen;
157		memmove(dst, src, bp[ndx + 1] - OFFSET(bp));
158
159		/* Now adjust the pointers */
160		for (i = ndx + 2; i <= n; i += 2) {
161			if (bp[i + 1] == OVFLPAGE) {
162				bp[i - 2] = bp[i];
163				bp[i - 1] = bp[i + 1];
164			} else {
165				bp[i - 2] = bp[i] + pairlen;
166				bp[i - 1] = bp[i + 1] + pairlen;
167			}
168		}
169	}
170	/* Finally adjust the page data */
171	bp[n] = OFFSET(bp) + pairlen;
172	bp[n - 1] = bp[n + 1] + pairlen + 2 * sizeof(u_int16_t);
173	bp[0] = n - 2;
174	hashp->NKEYS--;
175
176	bufp->flags |= BUF_MOD;
177	return (0);
178}
179/*
180 * Returns:
181 *	 0 ==> OK
182 *	-1 ==> Error
183 */
184extern int
185__split_page(hashp, obucket, nbucket)
186	HTAB *hashp;
187	u_int32_t obucket, nbucket;
188{
189	register BUFHEAD *new_bufp, *old_bufp;
190	register u_int16_t *ino;
191	register char *np;
192	DBT key, val;
193	int n, ndx, retval;
194	u_int16_t copyto, diff, off, moved;
195	char *op;
196
197	copyto = (u_int16_t)hashp->BSIZE;
198	off = (u_int16_t)hashp->BSIZE;
199	old_bufp = __get_buf(hashp, obucket, NULL, 0);
200	if (old_bufp == NULL)
201		return (-1);
202	new_bufp = __get_buf(hashp, nbucket, NULL, 0);
203	if (new_bufp == NULL)
204		return (-1);
205
206	old_bufp->flags |= (BUF_MOD | BUF_PIN);
207	new_bufp->flags |= (BUF_MOD | BUF_PIN);
208
209	ino = (u_int16_t *)(op = old_bufp->page);
210	np = new_bufp->page;
211
212	moved = 0;
213
214	for (n = 1, ndx = 1; n < ino[0]; n += 2) {
215		if (ino[n + 1] < REAL_KEY) {
216			retval = ugly_split(hashp, obucket, old_bufp, new_bufp,
217			    (int)copyto, (int)moved);
218			old_bufp->flags &= ~BUF_PIN;
219			new_bufp->flags &= ~BUF_PIN;
220			return (retval);
221
222		}
223		key.data = (u_char *)op + ino[n];
224		key.size = off - ino[n];
225
226		if (__call_hash(hashp, key.data, key.size) == obucket) {
227			/* Don't switch page */
228			diff = copyto - off;
229			if (diff) {
230				copyto = ino[n + 1] + diff;
231				memmove(op + copyto, op + ino[n + 1],
232				    off - ino[n + 1]);
233				ino[ndx] = copyto + ino[n] - ino[n + 1];
234				ino[ndx + 1] = copyto;
235			} else
236				copyto = ino[n + 1];
237			ndx += 2;
238		} else {
239			/* Switch page */
240			val.data = (u_char *)op + ino[n + 1];
241			val.size = ino[n] - ino[n + 1];
242			putpair(np, &key, &val);
243			moved += 2;
244		}
245
246		off = ino[n + 1];
247	}
248
249	/* Now clean up the page */
250	ino[0] -= moved;
251	FREESPACE(ino) = copyto - sizeof(u_int16_t) * (ino[0] + 3);
252	OFFSET(ino) = copyto;
253
254#ifdef DEBUG3
255	(void)fprintf(stderr, "split %d/%d\n",
256	    ((u_int16_t *)np)[0] / 2,
257	    ((u_int16_t *)op)[0] / 2);
258#endif
259	/* unpin both pages */
260	old_bufp->flags &= ~BUF_PIN;
261	new_bufp->flags &= ~BUF_PIN;
262	return (0);
263}
264
265/*
266 * Called when we encounter an overflow or big key/data page during split
267 * handling.  This is special cased since we have to begin checking whether
268 * the key/data pairs fit on their respective pages and because we may need
269 * overflow pages for both the old and new pages.
270 *
271 * The first page might be a page with regular key/data pairs in which case
272 * we have a regular overflow condition and just need to go on to the next
273 * page or it might be a big key/data pair in which case we need to fix the
274 * big key/data pair.
275 *
276 * Returns:
277 *	 0 ==> success
278 *	-1 ==> failure
279 */
280static int
281ugly_split(hashp, obucket, old_bufp, new_bufp, copyto, moved)
282	HTAB *hashp;
283	u_int32_t obucket;	/* Same as __split_page. */
284	BUFHEAD *old_bufp, *new_bufp;
285	int copyto;	/* First byte on page which contains key/data values. */
286	int moved;	/* Number of pairs moved to new page. */
287{
288	register BUFHEAD *bufp;	/* Buffer header for ino */
289	register u_int16_t *ino;	/* Page keys come off of */
290	register u_int16_t *np;	/* New page */
291	register u_int16_t *op;	/* Page keys go on to if they aren't moving */
292
293	BUFHEAD *last_bfp;	/* Last buf header OVFL needing to be freed */
294	DBT key, val;
295	SPLIT_RETURN ret;
296	u_int16_t n, off, ov_addr, scopyto;
297	char *cino;		/* Character value of ino */
298
299	bufp = old_bufp;
300	ino = (u_int16_t *)old_bufp->page;
301	np = (u_int16_t *)new_bufp->page;
302	op = (u_int16_t *)old_bufp->page;
303	last_bfp = NULL;
304	scopyto = (u_int16_t)copyto;	/* ANSI */
305
306	n = ino[0] - 1;
307	while (n < ino[0]) {
308		if (ino[2] < REAL_KEY && ino[2] != OVFLPAGE) {
309			if (__big_split(hashp, old_bufp,
310			    new_bufp, bufp, bufp->addr, obucket, &ret))
311				return (-1);
312			old_bufp = ret.oldp;
313			if (!old_bufp)
314				return (-1);
315			op = (u_int16_t *)old_bufp->page;
316			new_bufp = ret.newp;
317			if (!new_bufp)
318				return (-1);
319			np = (u_int16_t *)new_bufp->page;
320			bufp = ret.nextp;
321			if (!bufp)
322				return (0);
323			cino = (char *)bufp->page;
324			ino = (u_int16_t *)cino;
325			last_bfp = ret.nextp;
326		} else if (ino[n + 1] == OVFLPAGE) {
327			ov_addr = ino[n];
328			/*
329			 * Fix up the old page -- the extra 2 are the fields
330			 * which contained the overflow information.
331			 */
332			ino[0] -= (moved + 2);
333			FREESPACE(ino) =
334			    scopyto - sizeof(u_int16_t) * (ino[0] + 3);
335			OFFSET(ino) = scopyto;
336
337			bufp = __get_buf(hashp, ov_addr, bufp, 0);
338			if (!bufp)
339				return (-1);
340
341			ino = (u_int16_t *)bufp->page;
342			n = 1;
343			scopyto = hashp->BSIZE;
344			moved = 0;
345
346			if (last_bfp)
347				__free_ovflpage(hashp, last_bfp);
348			last_bfp = bufp;
349		}
350		/* Move regular sized pairs of there are any */
351		off = hashp->BSIZE;
352		for (n = 1; (n < ino[0]) && (ino[n + 1] >= REAL_KEY); n += 2) {
353			cino = (char *)ino;
354			key.data = (u_char *)cino + ino[n];
355			key.size = off - ino[n];
356			val.data = (u_char *)cino + ino[n + 1];
357			val.size = ino[n] - ino[n + 1];
358			off = ino[n + 1];
359
360			if (__call_hash(hashp, key.data, key.size) == obucket) {
361				/* Keep on old page */
362				if (PAIRFITS(op, (&key), (&val)))
363					putpair((char *)op, &key, &val);
364				else {
365					old_bufp =
366					    __add_ovflpage(hashp, old_bufp);
367					if (!old_bufp)
368						return (-1);
369					op = (u_int16_t *)old_bufp->page;
370					putpair((char *)op, &key, &val);
371				}
372				old_bufp->flags |= BUF_MOD;
373			} else {
374				/* Move to new page */
375				if (PAIRFITS(np, (&key), (&val)))
376					putpair((char *)np, &key, &val);
377				else {
378					new_bufp =
379					    __add_ovflpage(hashp, new_bufp);
380					if (!new_bufp)
381						return (-1);
382					np = (u_int16_t *)new_bufp->page;
383					putpair((char *)np, &key, &val);
384				}
385				new_bufp->flags |= BUF_MOD;
386			}
387		}
388	}
389	if (last_bfp)
390		__free_ovflpage(hashp, last_bfp);
391	return (0);
392}
393
394/*
395 * Add the given pair to the page
396 *
397 * Returns:
398 *	0 ==> OK
399 *	1 ==> failure
400 */
401extern int
402__addel(hashp, bufp, key, val)
403	HTAB *hashp;
404	BUFHEAD *bufp;
405	const DBT *key, *val;
406{
407	register u_int16_t *bp, *sop;
408	int do_expand;
409
410	bp = (u_int16_t *)bufp->page;
411	do_expand = 0;
412	while (bp[0] && (bp[2] < REAL_KEY || bp[bp[0]] < REAL_KEY))
413		/* Exception case */
414		if (bp[2] == FULL_KEY_DATA && bp[0] == 2)
415			/* This is the last page of a big key/data pair
416			   and we need to add another page */
417			break;
418		else if (bp[2] < REAL_KEY && bp[bp[0]] != OVFLPAGE) {
419			bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
420			if (!bufp)
421				return (-1);
422			bp = (u_int16_t *)bufp->page;
423		} else
424			/* Try to squeeze key on this page */
425			if (FREESPACE(bp) > PAIRSIZE(key, val)) {
426				squeeze_key(bp, key, val);
427				return (0);
428			} else {
429				bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
430				if (!bufp)
431					return (-1);
432				bp = (u_int16_t *)bufp->page;
433			}
434
435	if (PAIRFITS(bp, key, val))
436		putpair(bufp->page, key, val);
437	else {
438		do_expand = 1;
439		bufp = __add_ovflpage(hashp, bufp);
440		if (!bufp)
441			return (-1);
442		sop = (u_int16_t *)bufp->page;
443
444		if (PAIRFITS(sop, key, val))
445			putpair((char *)sop, key, val);
446		else
447			if (__big_insert(hashp, bufp, key, val))
448				return (-1);
449	}
450	bufp->flags |= BUF_MOD;
451	/*
452	 * If the average number of keys per bucket exceeds the fill factor,
453	 * expand the table.
454	 */
455	hashp->NKEYS++;
456	if (do_expand ||
457	    (hashp->NKEYS / (hashp->MAX_BUCKET + 1) > hashp->FFACTOR))
458		return (__expand_table(hashp));
459	return (0);
460}
461
462/*
463 *
464 * Returns:
465 *	pointer on success
466 *	NULL on error
467 */
468extern BUFHEAD *
469__add_ovflpage(hashp, bufp)
470	HTAB *hashp;
471	BUFHEAD *bufp;
472{
473	register u_int16_t *sp;
474	u_int16_t ndx, ovfl_num;
475#ifdef DEBUG1
476	int tmp1, tmp2;
477#endif
478	sp = (u_int16_t *)bufp->page;
479
480	/* Check if we are dynamically determining the fill factor */
481	if (hashp->FFACTOR == DEF_FFACTOR) {
482		hashp->FFACTOR = sp[0] >> 1;
483		if (hashp->FFACTOR < MIN_FFACTOR)
484			hashp->FFACTOR = MIN_FFACTOR;
485	}
486	bufp->flags |= BUF_MOD;
487	ovfl_num = overflow_page(hashp);
488#ifdef DEBUG1
489	tmp1 = bufp->addr;
490	tmp2 = bufp->ovfl ? bufp->ovfl->addr : 0;
491#endif
492	if (!ovfl_num || !(bufp->ovfl = __get_buf(hashp, ovfl_num, bufp, 1)))
493		return (NULL);
494	bufp->ovfl->flags |= BUF_MOD;
495#ifdef DEBUG1
496	(void)fprintf(stderr, "ADDOVFLPAGE: %d->ovfl was %d is now %d\n",
497	    tmp1, tmp2, bufp->ovfl->addr);
498#endif
499	ndx = sp[0];
500	/*
501	 * Since a pair is allocated on a page only if there's room to add
502	 * an overflow page, we know that the OVFL information will fit on
503	 * the page.
504	 */
505	sp[ndx + 4] = OFFSET(sp);
506	sp[ndx + 3] = FREESPACE(sp) - OVFLSIZE;
507	sp[ndx + 1] = ovfl_num;
508	sp[ndx + 2] = OVFLPAGE;
509	sp[0] = ndx + 2;
510#ifdef HASH_STATISTICS
511	hash_overflows++;
512#endif
513	return (bufp->ovfl);
514}
515
516/*
517 * Returns:
518 *	 0 indicates SUCCESS
519 *	-1 indicates FAILURE
520 */
521extern int
522__get_page(hashp, p, bucket, is_bucket, is_disk, is_bitmap)
523	HTAB *hashp;
524	char *p;
525	u_int32_t bucket;
526	int is_bucket, is_disk, is_bitmap;
527{
528	register int fd, page, size;
529	int rsize;
530	u_int16_t *bp;
531
532	fd = hashp->fp;
533	size = hashp->BSIZE;
534
535	if ((fd == -1) || !is_disk) {
536		PAGE_INIT(p);
537		return (0);
538	}
539	if (is_bucket)
540		page = BUCKET_TO_PAGE(bucket);
541	else
542		page = OADDR_TO_PAGE(bucket);
543	if ((lseek(fd, (off_t)page << hashp->BSHIFT, SEEK_SET) == -1) ||
544	    ((rsize = _read(fd, p, size)) == -1))
545		return (-1);
546	bp = (u_int16_t *)p;
547	if (!rsize)
548		bp[0] = 0;	/* We hit the EOF, so initialize a new page */
549	else
550		if (rsize != size) {
551			errno = EFTYPE;
552			return (-1);
553		}
554	if (!is_bitmap && !bp[0]) {
555		PAGE_INIT(p);
556	} else
557		if (hashp->LORDER != BYTE_ORDER) {
558			register int i, max;
559
560			if (is_bitmap) {
561				max = hashp->BSIZE >> 2; /* divide by 4 */
562				for (i = 0; i < max; i++)
563					M_32_SWAP(((int *)p)[i]);
564			} else {
565				M_16_SWAP(bp[0]);
566				max = bp[0] + 2;
567				for (i = 1; i <= max; i++)
568					M_16_SWAP(bp[i]);
569			}
570		}
571	return (0);
572}
573
574/*
575 * Write page p to disk
576 *
577 * Returns:
578 *	 0 ==> OK
579 *	-1 ==>failure
580 */
581extern int
582__put_page(hashp, p, bucket, is_bucket, is_bitmap)
583	HTAB *hashp;
584	char *p;
585	u_int32_t bucket;
586	int is_bucket, is_bitmap;
587{
588	register int fd, page, size;
589	int wsize;
590
591	size = hashp->BSIZE;
592	if ((hashp->fp == -1) && open_temp(hashp))
593		return (-1);
594	fd = hashp->fp;
595
596	if (hashp->LORDER != BYTE_ORDER) {
597		register int i;
598		register int max;
599
600		if (is_bitmap) {
601			max = hashp->BSIZE >> 2;	/* divide by 4 */
602			for (i = 0; i < max; i++)
603				M_32_SWAP(((int *)p)[i]);
604		} else {
605			max = ((u_int16_t *)p)[0] + 2;
606			for (i = 0; i <= max; i++)
607				M_16_SWAP(((u_int16_t *)p)[i]);
608		}
609	}
610	if (is_bucket)
611		page = BUCKET_TO_PAGE(bucket);
612	else
613		page = OADDR_TO_PAGE(bucket);
614	if ((lseek(fd, (off_t)page << hashp->BSHIFT, SEEK_SET) == -1) ||
615	    ((wsize = _write(fd, p, size)) == -1))
616		/* Errno is set */
617		return (-1);
618	if (wsize != size) {
619		errno = EFTYPE;
620		return (-1);
621	}
622	return (0);
623}
624
625#define BYTE_MASK	((1 << INT_BYTE_SHIFT) -1)
626/*
627 * Initialize a new bitmap page.  Bitmap pages are left in memory
628 * once they are read in.
629 */
630extern int
631__ibitmap(hashp, pnum, nbits, ndx)
632	HTAB *hashp;
633	int pnum, nbits, ndx;
634{
635	u_int32_t *ip;
636	int clearbytes, clearints;
637
638	if ((ip = (u_int32_t *)malloc(hashp->BSIZE)) == NULL)
639		return (1);
640	hashp->nmaps++;
641	clearints = ((nbits - 1) >> INT_BYTE_SHIFT) + 1;
642	clearbytes = clearints << INT_TO_BYTE;
643	(void)memset((char *)ip, 0, clearbytes);
644	(void)memset(((char *)ip) + clearbytes, 0xFF,
645	    hashp->BSIZE - clearbytes);
646	ip[clearints - 1] = ALL_SET << (nbits & BYTE_MASK);
647	SETBIT(ip, 0);
648	hashp->BITMAPS[ndx] = (u_int16_t)pnum;
649	hashp->mapp[ndx] = ip;
650	return (0);
651}
652
653static u_int32_t
654first_free(map)
655	u_int32_t map;
656{
657	register u_int32_t i, mask;
658
659	mask = 0x1;
660	for (i = 0; i < BITS_PER_MAP; i++) {
661		if (!(mask & map))
662			return (i);
663		mask = mask << 1;
664	}
665	return (i);
666}
667
668static u_int16_t
669overflow_page(hashp)
670	HTAB *hashp;
671{
672	register u_int32_t *freep;
673	register int max_free, offset, splitnum;
674	u_int16_t addr;
675	int bit, first_page, free_bit, free_page, i, in_use_bits, j;
676#ifdef DEBUG2
677	int tmp1, tmp2;
678#endif
679	splitnum = hashp->OVFL_POINT;
680	max_free = hashp->SPARES[splitnum];
681
682	free_page = (max_free - 1) >> (hashp->BSHIFT + BYTE_SHIFT);
683	free_bit = (max_free - 1) & ((hashp->BSIZE << BYTE_SHIFT) - 1);
684
685	/* Look through all the free maps to find the first free block */
686	first_page = hashp->LAST_FREED >>(hashp->BSHIFT + BYTE_SHIFT);
687	for ( i = first_page; i <= free_page; i++ ) {
688		if (!(freep = (u_int32_t *)hashp->mapp[i]) &&
689		    !(freep = fetch_bitmap(hashp, i)))
690			return (0);
691		if (i == free_page)
692			in_use_bits = free_bit;
693		else
694			in_use_bits = (hashp->BSIZE << BYTE_SHIFT) - 1;
695
696		if (i == first_page) {
697			bit = hashp->LAST_FREED &
698			    ((hashp->BSIZE << BYTE_SHIFT) - 1);
699			j = bit / BITS_PER_MAP;
700			bit = bit & ~(BITS_PER_MAP - 1);
701		} else {
702			bit = 0;
703			j = 0;
704		}
705		for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP)
706			if (freep[j] != ALL_SET)
707				goto found;
708	}
709
710	/* No Free Page Found */
711	hashp->LAST_FREED = hashp->SPARES[splitnum];
712	hashp->SPARES[splitnum]++;
713	offset = hashp->SPARES[splitnum] -
714	    (splitnum ? hashp->SPARES[splitnum - 1] : 0);
715
716#define	OVMSG	"HASH: Out of overflow pages.  Increase page size\n"
717	if (offset > SPLITMASK) {
718		if (++splitnum >= NCACHED) {
719			(void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
720			return (0);
721		}
722		hashp->OVFL_POINT = splitnum;
723		hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
724		hashp->SPARES[splitnum-1]--;
725		offset = 1;
726	}
727
728	/* Check if we need to allocate a new bitmap page */
729	if (free_bit == (hashp->BSIZE << BYTE_SHIFT) - 1) {
730		free_page++;
731		if (free_page >= NCACHED) {
732			(void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
733			return (0);
734		}
735		/*
736		 * This is tricky.  The 1 indicates that you want the new page
737		 * allocated with 1 clear bit.  Actually, you are going to
738		 * allocate 2 pages from this map.  The first is going to be
739		 * the map page, the second is the overflow page we were
740		 * looking for.  The init_bitmap routine automatically, sets
741		 * the first bit of itself to indicate that the bitmap itself
742		 * is in use.  We would explicitly set the second bit, but
743		 * don't have to if we tell init_bitmap not to leave it clear
744		 * in the first place.
745		 */
746		if (__ibitmap(hashp,
747		    (int)OADDR_OF(splitnum, offset), 1, free_page))
748			return (0);
749		hashp->SPARES[splitnum]++;
750#ifdef DEBUG2
751		free_bit = 2;
752#endif
753		offset++;
754		if (offset > SPLITMASK) {
755			if (++splitnum >= NCACHED) {
756				(void)_write(STDERR_FILENO, OVMSG,
757				    sizeof(OVMSG) - 1);
758				return (0);
759			}
760			hashp->OVFL_POINT = splitnum;
761			hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
762			hashp->SPARES[splitnum-1]--;
763			offset = 0;
764		}
765	} else {
766		/*
767		 * Free_bit addresses the last used bit.  Bump it to address
768		 * the first available bit.
769		 */
770		free_bit++;
771		SETBIT(freep, free_bit);
772	}
773
774	/* Calculate address of the new overflow page */
775	addr = OADDR_OF(splitnum, offset);
776#ifdef DEBUG2
777	(void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
778	    addr, free_bit, free_page);
779#endif
780	return (addr);
781
782found:
783	bit = bit + first_free(freep[j]);
784	SETBIT(freep, bit);
785#ifdef DEBUG2
786	tmp1 = bit;
787	tmp2 = i;
788#endif
789	/*
790	 * Bits are addressed starting with 0, but overflow pages are addressed
791	 * beginning at 1. Bit is a bit addressnumber, so we need to increment
792	 * it to convert it to a page number.
793	 */
794	bit = 1 + bit + (i * (hashp->BSIZE << BYTE_SHIFT));
795	if (bit >= hashp->LAST_FREED)
796		hashp->LAST_FREED = bit - 1;
797
798	/* Calculate the split number for this page */
799	for (i = 0; (i < splitnum) && (bit > hashp->SPARES[i]); i++);
800	offset = (i ? bit - hashp->SPARES[i - 1] : bit);
801	if (offset >= SPLITMASK)
802		return (0);	/* Out of overflow pages */
803	addr = OADDR_OF(i, offset);
804#ifdef DEBUG2
805	(void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
806	    addr, tmp1, tmp2);
807#endif
808
809	/* Allocate and return the overflow page */
810	return (addr);
811}
812
813/*
814 * Mark this overflow page as free.
815 */
816extern void
817__free_ovflpage(hashp, obufp)
818	HTAB *hashp;
819	BUFHEAD *obufp;
820{
821	register u_int16_t addr;
822	u_int32_t *freep;
823	int bit_address, free_page, free_bit;
824	u_int16_t ndx;
825
826	addr = obufp->addr;
827#ifdef DEBUG1
828	(void)fprintf(stderr, "Freeing %d\n", addr);
829#endif
830	ndx = (((u_int16_t)addr) >> SPLITSHIFT);
831	bit_address =
832	    (ndx ? hashp->SPARES[ndx - 1] : 0) + (addr & SPLITMASK) - 1;
833	 if (bit_address < hashp->LAST_FREED)
834		hashp->LAST_FREED = bit_address;
835	free_page = (bit_address >> (hashp->BSHIFT + BYTE_SHIFT));
836	free_bit = bit_address & ((hashp->BSIZE << BYTE_SHIFT) - 1);
837
838	if (!(freep = hashp->mapp[free_page]))
839		freep = fetch_bitmap(hashp, free_page);
840#ifdef DEBUG
841	/*
842	 * This had better never happen.  It means we tried to read a bitmap
843	 * that has already had overflow pages allocated off it, and we
844	 * failed to read it from the file.
845	 */
846	if (!freep)
847		assert(0);
848#endif
849	CLRBIT(freep, free_bit);
850#ifdef DEBUG2
851	(void)fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n",
852	    obufp->addr, free_bit, free_page);
853#endif
854	__reclaim_buf(hashp, obufp);
855}
856
857/*
858 * Returns:
859 *	 0 success
860 *	-1 failure
861 */
862static int
863open_temp(hashp)
864	HTAB *hashp;
865{
866	sigset_t set, oset;
867	static char namestr[] = "_hashXXXXXX";
868
869	/* Block signals; make sure file goes away at process exit. */
870	(void)sigfillset(&set);
871	(void)_sigprocmask(SIG_BLOCK, &set, &oset);
872	if ((hashp->fp = mkstemp(namestr)) != -1) {
873		(void)unlink(namestr);
874		(void)_fcntl(hashp->fp, F_SETFD, 1);
875	}
876	(void)_sigprocmask(SIG_SETMASK, &oset, (sigset_t *)NULL);
877	return (hashp->fp != -1 ? 0 : -1);
878}
879
880/*
881 * We have to know that the key will fit, but the last entry on the page is
882 * an overflow pair, so we need to shift things.
883 */
884static void
885squeeze_key(sp, key, val)
886	u_int16_t *sp;
887	const DBT *key, *val;
888{
889	register char *p;
890	u_int16_t free_space, n, off, pageno;
891
892	p = (char *)sp;
893	n = sp[0];
894	free_space = FREESPACE(sp);
895	off = OFFSET(sp);
896
897	pageno = sp[n - 1];
898	off -= key->size;
899	sp[n - 1] = off;
900	memmove(p + off, key->data, key->size);
901	off -= val->size;
902	sp[n] = off;
903	memmove(p + off, val->data, val->size);
904	sp[0] = n + 2;
905	sp[n + 1] = pageno;
906	sp[n + 2] = OVFLPAGE;
907	FREESPACE(sp) = free_space - PAIRSIZE(key, val);
908	OFFSET(sp) = off;
909}
910
911static u_int32_t *
912fetch_bitmap(hashp, ndx)
913	HTAB *hashp;
914	int ndx;
915{
916	if (ndx >= hashp->nmaps)
917		return (NULL);
918	if ((hashp->mapp[ndx] = (u_int32_t *)malloc(hashp->BSIZE)) == NULL)
919		return (NULL);
920	if (__get_page(hashp,
921	    (char *)hashp->mapp[ndx], hashp->BITMAPS[ndx], 0, 1, 1)) {
922		free(hashp->mapp[ndx]);
923		return (NULL);
924	}
925	return (hashp->mapp[ndx]);
926}
927
#ifdef DEBUG4
/*
 * Debugging aid: print to stderr the address of the page at addr followed
 * by the addresses of the pages chained to it.
 *
 * NOTE(review): `hashp' is neither a parameter nor a local here, so this
 * only builds if a `hashp' is visible at file scope in DEBUG4 builds --
 * confirm before enabling.
 *
 * Returns:
 *	 0 after the whole chain has been printed
 *	-1 if a page in the chain could not be fetched
 */
int
print_chain(addr)
	int addr;
{
	BUFHEAD *bufp;
	short *bp, oaddr;

	(void)fprintf(stderr, "%d ", addr);
	bufp = __get_buf(hashp, addr, NULL, 0);
	if (bufp == NULL) {
		(void)fprintf(stderr, "\n");
		return (-1);
	}
	bp = (short *)bufp->page;
	while (bp[0] && ((bp[bp[0]] == OVFLPAGE) ||
		((bp[0] > 2) && bp[2] < REAL_KEY))) {
		oaddr = bp[bp[0] - 1];
		(void)fprintf(stderr, "%d ", (int)oaddr);
		bufp = __get_buf(hashp, (int)oaddr, bufp, 0);
		if (bufp == NULL) {
			(void)fprintf(stderr, "\n");
			return (-1);
		}
		bp = (short *)bufp->page;
	}
	(void)fprintf(stderr, "\n");
	return (0);
}
#endif
949