hash_page.c revision 55837
1/*-
2 * Copyright (c) 1990, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Margo Seltzer.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * $FreeBSD: head/lib/libc/db/hash/hash_page.c 55837 2000-01-12 09:23:48Z jasone $
37 */
38
39#if defined(LIBC_SCCS) && !defined(lint)
40static char sccsid[] = "@(#)hash_page.c	8.7 (Berkeley) 8/16/94";
41#endif /* LIBC_SCCS and not lint */
42
43/*
44 * PACKAGE:  hashing
45 *
46 * DESCRIPTION:
47 *	Page manipulation for hashing package.
48 *
49 * ROUTINES:
50 *
51 * External
52 *	__get_page
53 *	__add_ovflpage
54 * Internal
55 *	overflow_page
56 *	open_temp
57 */
58
59#include <sys/types.h>
60
61#include <errno.h>
62#include <fcntl.h>
63#include <signal.h>
64#include <stdio.h>
65#include <stdlib.h>
66#include <string.h>
67#include <unistd.h>
68#ifdef DEBUG
69#include <assert.h>
70#endif
71
72#include <db.h>
73#include "hash.h"
74#include "page.h"
75#include "extern.h"
76
77static u_int32_t	*fetch_bitmap __P((HTAB *, int));
78static u_int32_t	 first_free __P((u_int32_t));
79static int	 open_temp __P((HTAB *));
80static u_int16_t	 overflow_page __P((HTAB *));
81static void	 putpair __P((char *, const DBT *, const DBT *));
82static void	 squeeze_key __P((u_int16_t *, const DBT *, const DBT *));
83static int	 ugly_split
84		    __P((HTAB *, u_int32_t, BUFHEAD *, BUFHEAD *, int, int));
85
/*
 * Reset page P to the empty state: entry count 0, free space equal to the
 * block size less the three header words, and the data offset at the very
 * end of the page.  A variable named hashp must be in scope where the macro
 * is expanded.
 */
#define	PAGE_INIT(P) do { \
	u_int16_t *pg_init_hdr_ = (u_int16_t *)(P); \
	pg_init_hdr_[0] = 0; \
	pg_init_hdr_[1] = hashp->BSIZE - 3 * sizeof(u_int16_t); \
	pg_init_hdr_[2] = hashp->BSIZE; \
} while (0)
91
92/*
93 * This is called AFTER we have verified that there is room on the page for
94 * the pair (PAIRFITS has returned true) so we go right ahead and start moving
95 * stuff on.
96 */
97static void
98putpair(p, key, val)
99	char *p;
100	const DBT *key, *val;
101{
102	register u_int16_t *bp, n, off;
103
104	bp = (u_int16_t *)p;
105
106	/* Enter the key first. */
107	n = bp[0];
108
109	off = OFFSET(bp) - key->size;
110	memmove(p + off, key->data, key->size);
111	bp[++n] = off;
112
113	/* Now the data. */
114	off -= val->size;
115	memmove(p + off, val->data, val->size);
116	bp[++n] = off;
117
118	/* Adjust page info. */
119	bp[0] = n;
120	bp[n + 1] = off - ((n + 3) * sizeof(u_int16_t));
121	bp[n + 2] = off;
122}
123
124/*
125 * Returns:
126 *	 0 OK
127 *	-1 error
128 */
129extern int
130__delpair(hashp, bufp, ndx)
131	HTAB *hashp;
132	BUFHEAD *bufp;
133	register int ndx;
134{
135	register u_int16_t *bp, newoff;
136	register int n;
137	u_int16_t pairlen;
138
139	bp = (u_int16_t *)bufp->page;
140	n = bp[0];
141
142	if (bp[ndx + 1] < REAL_KEY)
143		return (__big_delete(hashp, bufp));
144	if (ndx != 1)
145		newoff = bp[ndx - 1];
146	else
147		newoff = hashp->BSIZE;
148	pairlen = newoff - bp[ndx + 1];
149
150	if (ndx != (n - 1)) {
151		/* Hard Case -- need to shuffle keys */
152		register int i;
153		register char *src = bufp->page + (int)OFFSET(bp);
154		register char *dst = src + (int)pairlen;
155		memmove(dst, src, bp[ndx + 1] - OFFSET(bp));
156
157		/* Now adjust the pointers */
158		for (i = ndx + 2; i <= n; i += 2) {
159			if (bp[i + 1] == OVFLPAGE) {
160				bp[i - 2] = bp[i];
161				bp[i - 1] = bp[i + 1];
162			} else {
163				bp[i - 2] = bp[i] + pairlen;
164				bp[i - 1] = bp[i + 1] + pairlen;
165			}
166		}
167	}
168	/* Finally adjust the page data */
169	bp[n] = OFFSET(bp) + pairlen;
170	bp[n - 1] = bp[n + 1] + pairlen + 2 * sizeof(u_int16_t);
171	bp[0] = n - 2;
172	hashp->NKEYS--;
173
174	bufp->flags |= BUF_MOD;
175	return (0);
176}
177/*
178 * Returns:
179 *	 0 ==> OK
180 *	-1 ==> Error
181 */
182extern int
183__split_page(hashp, obucket, nbucket)
184	HTAB *hashp;
185	u_int32_t obucket, nbucket;
186{
187	register BUFHEAD *new_bufp, *old_bufp;
188	register u_int16_t *ino;
189	register char *np;
190	DBT key, val;
191	int n, ndx, retval;
192	u_int16_t copyto, diff, off, moved;
193	char *op;
194
195	copyto = (u_int16_t)hashp->BSIZE;
196	off = (u_int16_t)hashp->BSIZE;
197	old_bufp = __get_buf(hashp, obucket, NULL, 0);
198	if (old_bufp == NULL)
199		return (-1);
200	new_bufp = __get_buf(hashp, nbucket, NULL, 0);
201	if (new_bufp == NULL)
202		return (-1);
203
204	old_bufp->flags |= (BUF_MOD | BUF_PIN);
205	new_bufp->flags |= (BUF_MOD | BUF_PIN);
206
207	ino = (u_int16_t *)(op = old_bufp->page);
208	np = new_bufp->page;
209
210	moved = 0;
211
212	for (n = 1, ndx = 1; n < ino[0]; n += 2) {
213		if (ino[n + 1] < REAL_KEY) {
214			retval = ugly_split(hashp, obucket, old_bufp, new_bufp,
215			    (int)copyto, (int)moved);
216			old_bufp->flags &= ~BUF_PIN;
217			new_bufp->flags &= ~BUF_PIN;
218			return (retval);
219
220		}
221		key.data = (u_char *)op + ino[n];
222		key.size = off - ino[n];
223
224		if (__call_hash(hashp, key.data, key.size) == obucket) {
225			/* Don't switch page */
226			diff = copyto - off;
227			if (diff) {
228				copyto = ino[n + 1] + diff;
229				memmove(op + copyto, op + ino[n + 1],
230				    off - ino[n + 1]);
231				ino[ndx] = copyto + ino[n] - ino[n + 1];
232				ino[ndx + 1] = copyto;
233			} else
234				copyto = ino[n + 1];
235			ndx += 2;
236		} else {
237			/* Switch page */
238			val.data = (u_char *)op + ino[n + 1];
239			val.size = ino[n] - ino[n + 1];
240			putpair(np, &key, &val);
241			moved += 2;
242		}
243
244		off = ino[n + 1];
245	}
246
247	/* Now clean up the page */
248	ino[0] -= moved;
249	FREESPACE(ino) = copyto - sizeof(u_int16_t) * (ino[0] + 3);
250	OFFSET(ino) = copyto;
251
252#ifdef DEBUG3
253	(void)fprintf(stderr, "split %d/%d\n",
254	    ((u_int16_t *)np)[0] / 2,
255	    ((u_int16_t *)op)[0] / 2);
256#endif
257	/* unpin both pages */
258	old_bufp->flags &= ~BUF_PIN;
259	new_bufp->flags &= ~BUF_PIN;
260	return (0);
261}
262
263/*
264 * Called when we encounter an overflow or big key/data page during split
265 * handling.  This is special cased since we have to begin checking whether
266 * the key/data pairs fit on their respective pages and because we may need
267 * overflow pages for both the old and new pages.
268 *
269 * The first page might be a page with regular key/data pairs in which case
270 * we have a regular overflow condition and just need to go on to the next
271 * page or it might be a big key/data pair in which case we need to fix the
272 * big key/data pair.
273 *
274 * Returns:
275 *	 0 ==> success
276 *	-1 ==> failure
277 */
278static int
279ugly_split(hashp, obucket, old_bufp, new_bufp, copyto, moved)
280	HTAB *hashp;
281	u_int32_t obucket;	/* Same as __split_page. */
282	BUFHEAD *old_bufp, *new_bufp;
283	int copyto;	/* First byte on page which contains key/data values. */
284	int moved;	/* Number of pairs moved to new page. */
285{
286	register BUFHEAD *bufp;	/* Buffer header for ino */
287	register u_int16_t *ino;	/* Page keys come off of */
288	register u_int16_t *np;	/* New page */
289	register u_int16_t *op;	/* Page keys go on to if they aren't moving */
290
291	BUFHEAD *last_bfp;	/* Last buf header OVFL needing to be freed */
292	DBT key, val;
293	SPLIT_RETURN ret;
294	u_int16_t n, off, ov_addr, scopyto;
295	char *cino;		/* Character value of ino */
296
297	bufp = old_bufp;
298	ino = (u_int16_t *)old_bufp->page;
299	np = (u_int16_t *)new_bufp->page;
300	op = (u_int16_t *)old_bufp->page;
301	last_bfp = NULL;
302	scopyto = (u_int16_t)copyto;	/* ANSI */
303
304	n = ino[0] - 1;
305	while (n < ino[0]) {
306		if (ino[2] < REAL_KEY && ino[2] != OVFLPAGE) {
307			if (__big_split(hashp, old_bufp,
308			    new_bufp, bufp, bufp->addr, obucket, &ret))
309				return (-1);
310			old_bufp = ret.oldp;
311			if (!old_bufp)
312				return (-1);
313			op = (u_int16_t *)old_bufp->page;
314			new_bufp = ret.newp;
315			if (!new_bufp)
316				return (-1);
317			np = (u_int16_t *)new_bufp->page;
318			bufp = ret.nextp;
319			if (!bufp)
320				return (0);
321			cino = (char *)bufp->page;
322			ino = (u_int16_t *)cino;
323			last_bfp = ret.nextp;
324		} else if (ino[n + 1] == OVFLPAGE) {
325			ov_addr = ino[n];
326			/*
327			 * Fix up the old page -- the extra 2 are the fields
328			 * which contained the overflow information.
329			 */
330			ino[0] -= (moved + 2);
331			FREESPACE(ino) =
332			    scopyto - sizeof(u_int16_t) * (ino[0] + 3);
333			OFFSET(ino) = scopyto;
334
335			bufp = __get_buf(hashp, ov_addr, bufp, 0);
336			if (!bufp)
337				return (-1);
338
339			ino = (u_int16_t *)bufp->page;
340			n = 1;
341			scopyto = hashp->BSIZE;
342			moved = 0;
343
344			if (last_bfp)
345				__free_ovflpage(hashp, last_bfp);
346			last_bfp = bufp;
347		}
348		/* Move regular sized pairs of there are any */
349		off = hashp->BSIZE;
350		for (n = 1; (n < ino[0]) && (ino[n + 1] >= REAL_KEY); n += 2) {
351			cino = (char *)ino;
352			key.data = (u_char *)cino + ino[n];
353			key.size = off - ino[n];
354			val.data = (u_char *)cino + ino[n + 1];
355			val.size = ino[n] - ino[n + 1];
356			off = ino[n + 1];
357
358			if (__call_hash(hashp, key.data, key.size) == obucket) {
359				/* Keep on old page */
360				if (PAIRFITS(op, (&key), (&val)))
361					putpair((char *)op, &key, &val);
362				else {
363					old_bufp =
364					    __add_ovflpage(hashp, old_bufp);
365					if (!old_bufp)
366						return (-1);
367					op = (u_int16_t *)old_bufp->page;
368					putpair((char *)op, &key, &val);
369				}
370				old_bufp->flags |= BUF_MOD;
371			} else {
372				/* Move to new page */
373				if (PAIRFITS(np, (&key), (&val)))
374					putpair((char *)np, &key, &val);
375				else {
376					new_bufp =
377					    __add_ovflpage(hashp, new_bufp);
378					if (!new_bufp)
379						return (-1);
380					np = (u_int16_t *)new_bufp->page;
381					putpair((char *)np, &key, &val);
382				}
383				new_bufp->flags |= BUF_MOD;
384			}
385		}
386	}
387	if (last_bfp)
388		__free_ovflpage(hashp, last_bfp);
389	return (0);
390}
391
392/*
393 * Add the given pair to the page
394 *
395 * Returns:
396 *	0 ==> OK
397 *	1 ==> failure
398 */
399extern int
400__addel(hashp, bufp, key, val)
401	HTAB *hashp;
402	BUFHEAD *bufp;
403	const DBT *key, *val;
404{
405	register u_int16_t *bp, *sop;
406	int do_expand;
407
408	bp = (u_int16_t *)bufp->page;
409	do_expand = 0;
410	while (bp[0] && (bp[2] < REAL_KEY || bp[bp[0]] < REAL_KEY))
411		/* Exception case */
412		if (bp[2] == FULL_KEY_DATA && bp[0] == 2)
413			/* This is the last page of a big key/data pair
414			   and we need to add another page */
415			break;
416		else if (bp[2] < REAL_KEY && bp[bp[0]] != OVFLPAGE) {
417			bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
418			if (!bufp)
419				return (-1);
420			bp = (u_int16_t *)bufp->page;
421		} else
422			/* Try to squeeze key on this page */
423			if (FREESPACE(bp) > PAIRSIZE(key, val)) {
424				squeeze_key(bp, key, val);
425				return (0);
426			} else {
427				bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
428				if (!bufp)
429					return (-1);
430				bp = (u_int16_t *)bufp->page;
431			}
432
433	if (PAIRFITS(bp, key, val))
434		putpair(bufp->page, key, val);
435	else {
436		do_expand = 1;
437		bufp = __add_ovflpage(hashp, bufp);
438		if (!bufp)
439			return (-1);
440		sop = (u_int16_t *)bufp->page;
441
442		if (PAIRFITS(sop, key, val))
443			putpair((char *)sop, key, val);
444		else
445			if (__big_insert(hashp, bufp, key, val))
446				return (-1);
447	}
448	bufp->flags |= BUF_MOD;
449	/*
450	 * If the average number of keys per bucket exceeds the fill factor,
451	 * expand the table.
452	 */
453	hashp->NKEYS++;
454	if (do_expand ||
455	    (hashp->NKEYS / (hashp->MAX_BUCKET + 1) > hashp->FFACTOR))
456		return (__expand_table(hashp));
457	return (0);
458}
459
460/*
461 *
462 * Returns:
463 *	pointer on success
464 *	NULL on error
465 */
466extern BUFHEAD *
467__add_ovflpage(hashp, bufp)
468	HTAB *hashp;
469	BUFHEAD *bufp;
470{
471	register u_int16_t *sp;
472	u_int16_t ndx, ovfl_num;
473#ifdef DEBUG1
474	int tmp1, tmp2;
475#endif
476	sp = (u_int16_t *)bufp->page;
477
478	/* Check if we are dynamically determining the fill factor */
479	if (hashp->FFACTOR == DEF_FFACTOR) {
480		hashp->FFACTOR = sp[0] >> 1;
481		if (hashp->FFACTOR < MIN_FFACTOR)
482			hashp->FFACTOR = MIN_FFACTOR;
483	}
484	bufp->flags |= BUF_MOD;
485	ovfl_num = overflow_page(hashp);
486#ifdef DEBUG1
487	tmp1 = bufp->addr;
488	tmp2 = bufp->ovfl ? bufp->ovfl->addr : 0;
489#endif
490	if (!ovfl_num || !(bufp->ovfl = __get_buf(hashp, ovfl_num, bufp, 1)))
491		return (NULL);
492	bufp->ovfl->flags |= BUF_MOD;
493#ifdef DEBUG1
494	(void)fprintf(stderr, "ADDOVFLPAGE: %d->ovfl was %d is now %d\n",
495	    tmp1, tmp2, bufp->ovfl->addr);
496#endif
497	ndx = sp[0];
498	/*
499	 * Since a pair is allocated on a page only if there's room to add
500	 * an overflow page, we know that the OVFL information will fit on
501	 * the page.
502	 */
503	sp[ndx + 4] = OFFSET(sp);
504	sp[ndx + 3] = FREESPACE(sp) - OVFLSIZE;
505	sp[ndx + 1] = ovfl_num;
506	sp[ndx + 2] = OVFLPAGE;
507	sp[0] = ndx + 2;
508#ifdef HASH_STATISTICS
509	hash_overflows++;
510#endif
511	return (bufp->ovfl);
512}
513
514/*
515 * Returns:
516 *	 0 indicates SUCCESS
517 *	-1 indicates FAILURE
518 */
519extern int
520__get_page(hashp, p, bucket, is_bucket, is_disk, is_bitmap)
521	HTAB *hashp;
522	char *p;
523	u_int32_t bucket;
524	int is_bucket, is_disk, is_bitmap;
525{
526	register int fd, page, size;
527	int rsize;
528	u_int16_t *bp;
529
530	fd = hashp->fp;
531	size = hashp->BSIZE;
532
533	if ((fd == -1) || !is_disk) {
534		PAGE_INIT(p);
535		return (0);
536	}
537	if (is_bucket)
538		page = BUCKET_TO_PAGE(bucket);
539	else
540		page = OADDR_TO_PAGE(bucket);
541	if ((lseek(fd, (off_t)page << hashp->BSHIFT, SEEK_SET) == -1) ||
542	    ((rsize = _libc_read(fd, p, size)) == -1))
543		return (-1);
544	bp = (u_int16_t *)p;
545	if (!rsize)
546		bp[0] = 0;	/* We hit the EOF, so initialize a new page */
547	else
548		if (rsize != size) {
549			errno = EFTYPE;
550			return (-1);
551		}
552	if (!is_bitmap && !bp[0]) {
553		PAGE_INIT(p);
554	} else
555		if (hashp->LORDER != BYTE_ORDER) {
556			register int i, max;
557
558			if (is_bitmap) {
559				max = hashp->BSIZE >> 2; /* divide by 4 */
560				for (i = 0; i < max; i++)
561					M_32_SWAP(((int *)p)[i]);
562			} else {
563				M_16_SWAP(bp[0]);
564				max = bp[0] + 2;
565				for (i = 1; i <= max; i++)
566					M_16_SWAP(bp[i]);
567			}
568		}
569	return (0);
570}
571
572/*
573 * Write page p to disk
574 *
575 * Returns:
576 *	 0 ==> OK
577 *	-1 ==>failure
578 */
579extern int
580__put_page(hashp, p, bucket, is_bucket, is_bitmap)
581	HTAB *hashp;
582	char *p;
583	u_int32_t bucket;
584	int is_bucket, is_bitmap;
585{
586	register int fd, page, size;
587	int wsize;
588
589	size = hashp->BSIZE;
590	if ((hashp->fp == -1) && open_temp(hashp))
591		return (-1);
592	fd = hashp->fp;
593
594	if (hashp->LORDER != BYTE_ORDER) {
595		register int i;
596		register int max;
597
598		if (is_bitmap) {
599			max = hashp->BSIZE >> 2;	/* divide by 4 */
600			for (i = 0; i < max; i++)
601				M_32_SWAP(((int *)p)[i]);
602		} else {
603			max = ((u_int16_t *)p)[0] + 2;
604			for (i = 0; i <= max; i++)
605				M_16_SWAP(((u_int16_t *)p)[i]);
606		}
607	}
608	if (is_bucket)
609		page = BUCKET_TO_PAGE(bucket);
610	else
611		page = OADDR_TO_PAGE(bucket);
612	if ((lseek(fd, (off_t)page << hashp->BSHIFT, SEEK_SET) == -1) ||
613	    ((wsize = _libc_write(fd, p, size)) == -1))
614		/* Errno is set */
615		return (-1);
616	if (wsize != size) {
617		errno = EFTYPE;
618		return (-1);
619	}
620	return (0);
621}
622
623#define BYTE_MASK	((1 << INT_BYTE_SHIFT) -1)
624/*
625 * Initialize a new bitmap page.  Bitmap pages are left in memory
626 * once they are read in.
627 */
628extern int
629__ibitmap(hashp, pnum, nbits, ndx)
630	HTAB *hashp;
631	int pnum, nbits, ndx;
632{
633	u_int32_t *ip;
634	int clearbytes, clearints;
635
636	if ((ip = (u_int32_t *)malloc(hashp->BSIZE)) == NULL)
637		return (1);
638	hashp->nmaps++;
639	clearints = ((nbits - 1) >> INT_BYTE_SHIFT) + 1;
640	clearbytes = clearints << INT_TO_BYTE;
641	(void)memset((char *)ip, 0, clearbytes);
642	(void)memset(((char *)ip) + clearbytes, 0xFF,
643	    hashp->BSIZE - clearbytes);
644	ip[clearints - 1] = ALL_SET << (nbits & BYTE_MASK);
645	SETBIT(ip, 0);
646	hashp->BITMAPS[ndx] = (u_int16_t)pnum;
647	hashp->mapp[ndx] = ip;
648	return (0);
649}
650
651static u_int32_t
652first_free(map)
653	u_int32_t map;
654{
655	register u_int32_t i, mask;
656
657	mask = 0x1;
658	for (i = 0; i < BITS_PER_MAP; i++) {
659		if (!(mask & map))
660			return (i);
661		mask = mask << 1;
662	}
663	return (i);
664}
665
666static u_int16_t
667overflow_page(hashp)
668	HTAB *hashp;
669{
670	register u_int32_t *freep;
671	register int max_free, offset, splitnum;
672	u_int16_t addr;
673	int bit, first_page, free_bit, free_page, i, in_use_bits, j;
674#ifdef DEBUG2
675	int tmp1, tmp2;
676#endif
677	splitnum = hashp->OVFL_POINT;
678	max_free = hashp->SPARES[splitnum];
679
680	free_page = (max_free - 1) >> (hashp->BSHIFT + BYTE_SHIFT);
681	free_bit = (max_free - 1) & ((hashp->BSIZE << BYTE_SHIFT) - 1);
682
683	/* Look through all the free maps to find the first free block */
684	first_page = hashp->LAST_FREED >>(hashp->BSHIFT + BYTE_SHIFT);
685	for ( i = first_page; i <= free_page; i++ ) {
686		if (!(freep = (u_int32_t *)hashp->mapp[i]) &&
687		    !(freep = fetch_bitmap(hashp, i)))
688			return (0);
689		if (i == free_page)
690			in_use_bits = free_bit;
691		else
692			in_use_bits = (hashp->BSIZE << BYTE_SHIFT) - 1;
693
694		if (i == first_page) {
695			bit = hashp->LAST_FREED &
696			    ((hashp->BSIZE << BYTE_SHIFT) - 1);
697			j = bit / BITS_PER_MAP;
698			bit = bit & ~(BITS_PER_MAP - 1);
699		} else {
700			bit = 0;
701			j = 0;
702		}
703		for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP)
704			if (freep[j] != ALL_SET)
705				goto found;
706	}
707
708	/* No Free Page Found */
709	hashp->LAST_FREED = hashp->SPARES[splitnum];
710	hashp->SPARES[splitnum]++;
711	offset = hashp->SPARES[splitnum] -
712	    (splitnum ? hashp->SPARES[splitnum - 1] : 0);
713
714#define	OVMSG	"HASH: Out of overflow pages.  Increase page size\n"
715	if (offset > SPLITMASK) {
716		if (++splitnum >= NCACHED) {
717			(void)_libc_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) -
718			    1);
719			return (0);
720		}
721		hashp->OVFL_POINT = splitnum;
722		hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
723		hashp->SPARES[splitnum-1]--;
724		offset = 1;
725	}
726
727	/* Check if we need to allocate a new bitmap page */
728	if (free_bit == (hashp->BSIZE << BYTE_SHIFT) - 1) {
729		free_page++;
730		if (free_page >= NCACHED) {
731			(void)_libc_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) -
732			    1);
733			return (0);
734		}
735		/*
736		 * This is tricky.  The 1 indicates that you want the new page
737		 * allocated with 1 clear bit.  Actually, you are going to
738		 * allocate 2 pages from this map.  The first is going to be
739		 * the map page, the second is the overflow page we were
740		 * looking for.  The init_bitmap routine automatically, sets
741		 * the first bit of itself to indicate that the bitmap itself
742		 * is in use.  We would explicitly set the second bit, but
743		 * don't have to if we tell init_bitmap not to leave it clear
744		 * in the first place.
745		 */
746		if (__ibitmap(hashp,
747		    (int)OADDR_OF(splitnum, offset), 1, free_page))
748			return (0);
749		hashp->SPARES[splitnum]++;
750#ifdef DEBUG2
751		free_bit = 2;
752#endif
753		offset++;
754		if (offset > SPLITMASK) {
755			if (++splitnum >= NCACHED) {
756				(void)_libc_write(STDERR_FILENO, OVMSG,
757				    sizeof(OVMSG) - 1);
758				return (0);
759			}
760			hashp->OVFL_POINT = splitnum;
761			hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
762			hashp->SPARES[splitnum-1]--;
763			offset = 0;
764		}
765	} else {
766		/*
767		 * Free_bit addresses the last used bit.  Bump it to address
768		 * the first available bit.
769		 */
770		free_bit++;
771		SETBIT(freep, free_bit);
772	}
773
774	/* Calculate address of the new overflow page */
775	addr = OADDR_OF(splitnum, offset);
776#ifdef DEBUG2
777	(void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
778	    addr, free_bit, free_page);
779#endif
780	return (addr);
781
782found:
783	bit = bit + first_free(freep[j]);
784	SETBIT(freep, bit);
785#ifdef DEBUG2
786	tmp1 = bit;
787	tmp2 = i;
788#endif
789	/*
790	 * Bits are addressed starting with 0, but overflow pages are addressed
791	 * beginning at 1. Bit is a bit addressnumber, so we need to increment
792	 * it to convert it to a page number.
793	 */
794	bit = 1 + bit + (i * (hashp->BSIZE << BYTE_SHIFT));
795	if (bit >= hashp->LAST_FREED)
796		hashp->LAST_FREED = bit - 1;
797
798	/* Calculate the split number for this page */
799	for (i = 0; (i < splitnum) && (bit > hashp->SPARES[i]); i++);
800	offset = (i ? bit - hashp->SPARES[i - 1] : bit);
801	if (offset >= SPLITMASK)
802		return (0);	/* Out of overflow pages */
803	addr = OADDR_OF(i, offset);
804#ifdef DEBUG2
805	(void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
806	    addr, tmp1, tmp2);
807#endif
808
809	/* Allocate and return the overflow page */
810	return (addr);
811}
812
813/*
814 * Mark this overflow page as free.
815 */
816extern void
817__free_ovflpage(hashp, obufp)
818	HTAB *hashp;
819	BUFHEAD *obufp;
820{
821	register u_int16_t addr;
822	u_int32_t *freep;
823	int bit_address, free_page, free_bit;
824	u_int16_t ndx;
825
826	addr = obufp->addr;
827#ifdef DEBUG1
828	(void)fprintf(stderr, "Freeing %d\n", addr);
829#endif
830	ndx = (((u_int16_t)addr) >> SPLITSHIFT);
831	bit_address =
832	    (ndx ? hashp->SPARES[ndx - 1] : 0) + (addr & SPLITMASK) - 1;
833	 if (bit_address < hashp->LAST_FREED)
834		hashp->LAST_FREED = bit_address;
835	free_page = (bit_address >> (hashp->BSHIFT + BYTE_SHIFT));
836	free_bit = bit_address & ((hashp->BSIZE << BYTE_SHIFT) - 1);
837
838	if (!(freep = hashp->mapp[free_page]))
839		freep = fetch_bitmap(hashp, free_page);
840#ifdef DEBUG
841	/*
842	 * This had better never happen.  It means we tried to read a bitmap
843	 * that has already had overflow pages allocated off it, and we
844	 * failed to read it from the file.
845	 */
846	if (!freep)
847		assert(0);
848#endif
849	CLRBIT(freep, free_bit);
850#ifdef DEBUG2
851	(void)fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n",
852	    obufp->addr, free_bit, free_page);
853#endif
854	__reclaim_buf(hashp, obufp);
855}
856
857/*
858 * Returns:
859 *	 0 success
860 *	-1 failure
861 */
862static int
863open_temp(hashp)
864	HTAB *hashp;
865{
866	sigset_t set, oset;
867	static char namestr[] = "_hashXXXXXX";
868
869	/* Block signals; make sure file goes away at process exit. */
870	(void)sigfillset(&set);
871	(void)sigprocmask(SIG_BLOCK, &set, &oset);
872	if ((hashp->fp = mkstemp(namestr)) != -1) {
873		(void)unlink(namestr);
874		(void)_libc_fcntl(hashp->fp, F_SETFD, 1);
875	}
876	(void)sigprocmask(SIG_SETMASK, &oset, (sigset_t *)NULL);
877	return (hashp->fp != -1 ? 0 : -1);
878}
879
880/*
881 * We have to know that the key will fit, but the last entry on the page is
882 * an overflow pair, so we need to shift things.
883 */
884static void
885squeeze_key(sp, key, val)
886	u_int16_t *sp;
887	const DBT *key, *val;
888{
889	register char *p;
890	u_int16_t free_space, n, off, pageno;
891
892	p = (char *)sp;
893	n = sp[0];
894	free_space = FREESPACE(sp);
895	off = OFFSET(sp);
896
897	pageno = sp[n - 1];
898	off -= key->size;
899	sp[n - 1] = off;
900	memmove(p + off, key->data, key->size);
901	off -= val->size;
902	sp[n] = off;
903	memmove(p + off, val->data, val->size);
904	sp[0] = n + 2;
905	sp[n + 1] = pageno;
906	sp[n + 2] = OVFLPAGE;
907	FREESPACE(sp) = free_space - PAIRSIZE(key, val);
908	OFFSET(sp) = off;
909}
910
911static u_int32_t *
912fetch_bitmap(hashp, ndx)
913	HTAB *hashp;
914	int ndx;
915{
916	if (ndx >= hashp->nmaps)
917		return (NULL);
918	if ((hashp->mapp[ndx] = (u_int32_t *)malloc(hashp->BSIZE)) == NULL)
919		return (NULL);
920	if (__get_page(hashp,
921	    (char *)hashp->mapp[ndx], hashp->BITMAPS[ndx], 0, 1, 1)) {
922		free(hashp->mapp[ndx]);
923		return (NULL);
924	}
925	return (hashp->mapp[ndx]);
926}
927
#ifdef DEBUG4
/*
 * Debugging aid: print to stderr the address of the page at addr followed
 * by the addresses of the pages chained after it, on one line.
 *
 * NOTE(review): this routine uses a variable named hashp that is neither
 * a parameter nor declared in this file -- confirm where it is expected
 * to come from before enabling DEBUG4.
 *
 * Returns:
 *	 0 when the whole chain was printed
 *	-1 if a page in the chain could not be obtained
 */
int
print_chain(addr)
	int addr;
{
	BUFHEAD *bufp;
	u_int16_t *bp, oaddr;

	(void)fprintf(stderr, "%d ", addr);
	bufp = __get_buf(hashp, addr, NULL, 0);
	if (bufp == NULL) {
		(void)fprintf(stderr, "\n");
		return (-1);
	}
	/* Page entries are unsigned 16-bit values; read them as such. */
	bp = (u_int16_t *)bufp->page;
	while (bp[0] && ((bp[bp[0]] == OVFLPAGE) ||
		((bp[0] > 2) && bp[2] < REAL_KEY))) {
		oaddr = bp[bp[0] - 1];
		(void)fprintf(stderr, "%d ", (int)oaddr);
		bufp = __get_buf(hashp, (int)oaddr, bufp, 0);
		if (bufp == NULL) {
			(void)fprintf(stderr, "\n");
			return (-1);
		}
		bp = (u_int16_t *)bufp->page;
	}
	(void)fprintf(stderr, "\n");
	return (0);
}
#endif
949