1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Margo Seltzer.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#if defined(LIBC_SCCS) && !defined(lint)
36static char sccsid[] = "@(#)hash_page.c	8.7 (Berkeley) 8/16/94";
37#endif /* LIBC_SCCS and not lint */
38#include <sys/cdefs.h>
39__FBSDID("$FreeBSD$");
40
41/*
42 * PACKAGE:  hashing
43 *
44 * DESCRIPTION:
45 *	Page manipulation for hashing package.
46 *
47 * ROUTINES:
48 *
49 * External
50 *	__get_page
51 *	__add_ovflpage
52 * Internal
53 *	overflow_page
54 *	open_temp
55 */
56
57#include "namespace.h"
58#include <sys/param.h>
59
60#include <errno.h>
61#include <fcntl.h>
62#include <signal.h>
63#include <stdio.h>
64#include <stdlib.h>
65#include <string.h>
66#include <unistd.h>
67#ifdef DEBUG
68#include <assert.h>
69#endif
70#include "un-namespace.h"
71#include "libc_private.h"
72
73#include <db.h>
74#include "hash.h"
75#include "page.h"
76#include "extern.h"
77
78static u_int32_t *fetch_bitmap(HTAB *, int);
79static u_int32_t  first_free(u_int32_t);
80static int	  open_temp(HTAB *);
81static u_int16_t  overflow_page(HTAB *);
82static void	  putpair(char *, const DBT *, const DBT *);
83static void	  squeeze_key(u_int16_t *, const DBT *, const DBT *);
84static int	  ugly_split(HTAB *, u_int32_t, BUFHEAD *, BUFHEAD *, int, int);
85
86#define	PAGE_INIT(P) { \
87	((u_int16_t *)(P))[0] = 0; \
88	((u_int16_t *)(P))[1] = hashp->BSIZE - 3 * sizeof(u_int16_t); \
89	((u_int16_t *)(P))[2] = hashp->BSIZE; \
90}
91
92/*
93 * This is called AFTER we have verified that there is room on the page for
94 * the pair (PAIRFITS has returned true) so we go right ahead and start moving
95 * stuff on.
96 */
97static void
98putpair(char *p, const DBT *key, const DBT *val)
99{
100	u_int16_t *bp, n, off;
101
102	bp = (u_int16_t *)p;
103
104	/* Enter the key first. */
105	n = bp[0];
106
107	off = OFFSET(bp) - key->size;
108	memmove(p + off, key->data, key->size);
109	bp[++n] = off;
110
111	/* Now the data. */
112	off -= val->size;
113	memmove(p + off, val->data, val->size);
114	bp[++n] = off;
115
116	/* Adjust page info. */
117	bp[0] = n;
118	bp[n + 1] = off - ((n + 3) * sizeof(u_int16_t));
119	bp[n + 2] = off;
120}
121
122/*
123 * Returns:
124 *	 0 OK
125 *	-1 error
126 */
127int
128__delpair(HTAB *hashp, BUFHEAD *bufp, int ndx)
129{
130	u_int16_t *bp, newoff, pairlen;
131	int n;
132
133	bp = (u_int16_t *)bufp->page;
134	n = bp[0];
135
136	if (bp[ndx + 1] < REAL_KEY)
137		return (__big_delete(hashp, bufp));
138	if (ndx != 1)
139		newoff = bp[ndx - 1];
140	else
141		newoff = hashp->BSIZE;
142	pairlen = newoff - bp[ndx + 1];
143
144	if (ndx != (n - 1)) {
145		/* Hard Case -- need to shuffle keys */
146		int i;
147		char *src = bufp->page + (int)OFFSET(bp);
148		char *dst = src + (int)pairlen;
149		memmove(dst, src, bp[ndx + 1] - OFFSET(bp));
150
151		/* Now adjust the pointers */
152		for (i = ndx + 2; i <= n; i += 2) {
153			if (bp[i + 1] == OVFLPAGE) {
154				bp[i - 2] = bp[i];
155				bp[i - 1] = bp[i + 1];
156			} else {
157				bp[i - 2] = bp[i] + pairlen;
158				bp[i - 1] = bp[i + 1] + pairlen;
159			}
160		}
161		if (ndx == hashp->cndx) {
162			/*
163			 * We just removed pair we were "pointing" to.
164			 * By moving back the cndx we ensure subsequent
165			 * hash_seq() calls won't skip over any entries.
166			 */
167			hashp->cndx -= 2;
168		}
169	}
170	/* Finally adjust the page data */
171	bp[n] = OFFSET(bp) + pairlen;
172	bp[n - 1] = bp[n + 1] + pairlen + 2 * sizeof(u_int16_t);
173	bp[0] = n - 2;
174	hashp->NKEYS--;
175
176	bufp->flags |= BUF_MOD;
177	return (0);
178}
179/*
180 * Returns:
181 *	 0 ==> OK
182 *	-1 ==> Error
183 */
184int
185__split_page(HTAB *hashp, u_int32_t obucket, u_int32_t nbucket)
186{
187	BUFHEAD *new_bufp, *old_bufp;
188	u_int16_t *ino;
189	char *np;
190	DBT key, val;
191	int n, ndx, retval;
192	u_int16_t copyto, diff, off, moved;
193	char *op;
194
195	copyto = (u_int16_t)hashp->BSIZE;
196	off = (u_int16_t)hashp->BSIZE;
197	old_bufp = __get_buf(hashp, obucket, NULL, 0);
198	if (old_bufp == NULL)
199		return (-1);
200	new_bufp = __get_buf(hashp, nbucket, NULL, 0);
201	if (new_bufp == NULL)
202		return (-1);
203
204	old_bufp->flags |= (BUF_MOD | BUF_PIN);
205	new_bufp->flags |= (BUF_MOD | BUF_PIN);
206
207	ino = (u_int16_t *)(op = old_bufp->page);
208	np = new_bufp->page;
209
210	moved = 0;
211
212	for (n = 1, ndx = 1; n < ino[0]; n += 2) {
213		if (ino[n + 1] < REAL_KEY) {
214			retval = ugly_split(hashp, obucket, old_bufp, new_bufp,
215			    (int)copyto, (int)moved);
216			old_bufp->flags &= ~BUF_PIN;
217			new_bufp->flags &= ~BUF_PIN;
218			return (retval);
219
220		}
221		key.data = (u_char *)op + ino[n];
222		key.size = off - ino[n];
223
224		if (__call_hash(hashp, key.data, key.size) == obucket) {
225			/* Don't switch page */
226			diff = copyto - off;
227			if (diff) {
228				copyto = ino[n + 1] + diff;
229				memmove(op + copyto, op + ino[n + 1],
230				    off - ino[n + 1]);
231				ino[ndx] = copyto + ino[n] - ino[n + 1];
232				ino[ndx + 1] = copyto;
233			} else
234				copyto = ino[n + 1];
235			ndx += 2;
236		} else {
237			/* Switch page */
238			val.data = (u_char *)op + ino[n + 1];
239			val.size = ino[n] - ino[n + 1];
240			putpair(np, &key, &val);
241			moved += 2;
242		}
243
244		off = ino[n + 1];
245	}
246
247	/* Now clean up the page */
248	ino[0] -= moved;
249	FREESPACE(ino) = copyto - sizeof(u_int16_t) * (ino[0] + 3);
250	OFFSET(ino) = copyto;
251
252#ifdef DEBUG3
253	(void)fprintf(stderr, "split %d/%d\n",
254	    ((u_int16_t *)np)[0] / 2,
255	    ((u_int16_t *)op)[0] / 2);
256#endif
257	/* unpin both pages */
258	old_bufp->flags &= ~BUF_PIN;
259	new_bufp->flags &= ~BUF_PIN;
260	return (0);
261}
262
263/*
264 * Called when we encounter an overflow or big key/data page during split
265 * handling.  This is special cased since we have to begin checking whether
266 * the key/data pairs fit on their respective pages and because we may need
267 * overflow pages for both the old and new pages.
268 *
269 * The first page might be a page with regular key/data pairs in which case
270 * we have a regular overflow condition and just need to go on to the next
271 * page or it might be a big key/data pair in which case we need to fix the
272 * big key/data pair.
273 *
274 * Returns:
275 *	 0 ==> success
276 *	-1 ==> failure
277 */
278static int
279ugly_split(HTAB *hashp,
280    u_int32_t obucket,	/* Same as __split_page. */
281    BUFHEAD *old_bufp,
282    BUFHEAD *new_bufp,
283    int copyto,		/* First byte on page which contains key/data values. */
284    int moved)		/* Number of pairs moved to new page. */
285{
286	BUFHEAD *bufp;	/* Buffer header for ino */
287	u_int16_t *ino;	/* Page keys come off of */
288	u_int16_t *np;	/* New page */
289	u_int16_t *op;	/* Page keys go on to if they aren't moving */
290
291	BUFHEAD *last_bfp;	/* Last buf header OVFL needing to be freed */
292	DBT key, val;
293	SPLIT_RETURN ret;
294	u_int16_t n, off, ov_addr, scopyto;
295	char *cino;		/* Character value of ino */
296
297	bufp = old_bufp;
298	ino = (u_int16_t *)old_bufp->page;
299	np = (u_int16_t *)new_bufp->page;
300	op = (u_int16_t *)old_bufp->page;
301	last_bfp = NULL;
302	scopyto = (u_int16_t)copyto;	/* ANSI */
303
304	n = ino[0] - 1;
305	while (n < ino[0]) {
306		if (ino[2] < REAL_KEY && ino[2] != OVFLPAGE) {
307			if (__big_split(hashp, old_bufp,
308			    new_bufp, bufp, bufp->addr, obucket, &ret))
309				return (-1);
310			old_bufp = ret.oldp;
311			if (!old_bufp)
312				return (-1);
313			op = (u_int16_t *)old_bufp->page;
314			new_bufp = ret.newp;
315			if (!new_bufp)
316				return (-1);
317			np = (u_int16_t *)new_bufp->page;
318			bufp = ret.nextp;
319			if (!bufp)
320				return (0);
321			cino = (char *)bufp->page;
322			ino = (u_int16_t *)cino;
323			last_bfp = ret.nextp;
324		} else if (ino[n + 1] == OVFLPAGE) {
325			ov_addr = ino[n];
326			/*
327			 * Fix up the old page -- the extra 2 are the fields
328			 * which contained the overflow information.
329			 */
330			ino[0] -= (moved + 2);
331			FREESPACE(ino) =
332			    scopyto - sizeof(u_int16_t) * (ino[0] + 3);
333			OFFSET(ino) = scopyto;
334
335			bufp = __get_buf(hashp, ov_addr, bufp, 0);
336			if (!bufp)
337				return (-1);
338
339			ino = (u_int16_t *)bufp->page;
340			n = 1;
341			scopyto = hashp->BSIZE;
342			moved = 0;
343
344			if (last_bfp)
345				__free_ovflpage(hashp, last_bfp);
346			last_bfp = bufp;
347		}
348		/* Move regular sized pairs of there are any */
349		off = hashp->BSIZE;
350		for (n = 1; (n < ino[0]) && (ino[n + 1] >= REAL_KEY); n += 2) {
351			cino = (char *)ino;
352			key.data = (u_char *)cino + ino[n];
353			key.size = off - ino[n];
354			val.data = (u_char *)cino + ino[n + 1];
355			val.size = ino[n] - ino[n + 1];
356			off = ino[n + 1];
357
358			if (__call_hash(hashp, key.data, key.size) == obucket) {
359				/* Keep on old page */
360				if (PAIRFITS(op, (&key), (&val)))
361					putpair((char *)op, &key, &val);
362				else {
363					old_bufp =
364					    __add_ovflpage(hashp, old_bufp);
365					if (!old_bufp)
366						return (-1);
367					op = (u_int16_t *)old_bufp->page;
368					putpair((char *)op, &key, &val);
369				}
370				old_bufp->flags |= BUF_MOD;
371			} else {
372				/* Move to new page */
373				if (PAIRFITS(np, (&key), (&val)))
374					putpair((char *)np, &key, &val);
375				else {
376					new_bufp =
377					    __add_ovflpage(hashp, new_bufp);
378					if (!new_bufp)
379						return (-1);
380					np = (u_int16_t *)new_bufp->page;
381					putpair((char *)np, &key, &val);
382				}
383				new_bufp->flags |= BUF_MOD;
384			}
385		}
386	}
387	if (last_bfp)
388		__free_ovflpage(hashp, last_bfp);
389	return (0);
390}
391
392/*
393 * Add the given pair to the page
394 *
395 * Returns:
396 *	0 ==> OK
397 *	1 ==> failure
398 */
399int
400__addel(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val)
401{
402	u_int16_t *bp, *sop;
403	int do_expand;
404
405	bp = (u_int16_t *)bufp->page;
406	do_expand = 0;
407	while (bp[0] && (bp[2] < REAL_KEY || bp[bp[0]] < REAL_KEY))
408		/* Exception case */
409		if (bp[2] == FULL_KEY_DATA && bp[0] == 2)
410			/* This is the last page of a big key/data pair
411			   and we need to add another page */
412			break;
413		else if (bp[2] < REAL_KEY && bp[bp[0]] != OVFLPAGE) {
414			bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
415			if (!bufp)
416				return (-1);
417			bp = (u_int16_t *)bufp->page;
418		} else if (bp[bp[0]] != OVFLPAGE) {
419			/* Short key/data pairs, no more pages */
420			break;
421		} else {
422			/* Try to squeeze key on this page */
423			if (bp[2] >= REAL_KEY &&
424			    FREESPACE(bp) >= PAIRSIZE(key, val)) {
425				squeeze_key(bp, key, val);
426				goto stats;
427			} else {
428				bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
429				if (!bufp)
430					return (-1);
431				bp = (u_int16_t *)bufp->page;
432			}
433		}
434
435	if (PAIRFITS(bp, key, val))
436		putpair(bufp->page, key, val);
437	else {
438		do_expand = 1;
439		bufp = __add_ovflpage(hashp, bufp);
440		if (!bufp)
441			return (-1);
442		sop = (u_int16_t *)bufp->page;
443
444		if (PAIRFITS(sop, key, val))
445			putpair((char *)sop, key, val);
446		else
447			if (__big_insert(hashp, bufp, key, val))
448				return (-1);
449	}
450stats:
451	bufp->flags |= BUF_MOD;
452	/*
453	 * If the average number of keys per bucket exceeds the fill factor,
454	 * expand the table.
455	 */
456	hashp->NKEYS++;
457	if (do_expand ||
458	    (hashp->NKEYS / (hashp->MAX_BUCKET + 1) > hashp->FFACTOR))
459		return (__expand_table(hashp));
460	return (0);
461}
462
463/*
464 *
465 * Returns:
466 *	pointer on success
467 *	NULL on error
468 */
469BUFHEAD *
470__add_ovflpage(HTAB *hashp, BUFHEAD *bufp)
471{
472	u_int16_t *sp, ndx, ovfl_num;
473#ifdef DEBUG1
474	int tmp1, tmp2;
475#endif
476	sp = (u_int16_t *)bufp->page;
477
478	/* Check if we are dynamically determining the fill factor */
479	if (hashp->FFACTOR == DEF_FFACTOR) {
480		hashp->FFACTOR = sp[0] >> 1;
481		if (hashp->FFACTOR < MIN_FFACTOR)
482			hashp->FFACTOR = MIN_FFACTOR;
483	}
484	bufp->flags |= BUF_MOD;
485	ovfl_num = overflow_page(hashp);
486#ifdef DEBUG1
487	tmp1 = bufp->addr;
488	tmp2 = bufp->ovfl ? bufp->ovfl->addr : 0;
489#endif
490	if (!ovfl_num || !(bufp->ovfl = __get_buf(hashp, ovfl_num, bufp, 1)))
491		return (NULL);
492	bufp->ovfl->flags |= BUF_MOD;
493#ifdef DEBUG1
494	(void)fprintf(stderr, "ADDOVFLPAGE: %d->ovfl was %d is now %d\n",
495	    tmp1, tmp2, bufp->ovfl->addr);
496#endif
497	ndx = sp[0];
498	/*
499	 * Since a pair is allocated on a page only if there's room to add
500	 * an overflow page, we know that the OVFL information will fit on
501	 * the page.
502	 */
503	sp[ndx + 4] = OFFSET(sp);
504	sp[ndx + 3] = FREESPACE(sp) - OVFLSIZE;
505	sp[ndx + 1] = ovfl_num;
506	sp[ndx + 2] = OVFLPAGE;
507	sp[0] = ndx + 2;
508#ifdef HASH_STATISTICS
509	hash_overflows++;
510#endif
511	return (bufp->ovfl);
512}
513
514/*
515 * Returns:
516 *	 0 indicates SUCCESS
517 *	-1 indicates FAILURE
518 */
519int
520__get_page(HTAB *hashp, char *p, u_int32_t bucket, int is_bucket, int is_disk,
521    int is_bitmap)
522{
523	int fd, page, size, rsize;
524	u_int16_t *bp;
525
526	fd = hashp->fp;
527	size = hashp->BSIZE;
528
529	if ((fd == -1) || !is_disk) {
530		PAGE_INIT(p);
531		return (0);
532	}
533	if (is_bucket)
534		page = BUCKET_TO_PAGE(bucket);
535	else
536		page = OADDR_TO_PAGE(bucket);
537	if ((rsize = pread(fd, p, size, (off_t)page << hashp->BSHIFT)) == -1)
538		return (-1);
539	bp = (u_int16_t *)p;
540	if (!rsize)
541		bp[0] = 0;	/* We hit the EOF, so initialize a new page */
542	else
543		if (rsize != size) {
544			errno = EFTYPE;
545			return (-1);
546		}
547	if (!is_bitmap && !bp[0]) {
548		PAGE_INIT(p);
549	} else
550		if (hashp->LORDER != BYTE_ORDER) {
551			int i, max;
552
553			if (is_bitmap) {
554				max = hashp->BSIZE >> 2; /* divide by 4 */
555				for (i = 0; i < max; i++)
556					M_32_SWAP(((int *)p)[i]);
557			} else {
558				M_16_SWAP(bp[0]);
559				max = bp[0] + 2;
560				for (i = 1; i <= max; i++)
561					M_16_SWAP(bp[i]);
562			}
563		}
564	return (0);
565}
566
567/*
568 * Write page p to disk
569 *
570 * Returns:
571 *	 0 ==> OK
572 *	-1 ==>failure
573 */
574int
575__put_page(HTAB *hashp, char *p, u_int32_t bucket, int is_bucket, int is_bitmap)
576{
577	int fd, page, size;
578	ssize_t wsize;
579	char pbuf[MAX_BSIZE];
580
581	size = hashp->BSIZE;
582	if ((hashp->fp == -1) && open_temp(hashp))
583		return (-1);
584	fd = hashp->fp;
585
586	if (hashp->LORDER != BYTE_ORDER) {
587		int i, max;
588
589		memcpy(pbuf, p, size);
590		if (is_bitmap) {
591			max = hashp->BSIZE >> 2;	/* divide by 4 */
592			for (i = 0; i < max; i++)
593				M_32_SWAP(((int *)pbuf)[i]);
594		} else {
595			uint16_t *bp = (uint16_t *)(void *)pbuf;
596			max = bp[0] + 2;
597			for (i = 0; i <= max; i++)
598				M_16_SWAP(bp[i]);
599		}
600		p = pbuf;
601	}
602	if (is_bucket)
603		page = BUCKET_TO_PAGE(bucket);
604	else
605		page = OADDR_TO_PAGE(bucket);
606	if ((wsize = pwrite(fd, p, size, (off_t)page << hashp->BSHIFT)) == -1)
607		/* Errno is set */
608		return (-1);
609	if (wsize != size) {
610		errno = EFTYPE;
611		return (-1);
612	}
613	return (0);
614}
615
616#define BYTE_MASK	((1 << INT_BYTE_SHIFT) -1)
617/*
618 * Initialize a new bitmap page.  Bitmap pages are left in memory
619 * once they are read in.
620 */
621int
622__ibitmap(HTAB *hashp, int pnum, int nbits, int ndx)
623{
624	u_int32_t *ip;
625	int clearbytes, clearints;
626
627	if ((ip = (u_int32_t *)malloc(hashp->BSIZE)) == NULL)
628		return (1);
629	hashp->nmaps++;
630	clearints = ((nbits - 1) >> INT_BYTE_SHIFT) + 1;
631	clearbytes = clearints << INT_TO_BYTE;
632	(void)memset((char *)ip, 0, clearbytes);
633	(void)memset(((char *)ip) + clearbytes, 0xFF,
634	    hashp->BSIZE - clearbytes);
635	ip[clearints - 1] = ALL_SET << (nbits & BYTE_MASK);
636	SETBIT(ip, 0);
637	hashp->BITMAPS[ndx] = (u_int16_t)pnum;
638	hashp->mapp[ndx] = ip;
639	return (0);
640}
641
642static u_int32_t
643first_free(u_int32_t map)
644{
645	u_int32_t i, mask;
646
647	mask = 0x1;
648	for (i = 0; i < BITS_PER_MAP; i++) {
649		if (!(mask & map))
650			return (i);
651		mask = mask << 1;
652	}
653	return (i);
654}
655
656static u_int16_t
657overflow_page(HTAB *hashp)
658{
659	u_int32_t *freep;
660	int max_free, offset, splitnum;
661	u_int16_t addr;
662	int bit, first_page, free_bit, free_page, i, in_use_bits, j;
663#ifdef DEBUG2
664	int tmp1, tmp2;
665#endif
666	splitnum = hashp->OVFL_POINT;
667	max_free = hashp->SPARES[splitnum];
668
669	free_page = (max_free - 1) >> (hashp->BSHIFT + BYTE_SHIFT);
670	free_bit = (max_free - 1) & ((hashp->BSIZE << BYTE_SHIFT) - 1);
671
672	/* Look through all the free maps to find the first free block */
673	first_page = hashp->LAST_FREED >>(hashp->BSHIFT + BYTE_SHIFT);
674	for ( i = first_page; i <= free_page; i++ ) {
675		if (!(freep = (u_int32_t *)hashp->mapp[i]) &&
676		    !(freep = fetch_bitmap(hashp, i)))
677			return (0);
678		if (i == free_page)
679			in_use_bits = free_bit;
680		else
681			in_use_bits = (hashp->BSIZE << BYTE_SHIFT) - 1;
682
683		if (i == first_page) {
684			bit = hashp->LAST_FREED &
685			    ((hashp->BSIZE << BYTE_SHIFT) - 1);
686			j = bit / BITS_PER_MAP;
687			bit = rounddown2(bit, BITS_PER_MAP);
688		} else {
689			bit = 0;
690			j = 0;
691		}
692		for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP)
693			if (freep[j] != ALL_SET)
694				goto found;
695	}
696
697	/* No Free Page Found */
698	hashp->LAST_FREED = hashp->SPARES[splitnum];
699	hashp->SPARES[splitnum]++;
700	offset = hashp->SPARES[splitnum] -
701	    (splitnum ? hashp->SPARES[splitnum - 1] : 0);
702
703#define	OVMSG	"HASH: Out of overflow pages.  Increase page size\n"
704	if (offset > SPLITMASK) {
705		if (++splitnum >= NCACHED) {
706			(void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
707			errno = EFBIG;
708			return (0);
709		}
710		hashp->OVFL_POINT = splitnum;
711		hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
712		hashp->SPARES[splitnum-1]--;
713		offset = 1;
714	}
715
716	/* Check if we need to allocate a new bitmap page */
717	if (free_bit == (hashp->BSIZE << BYTE_SHIFT) - 1) {
718		free_page++;
719		if (free_page >= NCACHED) {
720			(void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
721			errno = EFBIG;
722			return (0);
723		}
724		/*
725		 * This is tricky.  The 1 indicates that you want the new page
726		 * allocated with 1 clear bit.  Actually, you are going to
727		 * allocate 2 pages from this map.  The first is going to be
728		 * the map page, the second is the overflow page we were
729		 * looking for.  The init_bitmap routine automatically, sets
730		 * the first bit of itself to indicate that the bitmap itself
731		 * is in use.  We would explicitly set the second bit, but
732		 * don't have to if we tell init_bitmap not to leave it clear
733		 * in the first place.
734		 */
735		if (__ibitmap(hashp,
736		    (int)OADDR_OF(splitnum, offset), 1, free_page))
737			return (0);
738		hashp->SPARES[splitnum]++;
739#ifdef DEBUG2
740		free_bit = 2;
741#endif
742		offset++;
743		if (offset > SPLITMASK) {
744			if (++splitnum >= NCACHED) {
745				(void)_write(STDERR_FILENO, OVMSG,
746				    sizeof(OVMSG) - 1);
747				errno = EFBIG;
748				return (0);
749			}
750			hashp->OVFL_POINT = splitnum;
751			hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1];
752			hashp->SPARES[splitnum-1]--;
753			offset = 0;
754		}
755	} else {
756		/*
757		 * Free_bit addresses the last used bit.  Bump it to address
758		 * the first available bit.
759		 */
760		free_bit++;
761		SETBIT(freep, free_bit);
762	}
763
764	/* Calculate address of the new overflow page */
765	addr = OADDR_OF(splitnum, offset);
766#ifdef DEBUG2
767	(void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
768	    addr, free_bit, free_page);
769#endif
770	return (addr);
771
772found:
773	bit = bit + first_free(freep[j]);
774	SETBIT(freep, bit);
775#ifdef DEBUG2
776	tmp1 = bit;
777	tmp2 = i;
778#endif
779	/*
780	 * Bits are addressed starting with 0, but overflow pages are addressed
781	 * beginning at 1. Bit is a bit addressnumber, so we need to increment
782	 * it to convert it to a page number.
783	 */
784	bit = 1 + bit + (i * (hashp->BSIZE << BYTE_SHIFT));
785	if (bit >= hashp->LAST_FREED)
786		hashp->LAST_FREED = bit - 1;
787
788	/* Calculate the split number for this page */
789	for (i = 0; (i < splitnum) && (bit > hashp->SPARES[i]); i++);
790	offset = (i ? bit - hashp->SPARES[i - 1] : bit);
791	if (offset >= SPLITMASK) {
792		(void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
793		errno = EFBIG;
794		return (0);	/* Out of overflow pages */
795	}
796	addr = OADDR_OF(i, offset);
797#ifdef DEBUG2
798	(void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
799	    addr, tmp1, tmp2);
800#endif
801
802	/* Allocate and return the overflow page */
803	return (addr);
804}
805
806/*
807 * Mark this overflow page as free.
808 */
809void
810__free_ovflpage(HTAB *hashp, BUFHEAD *obufp)
811{
812	u_int16_t addr;
813	u_int32_t *freep;
814	int bit_address, free_page, free_bit;
815	u_int16_t ndx;
816
817	addr = obufp->addr;
818#ifdef DEBUG1
819	(void)fprintf(stderr, "Freeing %d\n", addr);
820#endif
821	ndx = (((u_int16_t)addr) >> SPLITSHIFT);
822	bit_address =
823	    (ndx ? hashp->SPARES[ndx - 1] : 0) + (addr & SPLITMASK) - 1;
824	 if (bit_address < hashp->LAST_FREED)
825		hashp->LAST_FREED = bit_address;
826	free_page = (bit_address >> (hashp->BSHIFT + BYTE_SHIFT));
827	free_bit = bit_address & ((hashp->BSIZE << BYTE_SHIFT) - 1);
828
829	if (!(freep = hashp->mapp[free_page]))
830		freep = fetch_bitmap(hashp, free_page);
831#ifdef DEBUG
832	/*
833	 * This had better never happen.  It means we tried to read a bitmap
834	 * that has already had overflow pages allocated off it, and we
835	 * failed to read it from the file.
836	 */
837	if (!freep)
838		assert(0);
839#endif
840	CLRBIT(freep, free_bit);
841#ifdef DEBUG2
842	(void)fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n",
843	    obufp->addr, free_bit, free_page);
844#endif
845	__reclaim_buf(hashp, obufp);
846}
847
848/*
849 * Returns:
850 *	 0 success
851 *	-1 failure
852 */
853static int
854open_temp(HTAB *hashp)
855{
856	sigset_t set, oset;
857	int len;
858	char *envtmp = NULL;
859	char path[MAXPATHLEN];
860
861	if (issetugid() == 0)
862		envtmp = getenv("TMPDIR");
863	len = snprintf(path,
864	    sizeof(path), "%s/_hash.XXXXXX", envtmp ? envtmp : "/tmp");
865	if (len < 0 || len >= (int)sizeof(path)) {
866		errno = ENAMETOOLONG;
867		return (-1);
868	}
869
870	/* Block signals; make sure file goes away at process exit. */
871	(void)sigfillset(&set);
872	(void)__libc_sigprocmask(SIG_BLOCK, &set, &oset);
873	if ((hashp->fp = mkostemp(path, O_CLOEXEC)) != -1)
874		(void)unlink(path);
875	(void)__libc_sigprocmask(SIG_SETMASK, &oset, (sigset_t *)NULL);
876	return (hashp->fp != -1 ? 0 : -1);
877}
878
879/*
880 * We have to know that the key will fit, but the last entry on the page is
881 * an overflow pair, so we need to shift things.
882 */
883static void
884squeeze_key(u_int16_t *sp, const DBT *key, const DBT *val)
885{
886	char *p;
887	u_int16_t free_space, n, off, pageno;
888
889	p = (char *)sp;
890	n = sp[0];
891	free_space = FREESPACE(sp);
892	off = OFFSET(sp);
893
894	pageno = sp[n - 1];
895	off -= key->size;
896	sp[n - 1] = off;
897	memmove(p + off, key->data, key->size);
898	off -= val->size;
899	sp[n] = off;
900	memmove(p + off, val->data, val->size);
901	sp[0] = n + 2;
902	sp[n + 1] = pageno;
903	sp[n + 2] = OVFLPAGE;
904	FREESPACE(sp) = free_space - PAIRSIZE(key, val);
905	OFFSET(sp) = off;
906}
907
908static u_int32_t *
909fetch_bitmap(HTAB *hashp, int ndx)
910{
911	if (ndx >= hashp->nmaps)
912		return (NULL);
913	if ((hashp->mapp[ndx] = (u_int32_t *)malloc(hashp->BSIZE)) == NULL)
914		return (NULL);
915	if (__get_page(hashp,
916	    (char *)hashp->mapp[ndx], hashp->BITMAPS[ndx], 0, 1, 1)) {
917		free(hashp->mapp[ndx]);
918		return (NULL);
919	}
920	return (hashp->mapp[ndx]);
921}
922
923#ifdef DEBUG4
924int
925print_chain(int addr)
926{
927	BUFHEAD *bufp;
928	short *bp, oaddr;
929
930	(void)fprintf(stderr, "%d ", addr);
931	bufp = __get_buf(hashp, addr, NULL, 0);
932	bp = (short *)bufp->page;
933	while (bp[0] && ((bp[bp[0]] == OVFLPAGE) ||
934		((bp[0] > 2) && bp[2] < REAL_KEY))) {
935		oaddr = bp[bp[0] - 1];
936		(void)fprintf(stderr, "%d ", (int)oaddr);
937		bufp = __get_buf(hashp, (int)oaddr, bufp, 0);
938		bp = (short *)bufp->page;
939	}
940	(void)fprintf(stderr, "\n");
941}
942#endif
943