hash_bigkey.c revision 14272
1/*-
2 * Copyright (c) 1990, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Margo Seltzer.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#if defined(LIBC_SCCS) && !defined(lint)
38static char sccsid[] = "@(#)hash_bigkey.c	8.3 (Berkeley) 5/31/94";
39#endif /* LIBC_SCCS and not lint */
40
41/*
42 * PACKAGE: hash
43 * DESCRIPTION:
44 *	Big key/data handling for the hashing package.
45 *
46 * ROUTINES:
47 * External
48 *	__big_keydata
49 *	__big_split
50 *	__big_insert
51 *	__big_return
52 *	__big_delete
53 *	__find_last_page
54 * Internal
55 *	collect_key
56 *	collect_data
57 */
58
59#include <sys/param.h>
60
61#include <errno.h>
62#include <stdio.h>
63#include <stdlib.h>
64#include <string.h>
65
66#ifdef DEBUG
67#include <assert.h>
68#endif
69
70#include <db.h>
71#include "hash.h"
72#include "page.h"
73#include "extern.h"
74
75static int collect_key __P((HTAB *, BUFHEAD *, int, DBT *, int));
76static int collect_data __P((HTAB *, BUFHEAD *, int, int));
77
78/*
79 * Big_insert
80 *
81 * You need to do an insert and the key/data pair is too big
82 *
83 * Returns:
84 * 0 ==> OK
85 *-1 ==> ERROR
86 */
87extern int
88__big_insert(hashp, bufp, key, val)
89	HTAB *hashp;
90	BUFHEAD *bufp;
91	const DBT *key, *val;
92{
93	register u_int16_t *p;
94	int key_size, n, val_size;
95	u_int16_t space, move_bytes, off;
96	char *cp, *key_data, *val_data;
97
98	cp = bufp->page;		/* Character pointer of p. */
99	p = (u_int16_t *)cp;
100
101	key_data = (char *)key->data;
102	key_size = key->size;
103	val_data = (char *)val->data;
104	val_size = val->size;
105
106	/* First move the Key */
107	for (space = FREESPACE(p) - BIGOVERHEAD; key_size;
108	    space = FREESPACE(p) - BIGOVERHEAD) {
109		move_bytes = MIN(space, key_size);
110		off = OFFSET(p) - move_bytes;
111		memmove(cp + off, key_data, move_bytes);
112		key_size -= move_bytes;
113		key_data += move_bytes;
114		n = p[0];
115		p[++n] = off;
116		p[0] = ++n;
117		FREESPACE(p) = off - PAGE_META(n);
118		OFFSET(p) = off;
119		p[n] = PARTIAL_KEY;
120		bufp = __add_ovflpage(hashp, bufp);
121		if (!bufp)
122			return (-1);
123		n = p[0];
124		if (!key_size)
125			if (FREESPACE(p)) {
126				move_bytes = MIN(FREESPACE(p), val_size);
127				off = OFFSET(p) - move_bytes;
128				p[n] = off;
129				memmove(cp + off, val_data, move_bytes);
130				val_data += move_bytes;
131				val_size -= move_bytes;
132				p[n - 2] = FULL_KEY_DATA;
133				FREESPACE(p) = FREESPACE(p) - move_bytes;
134				OFFSET(p) = off;
135			} else
136				p[n - 2] = FULL_KEY;
137		p = (u_int16_t *)bufp->page;
138		cp = bufp->page;
139		bufp->flags |= BUF_MOD;
140	}
141
142	/* Now move the data */
143	for (space = FREESPACE(p) - BIGOVERHEAD; val_size;
144	    space = FREESPACE(p) - BIGOVERHEAD) {
145		move_bytes = MIN(space, val_size);
146		/*
147		 * Here's the hack to make sure that if the data ends on the
148		 * same page as the key ends, FREESPACE is at least one.
149		 */
150		if (space == val_size && val_size == val->size)
151			move_bytes--;
152		off = OFFSET(p) - move_bytes;
153		memmove(cp + off, val_data, move_bytes);
154		val_size -= move_bytes;
155		val_data += move_bytes;
156		n = p[0];
157		p[++n] = off;
158		p[0] = ++n;
159		FREESPACE(p) = off - PAGE_META(n);
160		OFFSET(p) = off;
161		if (val_size) {
162			p[n] = FULL_KEY;
163			bufp = __add_ovflpage(hashp, bufp);
164			if (!bufp)
165				return (-1);
166			cp = bufp->page;
167			p = (u_int16_t *)cp;
168		} else
169			p[n] = FULL_KEY_DATA;
170		bufp->flags |= BUF_MOD;
171	}
172	return (0);
173}
174
175/*
176 * Called when bufp's page  contains a partial key (index should be 1)
177 *
178 * All pages in the big key/data pair except bufp are freed.  We cannot
179 * free bufp because the page pointing to it is lost and we can't get rid
180 * of its pointer.
181 *
182 * Returns:
183 * 0 => OK
184 *-1 => ERROR
185 */
186extern int
187__big_delete(hashp, bufp)
188	HTAB *hashp;
189	BUFHEAD *bufp;
190{
191	register BUFHEAD *last_bfp, *rbufp;
192	u_int16_t *bp, pageno;
193	int key_done, n;
194
195	rbufp = bufp;
196	last_bfp = NULL;
197	bp = (u_int16_t *)bufp->page;
198	pageno = 0;
199	key_done = 0;
200
201	while (!key_done || (bp[2] != FULL_KEY_DATA)) {
202		if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA)
203			key_done = 1;
204
205		/*
206		 * If there is freespace left on a FULL_KEY_DATA page, then
207		 * the data is short and fits entirely on this page, and this
208		 * is the last page.
209		 */
210		if (bp[2] == FULL_KEY_DATA && FREESPACE(bp))
211			break;
212		pageno = bp[bp[0] - 1];
213		rbufp->flags |= BUF_MOD;
214		rbufp = __get_buf(hashp, pageno, rbufp, 0);
215		if (last_bfp)
216			__free_ovflpage(hashp, last_bfp);
217		last_bfp = rbufp;
218		if (!rbufp)
219			return (-1);		/* Error. */
220		bp = (u_int16_t *)rbufp->page;
221	}
222
223	/*
224	 * If we get here then rbufp points to the last page of the big
225	 * key/data pair.  Bufp points to the first one -- it should now be
226	 * empty pointing to the next page after this pair.  Can't free it
227	 * because we don't have the page pointing to it.
228	 */
229
230	/* This is information from the last page of the pair. */
231	n = bp[0];
232	pageno = bp[n - 1];
233
234	/* Now, bp is the first page of the pair. */
235	bp = (u_int16_t *)bufp->page;
236	if (n > 2) {
237		/* There is an overflow page. */
238		bp[1] = pageno;
239		bp[2] = OVFLPAGE;
240		bufp->ovfl = rbufp->ovfl;
241	} else
242		/* This is the last page. */
243		bufp->ovfl = NULL;
244	n -= 2;
245	bp[0] = n;
246	FREESPACE(bp) = hashp->BSIZE - PAGE_META(n);
247	OFFSET(bp) = hashp->BSIZE - 1;
248
249	bufp->flags |= BUF_MOD;
250	if (rbufp)
251		__free_ovflpage(hashp, rbufp);
252	if (last_bfp != rbufp)
253		__free_ovflpage(hashp, last_bfp);
254
255	hashp->NKEYS--;
256	return (0);
257}
258/*
259 * Returns:
260 *  0 = key not found
261 * -1 = get next overflow page
262 * -2 means key not found and this is big key/data
263 * -3 error
264 */
265extern int
266__find_bigpair(hashp, bufp, ndx, key, size)
267	HTAB *hashp;
268	BUFHEAD *bufp;
269	int ndx;
270	char *key;
271	int size;
272{
273	register u_int16_t *bp;
274	register char *p;
275	int ksize;
276	u_int16_t bytes;
277	char *kkey;
278
279	bp = (u_int16_t *)bufp->page;
280	p = bufp->page;
281	ksize = size;
282	kkey = key;
283
284	for (bytes = hashp->BSIZE - bp[ndx];
285	    bytes <= size && bp[ndx + 1] == PARTIAL_KEY;
286	    bytes = hashp->BSIZE - bp[ndx]) {
287		if (memcmp(p + bp[ndx], kkey, bytes))
288			return (-2);
289		kkey += bytes;
290		ksize -= bytes;
291		bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0);
292		if (!bufp)
293			return (-3);
294		p = bufp->page;
295		bp = (u_int16_t *)p;
296		ndx = 1;
297	}
298
299	if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) {
300#ifdef HASH_STATISTICS
301		++hash_collisions;
302#endif
303		return (-2);
304	} else
305		return (ndx);
306}
307
308/*
309 * Given the buffer pointer of the first overflow page of a big pair,
310 * find the end of the big pair
311 *
312 * This will set bpp to the buffer header of the last page of the big pair.
313 * It will return the pageno of the overflow page following the last page
314 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the
315 * bucket)
316 */
317extern u_int16_t
318__find_last_page(hashp, bpp)
319	HTAB *hashp;
320	BUFHEAD **bpp;
321{
322	BUFHEAD *bufp;
323	u_int16_t *bp, pageno;
324	int n;
325
326	bufp = *bpp;
327	bp = (u_int16_t *)bufp->page;
328	for (;;) {
329		n = bp[0];
330
331		/*
332		 * This is the last page if: the tag is FULL_KEY_DATA and
333		 * either only 2 entries OVFLPAGE marker is explicit there
334		 * is freespace on the page.
335		 */
336		if (bp[2] == FULL_KEY_DATA &&
337		    ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp))))
338			break;
339
340		pageno = bp[n - 1];
341		bufp = __get_buf(hashp, pageno, bufp, 0);
342		if (!bufp)
343			return (0);	/* Need to indicate an error! */
344		bp = (u_int16_t *)bufp->page;
345	}
346
347	*bpp = bufp;
348	if (bp[0] > 2)
349		return (bp[3]);
350	else
351		return (0);
352}
353
354/*
355 * Return the data for the key/data pair that begins on this page at this
356 * index (index should always be 1).
357 */
358extern int
359__big_return(hashp, bufp, ndx, val, set_current)
360	HTAB *hashp;
361	BUFHEAD *bufp;
362	int ndx;
363	DBT *val;
364	int set_current;
365{
366	BUFHEAD *save_p;
367	u_int16_t *bp, len, off, save_addr;
368	char *tp;
369
370	bp = (u_int16_t *)bufp->page;
371	while (bp[ndx + 1] == PARTIAL_KEY) {
372		bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
373		if (!bufp)
374			return (-1);
375		bp = (u_int16_t *)bufp->page;
376		ndx = 1;
377	}
378
379	if (bp[ndx + 1] == FULL_KEY) {
380		bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
381		if (!bufp)
382			return (-1);
383		bp = (u_int16_t *)bufp->page;
384		save_p = bufp;
385		save_addr = save_p->addr;
386		off = bp[1];
387		len = 0;
388	} else
389		if (!FREESPACE(bp)) {
390			/*
391			 * This is a hack.  We can't distinguish between
392			 * FULL_KEY_DATA that contains complete data or
393			 * incomplete data, so we require that if the data
394			 * is complete, there is at least 1 byte of free
395			 * space left.
396			 */
397			off = bp[bp[0]];
398			len = bp[1] - off;
399			save_p = bufp;
400			save_addr = bufp->addr;
401			bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
402			if (!bufp)
403				return (-1);
404			bp = (u_int16_t *)bufp->page;
405		} else {
406			/* The data is all on one page. */
407			tp = (char *)bp;
408			off = bp[bp[0]];
409			val->data = (u_char *)tp + off;
410			val->size = bp[1] - off;
411			if (set_current) {
412				if (bp[0] == 2) {	/* No more buckets in
413							 * chain */
414					hashp->cpage = NULL;
415					hashp->cbucket++;
416					hashp->cndx = 1;
417				} else {
418					hashp->cpage = __get_buf(hashp,
419					    bp[bp[0] - 1], bufp, 0);
420					if (!hashp->cpage)
421						return (-1);
422					hashp->cndx = 1;
423					if (!((u_int16_t *)
424					    hashp->cpage->page)[0]) {
425						hashp->cbucket++;
426						hashp->cpage = NULL;
427					}
428				}
429			}
430			return (0);
431		}
432
433	val->size = collect_data(hashp, bufp, (int)len, set_current);
434	if (val->size == -1)
435		return (-1);
436	if (save_p->addr != save_addr) {
437		/* We are pretty short on buffers. */
438		errno = EINVAL;			/* OUT OF BUFFERS */
439		return (-1);
440	}
441	memmove(hashp->tmp_buf, (save_p->page) + off, len);
442	val->data = (u_char *)hashp->tmp_buf;
443	return (0);
444}
445/*
446 * Count how big the total datasize is by recursing through the pages.  Then
447 * allocate a buffer and copy the data as you recurse up.
448 */
449static int
450collect_data(hashp, bufp, len, set)
451	HTAB *hashp;
452	BUFHEAD *bufp;
453	int len, set;
454{
455	register u_int16_t *bp;
456	register char *p;
457	BUFHEAD *xbp;
458	u_int16_t save_addr;
459	int mylen, totlen;
460
461	p = bufp->page;
462	bp = (u_int16_t *)p;
463	mylen = hashp->BSIZE - bp[1];
464	save_addr = bufp->addr;
465
466	if (bp[2] == FULL_KEY_DATA) {		/* End of Data */
467		totlen = len + mylen;
468		if (hashp->tmp_buf)
469			free(hashp->tmp_buf);
470		if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL)
471			return (-1);
472		if (set) {
473			hashp->cndx = 1;
474			if (bp[0] == 2) {	/* No more buckets in chain */
475				hashp->cpage = NULL;
476				hashp->cbucket++;
477			} else {
478				hashp->cpage =
479				    __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
480				if (!hashp->cpage)
481					return (-1);
482				else if (!((u_int16_t *)hashp->cpage->page)[0]) {
483					hashp->cbucket++;
484					hashp->cpage = NULL;
485				}
486			}
487		}
488	} else {
489		xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
490		if (!xbp || ((totlen =
491		    collect_data(hashp, xbp, len + mylen, set)) < 1))
492			return (-1);
493	}
494	if (bufp->addr != save_addr) {
495		errno = EINVAL;			/* Out of buffers. */
496		return (-1);
497	}
498	memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen);
499	return (totlen);
500}
501
502/*
503 * Fill in the key and data for this big pair.
504 */
505extern int
506__big_keydata(hashp, bufp, key, val, set)
507	HTAB *hashp;
508	BUFHEAD *bufp;
509	DBT *key, *val;
510	int set;
511{
512	key->size = collect_key(hashp, bufp, 0, val, set);
513	if (key->size == -1)
514		return (-1);
515	key->data = (u_char *)hashp->tmp_key;
516	return (0);
517}
518
519/*
520 * Count how big the total key size is by recursing through the pages.  Then
521 * collect the data, allocate a buffer and copy the key as you recurse up.
522 */
523static int
524collect_key(hashp, bufp, len, val, set)
525	HTAB *hashp;
526	BUFHEAD *bufp;
527	int len;
528	DBT *val;
529	int set;
530{
531	BUFHEAD *xbp;
532	char *p;
533	int mylen, totlen;
534	u_int16_t *bp, save_addr;
535
536	p = bufp->page;
537	bp = (u_int16_t *)p;
538	mylen = hashp->BSIZE - bp[1];
539
540	save_addr = bufp->addr;
541	totlen = len + mylen;
542	if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) {    /* End of Key. */
543		if (hashp->tmp_key != NULL)
544			free(hashp->tmp_key);
545		if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL)
546			return (-1);
547		if (__big_return(hashp, bufp, 1, val, set))
548			return (-1);
549	} else {
550		xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
551		if (!xbp || ((totlen =
552		    collect_key(hashp, xbp, totlen, val, set)) < 1))
553			return (-1);
554	}
555	if (bufp->addr != save_addr) {
556		errno = EINVAL;		/* MIS -- OUT OF BUFFERS */
557		return (-1);
558	}
559	memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen);
560	return (totlen);
561}
562
563/*
564 * Returns:
565 *  0 => OK
566 * -1 => error
567 */
568extern int
569__big_split(hashp, op, np, big_keyp, addr, obucket, ret)
570	HTAB *hashp;
571	BUFHEAD *op;	/* Pointer to where to put keys that go in old bucket */
572	BUFHEAD *np;	/* Pointer to new bucket page */
573			/* Pointer to first page containing the big key/data */
574	BUFHEAD *big_keyp;
575	int addr;	/* Address of big_keyp */
576	u_int32_t   obucket;/* Old Bucket */
577	SPLIT_RETURN *ret;
578{
579	register BUFHEAD *tmpp;
580	register u_int16_t *tp;
581	BUFHEAD *bp;
582	DBT key, val;
583	u_int32_t change;
584	u_int16_t free_space, n, off;
585
586	bp = big_keyp;
587
588	/* Now figure out where the big key/data goes */
589	if (__big_keydata(hashp, big_keyp, &key, &val, 0))
590		return (-1);
591	change = (__call_hash(hashp, key.data, key.size) != obucket);
592
593	if (ret->next_addr = __find_last_page(hashp, &big_keyp)) {
594		if (!(ret->nextp =
595		    __get_buf(hashp, ret->next_addr, big_keyp, 0)))
596			return (-1);;
597	} else
598		ret->nextp = NULL;
599
600	/* Now make one of np/op point to the big key/data pair */
601#ifdef DEBUG
602	assert(np->ovfl == NULL);
603#endif
604	if (change)
605		tmpp = np;
606	else
607		tmpp = op;
608
609	tmpp->flags |= BUF_MOD;
610#ifdef DEBUG1
611	(void)fprintf(stderr,
612	    "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr,
613	    (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0));
614#endif
615	tmpp->ovfl = bp;	/* one of op/np point to big_keyp */
616	tp = (u_int16_t *)tmpp->page;
617#ifdef DEBUG
618	assert(FREESPACE(tp) >= OVFLSIZE);
619#endif
620	n = tp[0];
621	off = OFFSET(tp);
622	free_space = FREESPACE(tp);
623	tp[++n] = (u_int16_t)addr;
624	tp[++n] = OVFLPAGE;
625	tp[0] = n;
626	OFFSET(tp) = off;
627	FREESPACE(tp) = free_space - OVFLSIZE;
628
629	/*
630	 * Finally, set the new and old return values. BIG_KEYP contains a
631	 * pointer to the last page of the big key_data pair. Make sure that
632	 * big_keyp has no following page (2 elements) or create an empty
633	 * following page.
634	 */
635
636	ret->newp = np;
637	ret->oldp = op;
638
639	tp = (u_int16_t *)big_keyp->page;
640	big_keyp->flags |= BUF_MOD;
641	if (tp[0] > 2) {
642		/*
643		 * There may be either one or two offsets on this page.  If
644		 * there is one, then the overflow page is linked on normally
645		 * and tp[4] is OVFLPAGE.  If there are two, tp[4] contains
646		 * the second offset and needs to get stuffed in after the
647		 * next overflow page is added.
648		 */
649		n = tp[4];
650		free_space = FREESPACE(tp);
651		off = OFFSET(tp);
652		tp[0] -= 2;
653		FREESPACE(tp) = free_space + OVFLSIZE;
654		OFFSET(tp) = off;
655		tmpp = __add_ovflpage(hashp, big_keyp);
656		if (!tmpp)
657			return (-1);
658		tp[4] = n;
659	} else
660		tmpp = big_keyp;
661
662	if (change)
663		ret->newp = tmpp;
664	else
665		ret->oldp = tmpp;
666	return (0);
667}
668