hash_bigkey.c revision 189327
1/*-
2 * Copyright (c) 1990, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Margo Seltzer.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#if defined(LIBC_SCCS) && !defined(lint)
34static char sccsid[] = "@(#)hash_bigkey.c	8.3 (Berkeley) 5/31/94";
35#endif /* LIBC_SCCS and not lint */
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: head/lib/libc/db/hash/hash_bigkey.c 189327 2009-03-04 00:58:04Z delphij $");
38
39/*
40 * PACKAGE: hash
41 * DESCRIPTION:
42 *	Big key/data handling for the hashing package.
43 *
44 * ROUTINES:
45 * External
46 *	__big_keydata
47 *	__big_split
48 *	__big_insert
49 *	__big_return
50 *	__big_delete
51 *	__find_last_page
52 * Internal
53 *	collect_key
54 *	collect_data
55 */
56
57#include <sys/param.h>
58
59#include <errno.h>
60#include <stdio.h>
61#include <stdlib.h>
62#include <string.h>
63
64#ifdef DEBUG
65#include <assert.h>
66#endif
67
68#include <db.h>
69#include "hash.h"
70#include "page.h"
71#include "extern.h"
72
73static int collect_key(HTAB *, BUFHEAD *, int, DBT *, int);
74static int collect_data(HTAB *, BUFHEAD *, int, int);
75
76/*
77 * Big_insert
78 *
79 * You need to do an insert and the key/data pair is too big
80 *
81 * Returns:
82 * 0 ==> OK
83 *-1 ==> ERROR
84 */
85int
86__big_insert(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val)
87{
88	u_int16_t *p;
89	int key_size, n, val_size;
90	u_int16_t space, move_bytes, off;
91	char *cp, *key_data, *val_data;
92
93	cp = bufp->page;		/* Character pointer of p. */
94	p = (u_int16_t *)cp;
95
96	key_data = (char *)key->data;
97	key_size = key->size;
98	val_data = (char *)val->data;
99	val_size = val->size;
100
101	/* First move the Key */
102	for (space = FREESPACE(p) - BIGOVERHEAD; key_size;
103	    space = FREESPACE(p) - BIGOVERHEAD) {
104		move_bytes = MIN(space, key_size);
105		off = OFFSET(p) - move_bytes;
106		memmove(cp + off, key_data, move_bytes);
107		key_size -= move_bytes;
108		key_data += move_bytes;
109		n = p[0];
110		p[++n] = off;
111		p[0] = ++n;
112		FREESPACE(p) = off - PAGE_META(n);
113		OFFSET(p) = off;
114		p[n] = PARTIAL_KEY;
115		bufp = __add_ovflpage(hashp, bufp);
116		if (!bufp)
117			return (-1);
118		n = p[0];
119		if (!key_size) {
120			if (FREESPACE(p)) {
121				move_bytes = MIN(FREESPACE(p), val_size);
122				off = OFFSET(p) - move_bytes;
123				p[n] = off;
124				memmove(cp + off, val_data, move_bytes);
125				val_data += move_bytes;
126				val_size -= move_bytes;
127				p[n - 2] = FULL_KEY_DATA;
128				FREESPACE(p) = FREESPACE(p) - move_bytes;
129				OFFSET(p) = off;
130			} else
131				p[n - 2] = FULL_KEY;
132		}
133		p = (u_int16_t *)bufp->page;
134		cp = bufp->page;
135		bufp->flags |= BUF_MOD;
136	}
137
138	/* Now move the data */
139	for (space = FREESPACE(p) - BIGOVERHEAD; val_size;
140	    space = FREESPACE(p) - BIGOVERHEAD) {
141		move_bytes = MIN(space, val_size);
142		/*
143		 * Here's the hack to make sure that if the data ends on the
144		 * same page as the key ends, FREESPACE is at least one.
145		 */
146		if (space == val_size && val_size == val->size)
147			move_bytes--;
148		off = OFFSET(p) - move_bytes;
149		memmove(cp + off, val_data, move_bytes);
150		val_size -= move_bytes;
151		val_data += move_bytes;
152		n = p[0];
153		p[++n] = off;
154		p[0] = ++n;
155		FREESPACE(p) = off - PAGE_META(n);
156		OFFSET(p) = off;
157		if (val_size) {
158			p[n] = FULL_KEY;
159			bufp = __add_ovflpage(hashp, bufp);
160			if (!bufp)
161				return (-1);
162			cp = bufp->page;
163			p = (u_int16_t *)cp;
164		} else
165			p[n] = FULL_KEY_DATA;
166		bufp->flags |= BUF_MOD;
167	}
168	return (0);
169}
170
171/*
172 * Called when bufp's page  contains a partial key (index should be 1)
173 *
174 * All pages in the big key/data pair except bufp are freed.  We cannot
175 * free bufp because the page pointing to it is lost and we can't get rid
176 * of its pointer.
177 *
178 * Returns:
179 * 0 => OK
180 *-1 => ERROR
181 */
182int
183__big_delete(HTAB *hashp, BUFHEAD *bufp)
184{
185	BUFHEAD *last_bfp, *rbufp;
186	u_int16_t *bp, pageno;
187	int key_done, n;
188
189	rbufp = bufp;
190	last_bfp = NULL;
191	bp = (u_int16_t *)bufp->page;
192	pageno = 0;
193	key_done = 0;
194
195	while (!key_done || (bp[2] != FULL_KEY_DATA)) {
196		if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA)
197			key_done = 1;
198
199		/*
200		 * If there is freespace left on a FULL_KEY_DATA page, then
201		 * the data is short and fits entirely on this page, and this
202		 * is the last page.
203		 */
204		if (bp[2] == FULL_KEY_DATA && FREESPACE(bp))
205			break;
206		pageno = bp[bp[0] - 1];
207		rbufp->flags |= BUF_MOD;
208		rbufp = __get_buf(hashp, pageno, rbufp, 0);
209		if (last_bfp)
210			__free_ovflpage(hashp, last_bfp);
211		last_bfp = rbufp;
212		if (!rbufp)
213			return (-1);		/* Error. */
214		bp = (u_int16_t *)rbufp->page;
215	}
216
217	/*
218	 * If we get here then rbufp points to the last page of the big
219	 * key/data pair.  Bufp points to the first one -- it should now be
220	 * empty pointing to the next page after this pair.  Can't free it
221	 * because we don't have the page pointing to it.
222	 */
223
224	/* This is information from the last page of the pair. */
225	n = bp[0];
226	pageno = bp[n - 1];
227
228	/* Now, bp is the first page of the pair. */
229	bp = (u_int16_t *)bufp->page;
230	if (n > 2) {
231		/* There is an overflow page. */
232		bp[1] = pageno;
233		bp[2] = OVFLPAGE;
234		bufp->ovfl = rbufp->ovfl;
235	} else
236		/* This is the last page. */
237		bufp->ovfl = NULL;
238	n -= 2;
239	bp[0] = n;
240	FREESPACE(bp) = hashp->BSIZE - PAGE_META(n);
241	OFFSET(bp) = hashp->BSIZE - 1;
242
243	bufp->flags |= BUF_MOD;
244	if (rbufp)
245		__free_ovflpage(hashp, rbufp);
246	if (last_bfp != rbufp)
247		__free_ovflpage(hashp, last_bfp);
248
249	hashp->NKEYS--;
250	return (0);
251}
252/*
253 * Returns:
254 *  0 = key not found
255 * -1 = get next overflow page
256 * -2 means key not found and this is big key/data
257 * -3 error
258 */
259int
260__find_bigpair(HTAB *hashp, BUFHEAD *bufp, int ndx, char *key, int size)
261{
262	u_int16_t *bp;
263	char *p;
264	int ksize;
265	u_int16_t bytes;
266	char *kkey;
267
268	bp = (u_int16_t *)bufp->page;
269	p = bufp->page;
270	ksize = size;
271	kkey = key;
272
273	for (bytes = hashp->BSIZE - bp[ndx];
274	    bytes <= size && bp[ndx + 1] == PARTIAL_KEY;
275	    bytes = hashp->BSIZE - bp[ndx]) {
276		if (memcmp(p + bp[ndx], kkey, bytes))
277			return (-2);
278		kkey += bytes;
279		ksize -= bytes;
280		bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0);
281		if (!bufp)
282			return (-3);
283		p = bufp->page;
284		bp = (u_int16_t *)p;
285		ndx = 1;
286	}
287
288	if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) {
289#ifdef HASH_STATISTICS
290		++hash_collisions;
291#endif
292		return (-2);
293	} else
294		return (ndx);
295}
296
297/*
298 * Given the buffer pointer of the first overflow page of a big pair,
299 * find the end of the big pair
300 *
301 * This will set bpp to the buffer header of the last page of the big pair.
302 * It will return the pageno of the overflow page following the last page
303 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the
304 * bucket)
305 */
306u_int16_t
307__find_last_page(HTAB *hashp, BUFHEAD **bpp)
308{
309	BUFHEAD *bufp;
310	u_int16_t *bp, pageno;
311	int n;
312
313	bufp = *bpp;
314	bp = (u_int16_t *)bufp->page;
315	for (;;) {
316		n = bp[0];
317
318		/*
319		 * This is the last page if: the tag is FULL_KEY_DATA and
320		 * either only 2 entries OVFLPAGE marker is explicit there
321		 * is freespace on the page.
322		 */
323		if (bp[2] == FULL_KEY_DATA &&
324		    ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp))))
325			break;
326
327		pageno = bp[n - 1];
328		bufp = __get_buf(hashp, pageno, bufp, 0);
329		if (!bufp)
330			return (0);	/* Need to indicate an error! */
331		bp = (u_int16_t *)bufp->page;
332	}
333
334	*bpp = bufp;
335	if (bp[0] > 2)
336		return (bp[3]);
337	else
338		return (0);
339}
340
341/*
342 * Return the data for the key/data pair that begins on this page at this
343 * index (index should always be 1).
344 */
345int
346__big_return(HTAB *hashp, BUFHEAD *bufp, int ndx, DBT *val, int set_current)
347{
348	BUFHEAD *save_p;
349	u_int16_t *bp, len, off, save_addr;
350	char *tp;
351
352	bp = (u_int16_t *)bufp->page;
353	while (bp[ndx + 1] == PARTIAL_KEY) {
354		bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
355		if (!bufp)
356			return (-1);
357		bp = (u_int16_t *)bufp->page;
358		ndx = 1;
359	}
360
361	if (bp[ndx + 1] == FULL_KEY) {
362		bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
363		if (!bufp)
364			return (-1);
365		bp = (u_int16_t *)bufp->page;
366		save_p = bufp;
367		save_addr = save_p->addr;
368		off = bp[1];
369		len = 0;
370	} else
371		if (!FREESPACE(bp)) {
372			/*
373			 * This is a hack.  We can't distinguish between
374			 * FULL_KEY_DATA that contains complete data or
375			 * incomplete data, so we require that if the data
376			 * is complete, there is at least 1 byte of free
377			 * space left.
378			 */
379			off = bp[bp[0]];
380			len = bp[1] - off;
381			save_p = bufp;
382			save_addr = bufp->addr;
383			bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
384			if (!bufp)
385				return (-1);
386			bp = (u_int16_t *)bufp->page;
387		} else {
388			/* The data is all on one page. */
389			tp = (char *)bp;
390			off = bp[bp[0]];
391			val->data = (u_char *)tp + off;
392			val->size = bp[1] - off;
393			if (set_current) {
394				if (bp[0] == 2) {	/* No more buckets in
395							 * chain */
396					hashp->cpage = NULL;
397					hashp->cbucket++;
398					hashp->cndx = 1;
399				} else {
400					hashp->cpage = __get_buf(hashp,
401					    bp[bp[0] - 1], bufp, 0);
402					if (!hashp->cpage)
403						return (-1);
404					hashp->cndx = 1;
405					if (!((u_int16_t *)
406					    hashp->cpage->page)[0]) {
407						hashp->cbucket++;
408						hashp->cpage = NULL;
409					}
410				}
411			}
412			return (0);
413		}
414
415	val->size = (size_t)collect_data(hashp, bufp, (int)len, set_current);
416	if (val->size == (size_t)-1)
417		return (-1);
418	if (save_p->addr != save_addr) {
419		/* We are pretty short on buffers. */
420		errno = EINVAL;			/* OUT OF BUFFERS */
421		return (-1);
422	}
423	memmove(hashp->tmp_buf, (save_p->page) + off, len);
424	val->data = (u_char *)hashp->tmp_buf;
425	return (0);
426}
427/*
428 * Count how big the total datasize is by recursing through the pages.  Then
429 * allocate a buffer and copy the data as you recurse up.
430 */
431static int
432collect_data(HTAB *hashp, BUFHEAD *bufp, int len, int set)
433{
434	u_int16_t *bp;
435	char *p;
436	BUFHEAD *xbp;
437	u_int16_t save_addr;
438	int mylen, totlen;
439
440	p = bufp->page;
441	bp = (u_int16_t *)p;
442	mylen = hashp->BSIZE - bp[1];
443	save_addr = bufp->addr;
444
445	if (bp[2] == FULL_KEY_DATA) {		/* End of Data */
446		totlen = len + mylen;
447		if (hashp->tmp_buf)
448			free(hashp->tmp_buf);
449		if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL)
450			return (-1);
451		if (set) {
452			hashp->cndx = 1;
453			if (bp[0] == 2) {	/* No more buckets in chain */
454				hashp->cpage = NULL;
455				hashp->cbucket++;
456			} else {
457				hashp->cpage =
458				    __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
459				if (!hashp->cpage)
460					return (-1);
461				else if (!((u_int16_t *)hashp->cpage->page)[0]) {
462					hashp->cbucket++;
463					hashp->cpage = NULL;
464				}
465			}
466		}
467	} else {
468		xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
469		if (!xbp || ((totlen =
470		    collect_data(hashp, xbp, len + mylen, set)) < 1))
471			return (-1);
472	}
473	if (bufp->addr != save_addr) {
474		errno = EINVAL;			/* Out of buffers. */
475		return (-1);
476	}
477	memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen);
478	return (totlen);
479}
480
481/*
482 * Fill in the key and data for this big pair.
483 */
484int
485__big_keydata(HTAB *hashp, BUFHEAD *bufp, DBT *key, DBT *val, int set)
486{
487	key->size = (size_t)collect_key(hashp, bufp, 0, val, set);
488	if (key->size == (size_t)-1)
489		return (-1);
490	key->data = (u_char *)hashp->tmp_key;
491	return (0);
492}
493
494/*
495 * Count how big the total key size is by recursing through the pages.  Then
496 * collect the data, allocate a buffer and copy the key as you recurse up.
497 */
498static int
499collect_key(HTAB *hashp, BUFHEAD *bufp, int len, DBT *val, int set)
500{
501	BUFHEAD *xbp;
502	char *p;
503	int mylen, totlen;
504	u_int16_t *bp, save_addr;
505
506	p = bufp->page;
507	bp = (u_int16_t *)p;
508	mylen = hashp->BSIZE - bp[1];
509
510	save_addr = bufp->addr;
511	totlen = len + mylen;
512	if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) {    /* End of Key. */
513		if (hashp->tmp_key != NULL)
514			free(hashp->tmp_key);
515		if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL)
516			return (-1);
517		if (__big_return(hashp, bufp, 1, val, set))
518			return (-1);
519	} else {
520		xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
521		if (!xbp || ((totlen =
522		    collect_key(hashp, xbp, totlen, val, set)) < 1))
523			return (-1);
524	}
525	if (bufp->addr != save_addr) {
526		errno = EINVAL;		/* MIS -- OUT OF BUFFERS */
527		return (-1);
528	}
529	memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen);
530	return (totlen);
531}
532
533/*
534 * Returns:
535 *  0 => OK
536 * -1 => error
537 */
538int
539__big_split(HTAB *hashp,
540    BUFHEAD *op,	/* Pointer to where to put keys that go in old bucket */
541    BUFHEAD *np,	/* Pointer to new bucket page */
542    BUFHEAD *big_keyp,	/* Pointer to first page containing the big key/data */
543    int addr,		/* Address of big_keyp */
544    u_int32_t obucket,	/* Old Bucket */
545    SPLIT_RETURN *ret)
546{
547	BUFHEAD *bp, *tmpp;
548	DBT key, val;
549	u_int32_t change;
550	u_int16_t free_space, n, off, *tp;
551
552	bp = big_keyp;
553
554	/* Now figure out where the big key/data goes */
555	if (__big_keydata(hashp, big_keyp, &key, &val, 0))
556		return (-1);
557	change = (__call_hash(hashp, key.data, key.size) != obucket);
558
559	if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) {
560		if (!(ret->nextp =
561		    __get_buf(hashp, ret->next_addr, big_keyp, 0)))
562			return (-1);
563	} else
564		ret->nextp = NULL;
565
566	/* Now make one of np/op point to the big key/data pair */
567#ifdef DEBUG
568	assert(np->ovfl == NULL);
569#endif
570	if (change)
571		tmpp = np;
572	else
573		tmpp = op;
574
575	tmpp->flags |= BUF_MOD;
576#ifdef DEBUG1
577	(void)fprintf(stderr,
578	    "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr,
579	    (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0));
580#endif
581	tmpp->ovfl = bp;	/* one of op/np point to big_keyp */
582	tp = (u_int16_t *)tmpp->page;
583#ifdef DEBUG
584	assert(FREESPACE(tp) >= OVFLSIZE);
585#endif
586	n = tp[0];
587	off = OFFSET(tp);
588	free_space = FREESPACE(tp);
589	tp[++n] = (u_int16_t)addr;
590	tp[++n] = OVFLPAGE;
591	tp[0] = n;
592	OFFSET(tp) = off;
593	FREESPACE(tp) = free_space - OVFLSIZE;
594
595	/*
596	 * Finally, set the new and old return values. BIG_KEYP contains a
597	 * pointer to the last page of the big key_data pair. Make sure that
598	 * big_keyp has no following page (2 elements) or create an empty
599	 * following page.
600	 */
601
602	ret->newp = np;
603	ret->oldp = op;
604
605	tp = (u_int16_t *)big_keyp->page;
606	big_keyp->flags |= BUF_MOD;
607	if (tp[0] > 2) {
608		/*
609		 * There may be either one or two offsets on this page.  If
610		 * there is one, then the overflow page is linked on normally
611		 * and tp[4] is OVFLPAGE.  If there are two, tp[4] contains
612		 * the second offset and needs to get stuffed in after the
613		 * next overflow page is added.
614		 */
615		n = tp[4];
616		free_space = FREESPACE(tp);
617		off = OFFSET(tp);
618		tp[0] -= 2;
619		FREESPACE(tp) = free_space + OVFLSIZE;
620		OFFSET(tp) = off;
621		tmpp = __add_ovflpage(hashp, big_keyp);
622		if (!tmpp)
623			return (-1);
624		tp[4] = n;
625	} else
626		tmpp = big_keyp;
627
628	if (change)
629		ret->newp = tmpp;
630	else
631		ret->oldp = tmpp;
632	return (0);
633}
634