bt_overflow.c revision 331722
1/*-
2 * Copyright (c) 1990, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Mike Olson.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#if defined(LIBC_SCCS) && !defined(lint)
34static char sccsid[] = "@(#)bt_overflow.c	8.5 (Berkeley) 7/16/94";
35#endif /* LIBC_SCCS and not lint */
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: stable/11/lib/libc/db/btree/bt_overflow.c 331722 2018-03-29 02:50:57Z eadler $");
38
39#include <sys/param.h>
40
41#include <stdio.h>
42#include <stdlib.h>
43#include <string.h>
44
45#include <db.h>
46#include "btree.h"
47
/*
 * Big key/data code.
 *
 * Big key and data entries are stored on linked lists of pages.  The initial
 * reference is a byte string stored with the key or data, holding the page
 * number and size.  The actual record is stored in a chain of pages linked by
 * the nextpg field of the PAGE header.
 *
 * The first page of the chain has a special property.  If the record is used
 * by an internal page, it cannot be deleted and the P_PRESERVE bit will be set
 * in the header.
 *
 * XXX
 * A single DBT is written to each chain, so a lot of space on the last page
 * is wasted.  This is a fairly major bug for some data sets.
 */
64
65/*
66 * __OVFL_GET -- Get an overflow key/data item.
67 *
68 * Parameters:
69 *	t:	tree
70 *	p:	pointer to { pgno_t, u_int32_t }
71 *	buf:	storage address
72 *	bufsz:	storage size
73 *
74 * Returns:
75 *	RET_ERROR, RET_SUCCESS
76 */
77int
78__ovfl_get(BTREE *t, void *p, size_t *ssz, void **buf, size_t *bufsz)
79{
80	PAGE *h;
81	pgno_t pg;
82	size_t nb, plen;
83	u_int32_t sz;
84
85	memmove(&pg, p, sizeof(pgno_t));
86	memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t));
87	*ssz = sz;
88
89#ifdef DEBUG
90	if (pg == P_INVALID || sz == 0)
91		abort();
92#endif
93	/* Make the buffer bigger as necessary. */
94	if (*bufsz < sz) {
95		*buf = reallocf(*buf, sz);
96		if (*buf == NULL)
97			return (RET_ERROR);
98		*bufsz = sz;
99	}
100
101	/*
102	 * Step through the linked list of pages, copying the data on each one
103	 * into the buffer.  Never copy more than the data's length.
104	 */
105	plen = t->bt_psize - BTDATAOFF;
106	for (p = *buf;; p = (char *)p + nb, pg = h->nextpg) {
107		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
108			return (RET_ERROR);
109
110		nb = MIN(sz, plen);
111		memmove(p, (char *)h + BTDATAOFF, nb);
112		mpool_put(t->bt_mp, h, 0);
113
114		if ((sz -= nb) == 0)
115			break;
116	}
117	return (RET_SUCCESS);
118}
119
120/*
121 * __OVFL_PUT -- Store an overflow key/data item.
122 *
123 * Parameters:
124 *	t:	tree
125 *	data:	DBT to store
126 *	pgno:	storage page number
127 *
128 * Returns:
129 *	RET_ERROR, RET_SUCCESS
130 */
131int
132__ovfl_put(BTREE *t, const DBT *dbt, pgno_t *pg)
133{
134	PAGE *h, *last;
135	void *p;
136	pgno_t npg;
137	size_t nb, plen;
138	u_int32_t sz;
139
140	/*
141	 * Allocate pages and copy the key/data record into them.  Store the
142	 * number of the first page in the chain.
143	 */
144	plen = t->bt_psize - BTDATAOFF;
145	for (last = NULL, p = dbt->data, sz = dbt->size;;
146	    p = (char *)p + plen, last = h) {
147		if ((h = __bt_new(t, &npg)) == NULL)
148			return (RET_ERROR);
149
150		h->pgno = npg;
151		h->nextpg = h->prevpg = P_INVALID;
152		h->flags = P_OVERFLOW;
153		h->lower = h->upper = 0;
154
155		nb = MIN(sz, plen);
156		memmove((char *)h + BTDATAOFF, p, nb);
157
158		if (last) {
159			last->nextpg = h->pgno;
160			mpool_put(t->bt_mp, last, MPOOL_DIRTY);
161		} else
162			*pg = h->pgno;
163
164		if ((sz -= nb) == 0) {
165			mpool_put(t->bt_mp, h, MPOOL_DIRTY);
166			break;
167		}
168	}
169	return (RET_SUCCESS);
170}
171
172/*
173 * __OVFL_DELETE -- Delete an overflow chain.
174 *
175 * Parameters:
176 *	t:	tree
177 *	p:	pointer to { pgno_t, u_int32_t }
178 *
179 * Returns:
180 *	RET_ERROR, RET_SUCCESS
181 */
182int
183__ovfl_delete(BTREE *t, void *p)
184{
185	PAGE *h;
186	pgno_t pg;
187	size_t plen;
188	u_int32_t sz;
189
190	memmove(&pg, p, sizeof(pgno_t));
191	memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t));
192
193#ifdef DEBUG
194	if (pg == P_INVALID || sz == 0)
195		abort();
196#endif
197	if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
198		return (RET_ERROR);
199
200	/* Don't delete chains used by internal pages. */
201	if (h->flags & P_PRESERVE) {
202		mpool_put(t->bt_mp, h, 0);
203		return (RET_SUCCESS);
204	}
205
206	/* Step through the chain, calling the free routine for each page. */
207	for (plen = t->bt_psize - BTDATAOFF;; sz -= plen) {
208		pg = h->nextpg;
209		__bt_free(t, h);
210		if (sz <= plen)
211			break;
212		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
213			return (RET_ERROR);
214	}
215	return (RET_SUCCESS);
216}
217