bt_seq.c revision 1.7
1/*	$OpenBSD: bt_seq.c,v 1.7 2003/05/01 20:23:40 avsm Exp $	*/
2
3/*-
4 * Copyright (c) 1990, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Mike Olson.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39#if defined(LIBC_SCCS) && !defined(lint)
40#if 0
41static char sccsid[] = "@(#)bt_seq.c	8.7 (Berkeley) 7/20/94";
42#else
43static const char rcsid[] = "$OpenBSD: bt_seq.c,v 1.7 2003/05/01 20:23:40 avsm Exp $";
44#endif
45#endif /* LIBC_SCCS and not lint */
46
47#include <sys/types.h>
48
49#include <errno.h>
50#include <stddef.h>
51#include <stdio.h>
52#include <stdlib.h>
53
54#include <db.h>
55#include "btree.h"
56
57static int __bt_first(BTREE *, const DBT *, EPG *, int *);
58static int __bt_seqadv(BTREE *, EPG *, int);
59static int __bt_seqset(BTREE *, EPG *, DBT *, int);
60
61/*
62 * Sequential scan support.
63 *
64 * The tree can be scanned sequentially, starting from either end of the
65 * tree or from any specific key.  A scan request before any scanning is
66 * done is initialized as starting from the least node.
67 */
68
69/*
70 * __bt_seq --
71 *	Btree sequential scan interface.
72 *
73 * Parameters:
74 *	dbp:	pointer to access method
75 *	key:	key for positioning and return value
76 *	data:	data return value
77 *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
78 *
79 * Returns:
80 *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
81 */
82int
83__bt_seq(dbp, key, data, flags)
84	const DB *dbp;
85	DBT *key, *data;
86	u_int flags;
87{
88	BTREE *t;
89	EPG e;
90	int status;
91
92	t = dbp->internal;
93
94	/* Toss any page pinned across calls. */
95	if (t->bt_pinned != NULL) {
96		mpool_put(t->bt_mp, t->bt_pinned, 0);
97		t->bt_pinned = NULL;
98	}
99
100	/*
101	 * If scan unitialized as yet, or starting at a specific record, set
102	 * the scan to a specific key.  Both __bt_seqset and __bt_seqadv pin
103	 * the page the cursor references if they're successful.
104	 */
105	switch (flags) {
106	case R_NEXT:
107	case R_PREV:
108		if (F_ISSET(&t->bt_cursor, CURS_INIT)) {
109			status = __bt_seqadv(t, &e, flags);
110			break;
111		}
112		/* FALLTHROUGH */
113	case R_FIRST:
114	case R_LAST:
115	case R_CURSOR:
116		status = __bt_seqset(t, &e, key, flags);
117		break;
118	default:
119		errno = EINVAL;
120		return (RET_ERROR);
121	}
122
123	if (status == RET_SUCCESS) {
124		__bt_setcur(t, e.page->pgno, e.index);
125
126		status =
127		    __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0);
128
129		/*
130		 * If the user is doing concurrent access, we copied the
131		 * key/data, toss the page.
132		 */
133		if (F_ISSET(t, B_DB_LOCK))
134			mpool_put(t->bt_mp, e.page, 0);
135		else
136			t->bt_pinned = e.page;
137	}
138	return (status);
139}
140
141/*
142 * __bt_seqset --
143 *	Set the sequential scan to a specific key.
144 *
145 * Parameters:
146 *	t:	tree
147 *	ep:	storage for returned key
148 *	key:	key for initial scan position
149 *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
150 *
151 * Side effects:
152 *	Pins the page the cursor references.
153 *
154 * Returns:
155 *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
156 */
157static int
158__bt_seqset(t, ep, key, flags)
159	BTREE *t;
160	EPG *ep;
161	DBT *key;
162	int flags;
163{
164	PAGE *h;
165	pgno_t pg;
166	int exact;
167
168	/*
169	 * Find the first, last or specific key in the tree and point the
170	 * cursor at it.  The cursor may not be moved until a new key has
171	 * been found.
172	 */
173	switch (flags) {
174	case R_CURSOR:				/* Keyed scan. */
175		/*
176		 * Find the first instance of the key or the smallest key
177		 * which is greater than or equal to the specified key.
178		 */
179		if (key->data == NULL || key->size == 0) {
180			errno = EINVAL;
181			return (RET_ERROR);
182		}
183		return (__bt_first(t, key, ep, &exact));
184	case R_FIRST:				/* First record. */
185	case R_NEXT:
186		/* Walk down the left-hand side of the tree. */
187		for (pg = P_ROOT;;) {
188			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
189				return (RET_ERROR);
190
191			/* Check for an empty tree. */
192			if (NEXTINDEX(h) == 0) {
193				mpool_put(t->bt_mp, h, 0);
194				return (RET_SPECIAL);
195			}
196
197			if (h->flags & (P_BLEAF | P_RLEAF))
198				break;
199			pg = GETBINTERNAL(h, 0)->pgno;
200			mpool_put(t->bt_mp, h, 0);
201		}
202		ep->page = h;
203		ep->index = 0;
204		break;
205	case R_LAST:				/* Last record. */
206	case R_PREV:
207		/* Walk down the right-hand side of the tree. */
208		for (pg = P_ROOT;;) {
209			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
210				return (RET_ERROR);
211
212			/* Check for an empty tree. */
213			if (NEXTINDEX(h) == 0) {
214				mpool_put(t->bt_mp, h, 0);
215				return (RET_SPECIAL);
216			}
217
218			if (h->flags & (P_BLEAF | P_RLEAF))
219				break;
220			pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
221			mpool_put(t->bt_mp, h, 0);
222		}
223
224		ep->page = h;
225		ep->index = NEXTINDEX(h) - 1;
226		break;
227	}
228	return (RET_SUCCESS);
229}
230
231/*
232 * __bt_seqadvance --
233 *	Advance the sequential scan.
234 *
235 * Parameters:
236 *	t:	tree
237 *	flags:	R_NEXT, R_PREV
238 *
239 * Side effects:
240 *	Pins the page the new key/data record is on.
241 *
242 * Returns:
243 *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
244 */
245static int
246__bt_seqadv(t, ep, flags)
247	BTREE *t;
248	EPG *ep;
249	int flags;
250{
251	CURSOR *c;
252	PAGE *h;
253	indx_t index;
254	pgno_t pg;
255	int exact;
256
257	/*
258	 * There are a couple of states that we can be in.  The cursor has
259	 * been initialized by the time we get here, but that's all we know.
260	 */
261	c = &t->bt_cursor;
262
263	/*
264	 * The cursor was deleted where there weren't any duplicate records,
265	 * so the key was saved.  Find out where that key would go in the
266	 * current tree.  It doesn't matter if the returned key is an exact
267	 * match or not -- if it's an exact match, the record was added after
268	 * the delete so we can just return it.  If not, as long as there's
269	 * a record there, return it.
270	 */
271	if (F_ISSET(c, CURS_ACQUIRE))
272		return (__bt_first(t, &c->key, ep, &exact));
273
274	/* Get the page referenced by the cursor. */
275	if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
276		return (RET_ERROR);
277
278	/*
279 	 * Find the next/previous record in the tree and point the cursor at
280	 * it.  The cursor may not be moved until a new key has been found.
281	 */
282	switch (flags) {
283	case R_NEXT:			/* Next record. */
284		/*
285		 * The cursor was deleted in duplicate records, and moved
286		 * forward to a record that has yet to be returned.  Clear
287		 * that flag, and return the record.
288		 */
289		if (F_ISSET(c, CURS_AFTER))
290			goto usecurrent;
291		index = c->pg.index;
292		if (++index == NEXTINDEX(h)) {
293			pg = h->nextpg;
294			mpool_put(t->bt_mp, h, 0);
295			if (pg == P_INVALID)
296				return (RET_SPECIAL);
297			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
298				return (RET_ERROR);
299			index = 0;
300		}
301		break;
302	case R_PREV:			/* Previous record. */
303		/*
304		 * The cursor was deleted in duplicate records, and moved
305		 * backward to a record that has yet to be returned.  Clear
306		 * that flag, and return the record.
307		 */
308		if (F_ISSET(c, CURS_BEFORE)) {
309usecurrent:		F_CLR(c, CURS_AFTER | CURS_BEFORE);
310			ep->page = h;
311			ep->index = c->pg.index;
312			return (RET_SUCCESS);
313		}
314		index = c->pg.index;
315		if (index == 0) {
316			pg = h->prevpg;
317			mpool_put(t->bt_mp, h, 0);
318			if (pg == P_INVALID)
319				return (RET_SPECIAL);
320			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
321				return (RET_ERROR);
322			index = NEXTINDEX(h) - 1;
323		} else
324			--index;
325		break;
326	}
327
328	ep->page = h;
329	ep->index = index;
330	return (RET_SUCCESS);
331}
332
333/*
334 * __bt_first --
335 *	Find the first entry.
336 *
337 * Parameters:
338 *	t:	the tree
339 *    key:	the key
340 *  erval:	return EPG
341 * exactp:	pointer to exact match flag
342 *
343 * Returns:
344 *	The first entry in the tree greater than or equal to key,
345 *	or RET_SPECIAL if no such key exists.
346 */
347static int
348__bt_first(t, key, erval, exactp)
349	BTREE *t;
350	const DBT *key;
351	EPG *erval;
352	int *exactp;
353{
354	PAGE *h;
355	EPG *ep, save;
356	pgno_t pg;
357
358	/*
359	 * Find any matching record; __bt_search pins the page.
360	 *
361	 * If it's an exact match and duplicates are possible, walk backwards
362	 * in the tree until we find the first one.  Otherwise, make sure it's
363	 * a valid key (__bt_search may return an index just past the end of a
364	 * page) and return it.
365	 */
366	if ((ep = __bt_search(t, key, exactp)) == NULL)
367		return (0);
368	if (*exactp) {
369		if (F_ISSET(t, B_NODUPS)) {
370			*erval = *ep;
371			return (RET_SUCCESS);
372		}
373
374		/*
375		 * Walk backwards, as long as the entry matches and there are
376		 * keys left in the tree.  Save a copy of each match in case
377		 * we go too far.
378		 */
379		save = *ep;
380		h = ep->page;
381		do {
382			if (save.page->pgno != ep->page->pgno) {
383				mpool_put(t->bt_mp, save.page, 0);
384				save = *ep;
385			} else
386				save.index = ep->index;
387
388			/*
389			 * Don't unpin the page the last (or original) match
390			 * was on, but make sure it's unpinned if an error
391			 * occurs.
392			 */
393			if (ep->index == 0) {
394				if (h->prevpg == P_INVALID)
395					break;
396				if (h->pgno != save.page->pgno)
397					mpool_put(t->bt_mp, h, 0);
398				if ((h = mpool_get(t->bt_mp,
399				    h->prevpg, 0)) == NULL) {
400					if (h->pgno == save.page->pgno)
401						mpool_put(t->bt_mp,
402						    save.page, 0);
403					return (RET_ERROR);
404				}
405				ep->page = h;
406				ep->index = NEXTINDEX(h);
407			}
408			--ep->index;
409		} while (__bt_cmp(t, key, ep) == 0);
410
411		/*
412		 * Reach here with the last page that was looked at pinned,
413		 * which may or may not be the same as the last (or original)
414		 * match page.  If it's not useful, release it.
415		 */
416		if (h->pgno != save.page->pgno)
417			mpool_put(t->bt_mp, h, 0);
418
419		*erval = save;
420		return (RET_SUCCESS);
421	}
422
423	/* If at the end of a page, find the next entry. */
424	if (ep->index == NEXTINDEX(ep->page)) {
425		h = ep->page;
426		pg = h->nextpg;
427		mpool_put(t->bt_mp, h, 0);
428		if (pg == P_INVALID)
429			return (RET_SPECIAL);
430		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
431			return (RET_ERROR);
432		ep->index = 0;
433		ep->page = h;
434	}
435	*erval = *ep;
436	return (RET_SUCCESS);
437}
438
439/*
440 * __bt_setcur --
441 *	Set the cursor to an entry in the tree.
442 *
443 * Parameters:
444 *	t:	the tree
445 *   pgno:	page number
446 *  index:	page index
447 */
448void
449__bt_setcur(t, pgno, index)
450	BTREE *t;
451	pgno_t pgno;
452	u_int index;
453{
454	/* Lose any already deleted key. */
455	if (t->bt_cursor.key.data != NULL) {
456		free(t->bt_cursor.key.data);
457		t->bt_cursor.key.size = 0;
458		t->bt_cursor.key.data = NULL;
459	}
460	F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE);
461
462	/* Update the cursor. */
463	t->bt_cursor.pg.pgno = pgno;
464	t->bt_cursor.pg.index = index;
465	F_SET(&t->bt_cursor, CURS_INIT);
466}
467