bt_seq.c revision 1.9
1/*	$OpenBSD: bt_seq.c,v 1.9 2005/01/03 22:30:28 millert Exp $	*/
2
3/*-
4 * Copyright (c) 1990, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Mike Olson.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#if defined(LIBC_SCCS) && !defined(lint)
36#if 0
37static char sccsid[] = "@(#)bt_seq.c	8.7 (Berkeley) 7/20/94";
38#else
39static const char rcsid[] = "$OpenBSD: bt_seq.c,v 1.9 2005/01/03 22:30:28 millert Exp $";
40#endif
41#endif /* LIBC_SCCS and not lint */
42
43#include <sys/types.h>
44
45#include <errno.h>
46#include <stddef.h>
47#include <stdio.h>
48#include <stdlib.h>
49
50#include <db.h>
51#include "btree.h"
52
53static int __bt_first(BTREE *, const DBT *, EPG *, int *);
54static int __bt_seqadv(BTREE *, EPG *, int);
55static int __bt_seqset(BTREE *, EPG *, DBT *, int);
56
57/*
58 * Sequential scan support.
59 *
60 * The tree can be scanned sequentially, starting from either end of the
61 * tree or from any specific key.  A scan request before any scanning is
62 * done is initialized as starting from the least node.
63 */
64
65/*
66 * __bt_seq --
67 *	Btree sequential scan interface.
68 *
69 * Parameters:
70 *	dbp:	pointer to access method
71 *	key:	key for positioning and return value
72 *	data:	data return value
73 *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
74 *
75 * Returns:
76 *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
77 */
78int
79__bt_seq(dbp, key, data, flags)
80	const DB *dbp;
81	DBT *key, *data;
82	u_int flags;
83{
84	BTREE *t;
85	EPG e;
86	int status;
87
88	t = dbp->internal;
89
90	/* Toss any page pinned across calls. */
91	if (t->bt_pinned != NULL) {
92		mpool_put(t->bt_mp, t->bt_pinned, 0);
93		t->bt_pinned = NULL;
94	}
95
96	/*
97	 * If scan unitialized as yet, or starting at a specific record, set
98	 * the scan to a specific key.  Both __bt_seqset and __bt_seqadv pin
99	 * the page the cursor references if they're successful.
100	 */
101	switch (flags) {
102	case R_NEXT:
103	case R_PREV:
104		if (F_ISSET(&t->bt_cursor, CURS_INIT)) {
105			status = __bt_seqadv(t, &e, flags);
106			break;
107		}
108		/* FALLTHROUGH */
109	case R_FIRST:
110	case R_LAST:
111	case R_CURSOR:
112		status = __bt_seqset(t, &e, key, flags);
113		break;
114	default:
115		errno = EINVAL;
116		return (RET_ERROR);
117	}
118
119	if (status == RET_SUCCESS) {
120		__bt_setcur(t, e.page->pgno, e.index);
121
122		status =
123		    __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0);
124
125		/*
126		 * If the user is doing concurrent access, we copied the
127		 * key/data, toss the page.
128		 */
129		if (F_ISSET(t, B_DB_LOCK))
130			mpool_put(t->bt_mp, e.page, 0);
131		else
132			t->bt_pinned = e.page;
133	}
134	return (status);
135}
136
137/*
138 * __bt_seqset --
139 *	Set the sequential scan to a specific key.
140 *
141 * Parameters:
142 *	t:	tree
143 *	ep:	storage for returned key
144 *	key:	key for initial scan position
145 *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
146 *
147 * Side effects:
148 *	Pins the page the cursor references.
149 *
150 * Returns:
151 *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
152 */
153static int
154__bt_seqset(t, ep, key, flags)
155	BTREE *t;
156	EPG *ep;
157	DBT *key;
158	int flags;
159{
160	PAGE *h;
161	pgno_t pg;
162	int exact;
163
164	/*
165	 * Find the first, last or specific key in the tree and point the
166	 * cursor at it.  The cursor may not be moved until a new key has
167	 * been found.
168	 */
169	switch (flags) {
170	case R_CURSOR:				/* Keyed scan. */
171		/*
172		 * Find the first instance of the key or the smallest key
173		 * which is greater than or equal to the specified key.
174		 */
175		if (key->data == NULL || key->size == 0) {
176			errno = EINVAL;
177			return (RET_ERROR);
178		}
179		return (__bt_first(t, key, ep, &exact));
180	case R_FIRST:				/* First record. */
181	case R_NEXT:
182		/* Walk down the left-hand side of the tree. */
183		for (pg = P_ROOT;;) {
184			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
185				return (RET_ERROR);
186
187			/* Check for an empty tree. */
188			if (NEXTINDEX(h) == 0) {
189				mpool_put(t->bt_mp, h, 0);
190				return (RET_SPECIAL);
191			}
192
193			if (h->flags & (P_BLEAF | P_RLEAF))
194				break;
195			pg = GETBINTERNAL(h, 0)->pgno;
196			mpool_put(t->bt_mp, h, 0);
197		}
198		ep->page = h;
199		ep->index = 0;
200		break;
201	case R_LAST:				/* Last record. */
202	case R_PREV:
203		/* Walk down the right-hand side of the tree. */
204		for (pg = P_ROOT;;) {
205			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
206				return (RET_ERROR);
207
208			/* Check for an empty tree. */
209			if (NEXTINDEX(h) == 0) {
210				mpool_put(t->bt_mp, h, 0);
211				return (RET_SPECIAL);
212			}
213
214			if (h->flags & (P_BLEAF | P_RLEAF))
215				break;
216			pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
217			mpool_put(t->bt_mp, h, 0);
218		}
219
220		ep->page = h;
221		ep->index = NEXTINDEX(h) - 1;
222		break;
223	}
224	return (RET_SUCCESS);
225}
226
227/*
228 * __bt_seqadvance --
229 *	Advance the sequential scan.
230 *
231 * Parameters:
232 *	t:	tree
233 *	flags:	R_NEXT, R_PREV
234 *
235 * Side effects:
236 *	Pins the page the new key/data record is on.
237 *
238 * Returns:
239 *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
240 */
241static int
242__bt_seqadv(t, ep, flags)
243	BTREE *t;
244	EPG *ep;
245	int flags;
246{
247	CURSOR *c;
248	PAGE *h;
249	indx_t idx;
250	pgno_t pg;
251	int exact;
252
253	/*
254	 * There are a couple of states that we can be in.  The cursor has
255	 * been initialized by the time we get here, but that's all we know.
256	 */
257	c = &t->bt_cursor;
258
259	/*
260	 * The cursor was deleted where there weren't any duplicate records,
261	 * so the key was saved.  Find out where that key would go in the
262	 * current tree.  It doesn't matter if the returned key is an exact
263	 * match or not -- if it's an exact match, the record was added after
264	 * the delete so we can just return it.  If not, as long as there's
265	 * a record there, return it.
266	 */
267	if (F_ISSET(c, CURS_ACQUIRE))
268		return (__bt_first(t, &c->key, ep, &exact));
269
270	/* Get the page referenced by the cursor. */
271	if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
272		return (RET_ERROR);
273
274	/*
275 	 * Find the next/previous record in the tree and point the cursor at
276	 * it.  The cursor may not be moved until a new key has been found.
277	 */
278	switch (flags) {
279	case R_NEXT:			/* Next record. */
280		/*
281		 * The cursor was deleted in duplicate records, and moved
282		 * forward to a record that has yet to be returned.  Clear
283		 * that flag, and return the record.
284		 */
285		if (F_ISSET(c, CURS_AFTER))
286			goto usecurrent;
287		idx = c->pg.index;
288		if (++idx == NEXTINDEX(h)) {
289			pg = h->nextpg;
290			mpool_put(t->bt_mp, h, 0);
291			if (pg == P_INVALID)
292				return (RET_SPECIAL);
293			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
294				return (RET_ERROR);
295			idx = 0;
296		}
297		break;
298	case R_PREV:			/* Previous record. */
299		/*
300		 * The cursor was deleted in duplicate records, and moved
301		 * backward to a record that has yet to be returned.  Clear
302		 * that flag, and return the record.
303		 */
304		if (F_ISSET(c, CURS_BEFORE)) {
305usecurrent:		F_CLR(c, CURS_AFTER | CURS_BEFORE);
306			ep->page = h;
307			ep->index = c->pg.index;
308			return (RET_SUCCESS);
309		}
310		idx = c->pg.index;
311		if (idx == 0) {
312			pg = h->prevpg;
313			mpool_put(t->bt_mp, h, 0);
314			if (pg == P_INVALID)
315				return (RET_SPECIAL);
316			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
317				return (RET_ERROR);
318			idx = NEXTINDEX(h) - 1;
319		} else
320			--idx;
321		break;
322	}
323
324	ep->page = h;
325	ep->index = idx;
326	return (RET_SUCCESS);
327}
328
329/*
330 * __bt_first --
331 *	Find the first entry.
332 *
333 * Parameters:
334 *	t:	the tree
335 *    key:	the key
336 *  erval:	return EPG
337 * exactp:	pointer to exact match flag
338 *
339 * Returns:
340 *	The first entry in the tree greater than or equal to key,
341 *	or RET_SPECIAL if no such key exists.
342 */
343static int
344__bt_first(t, key, erval, exactp)
345	BTREE *t;
346	const DBT *key;
347	EPG *erval;
348	int *exactp;
349{
350	PAGE *h;
351	EPG *ep, save;
352	pgno_t pg;
353
354	/*
355	 * Find any matching record; __bt_search pins the page.
356	 *
357	 * If it's an exact match and duplicates are possible, walk backwards
358	 * in the tree until we find the first one.  Otherwise, make sure it's
359	 * a valid key (__bt_search may return an index just past the end of a
360	 * page) and return it.
361	 */
362	if ((ep = __bt_search(t, key, exactp)) == NULL)
363		return (0);
364	if (*exactp) {
365		if (F_ISSET(t, B_NODUPS)) {
366			*erval = *ep;
367			return (RET_SUCCESS);
368		}
369
370		/*
371		 * Walk backwards, as long as the entry matches and there are
372		 * keys left in the tree.  Save a copy of each match in case
373		 * we go too far.
374		 */
375		save = *ep;
376		h = ep->page;
377		do {
378			if (save.page->pgno != ep->page->pgno) {
379				mpool_put(t->bt_mp, save.page, 0);
380				save = *ep;
381			} else
382				save.index = ep->index;
383
384			/*
385			 * Don't unpin the page the last (or original) match
386			 * was on, but make sure it's unpinned if an error
387			 * occurs.
388			 */
389			if (ep->index == 0) {
390				if (h->prevpg == P_INVALID)
391					break;
392				if (h->pgno != save.page->pgno)
393					mpool_put(t->bt_mp, h, 0);
394				if ((h = mpool_get(t->bt_mp,
395				    h->prevpg, 0)) == NULL) {
396					if (h->pgno == save.page->pgno)
397						mpool_put(t->bt_mp,
398						    save.page, 0);
399					return (RET_ERROR);
400				}
401				ep->page = h;
402				ep->index = NEXTINDEX(h);
403			}
404			--ep->index;
405		} while (__bt_cmp(t, key, ep) == 0);
406
407		/*
408		 * Reach here with the last page that was looked at pinned,
409		 * which may or may not be the same as the last (or original)
410		 * match page.  If it's not useful, release it.
411		 */
412		if (h->pgno != save.page->pgno)
413			mpool_put(t->bt_mp, h, 0);
414
415		*erval = save;
416		return (RET_SUCCESS);
417	}
418
419	/* If at the end of a page, find the next entry. */
420	if (ep->index == NEXTINDEX(ep->page)) {
421		h = ep->page;
422		pg = h->nextpg;
423		mpool_put(t->bt_mp, h, 0);
424		if (pg == P_INVALID)
425			return (RET_SPECIAL);
426		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
427			return (RET_ERROR);
428		ep->index = 0;
429		ep->page = h;
430	}
431	*erval = *ep;
432	return (RET_SUCCESS);
433}
434
435/*
436 * __bt_setcur --
437 *	Set the cursor to an entry in the tree.
438 *
439 * Parameters:
440 *	t:	the tree
441 *   pgno:	page number
442 *    idx:	page index
443 */
444void
445__bt_setcur(t, pgno, idx)
446	BTREE *t;
447	pgno_t pgno;
448	u_int idx;
449{
450	/* Lose any already deleted key. */
451	if (t->bt_cursor.key.data != NULL) {
452		free(t->bt_cursor.key.data);
453		t->bt_cursor.key.size = 0;
454		t->bt_cursor.key.data = NULL;
455	}
456	F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE);
457
458	/* Update the cursor. */
459	t->bt_cursor.pg.pgno = pgno;
460	t->bt_cursor.pg.index = idx;
461	F_SET(&t->bt_cursor, CURS_INIT);
462}
463