11573Srgrimes/*-
214272Spst * Copyright (c) 1990, 1993, 1994
31573Srgrimes *	The Regents of the University of California.  All rights reserved.
41573Srgrimes *
51573Srgrimes * This code is derived from software contributed to Berkeley by
61573Srgrimes * Mike Olson.
71573Srgrimes *
81573Srgrimes * Redistribution and use in source and binary forms, with or without
91573Srgrimes * modification, are permitted provided that the following conditions
101573Srgrimes * are met:
111573Srgrimes * 1. Redistributions of source code must retain the above copyright
121573Srgrimes *    notice, this list of conditions and the following disclaimer.
131573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
141573Srgrimes *    notice, this list of conditions and the following disclaimer in the
151573Srgrimes *    documentation and/or other materials provided with the distribution.
161573Srgrimes * 4. Neither the name of the University nor the names of its contributors
171573Srgrimes *    may be used to endorse or promote products derived from this software
181573Srgrimes *    without specific prior written permission.
191573Srgrimes *
201573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
211573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
221573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
231573Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
241573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
251573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
261573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
271573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
281573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
291573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
301573Srgrimes * SUCH DAMAGE.
311573Srgrimes */
321573Srgrimes
331573Srgrimes#if defined(LIBC_SCCS) && !defined(lint)
3414272Spststatic char sccsid[] = "@(#)bt_seq.c	8.7 (Berkeley) 7/20/94";
351573Srgrimes#endif /* LIBC_SCCS and not lint */
3692905Sobrien#include <sys/cdefs.h>
3792905Sobrien__FBSDID("$FreeBSD: releng/10.3/lib/libc/db/btree/bt_seq.c 189327 2009-03-04 00:58:04Z delphij $");
381573Srgrimes
391573Srgrimes#include <sys/types.h>
401573Srgrimes
411573Srgrimes#include <errno.h>
421573Srgrimes#include <stddef.h>
431573Srgrimes#include <stdio.h>
441573Srgrimes#include <stdlib.h>
451573Srgrimes
461573Srgrimes#include <db.h>
471573Srgrimes#include "btree.h"
481573Srgrimes
4992905Sobrienstatic int __bt_first(BTREE *, const DBT *, EPG *, int *);
5092905Sobrienstatic int __bt_seqadv(BTREE *, EPG *, int);
5192905Sobrienstatic int __bt_seqset(BTREE *, EPG *, DBT *, int);
521573Srgrimes
531573Srgrimes/*
541573Srgrimes * Sequential scan support.
551573Srgrimes *
5614272Spst * The tree can be scanned sequentially, starting from either end of the
5714272Spst * tree or from any specific key.  A scan request before any scanning is
5814272Spst * done is initialized as starting from the least node.
591573Srgrimes */
601573Srgrimes
611573Srgrimes/*
6214272Spst * __bt_seq --
6314272Spst *	Btree sequential scan interface.
641573Srgrimes *
651573Srgrimes * Parameters:
661573Srgrimes *	dbp:	pointer to access method
671573Srgrimes *	key:	key for positioning and return value
681573Srgrimes *	data:	data return value
691573Srgrimes *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
701573Srgrimes *
711573Srgrimes * Returns:
721573Srgrimes *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
731573Srgrimes */
741573Srgrimesint
75189291Sdelphij__bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags)
761573Srgrimes{
771573Srgrimes	BTREE *t;
781573Srgrimes	EPG e;
791573Srgrimes	int status;
801573Srgrimes
811573Srgrimes	t = dbp->internal;
821573Srgrimes
831573Srgrimes	/* Toss any page pinned across calls. */
841573Srgrimes	if (t->bt_pinned != NULL) {
851573Srgrimes		mpool_put(t->bt_mp, t->bt_pinned, 0);
861573Srgrimes		t->bt_pinned = NULL;
871573Srgrimes	}
881573Srgrimes
891573Srgrimes	/*
901573Srgrimes	 * If scan unitialized as yet, or starting at a specific record, set
9114272Spst	 * the scan to a specific key.  Both __bt_seqset and __bt_seqadv pin
9214272Spst	 * the page the cursor references if they're successful.
931573Srgrimes	 */
9414272Spst	switch (flags) {
951573Srgrimes	case R_NEXT:
961573Srgrimes	case R_PREV:
9714272Spst		if (F_ISSET(&t->bt_cursor, CURS_INIT)) {
9814272Spst			status = __bt_seqadv(t, &e, flags);
991573Srgrimes			break;
1001573Srgrimes		}
1011573Srgrimes		/* FALLTHROUGH */
1021573Srgrimes	case R_FIRST:
1031573Srgrimes	case R_LAST:
10414272Spst	case R_CURSOR:
10514272Spst		status = __bt_seqset(t, &e, key, flags);
1061573Srgrimes		break;
1071573Srgrimes	default:
1081573Srgrimes		errno = EINVAL;
1091573Srgrimes		return (RET_ERROR);
1101573Srgrimes	}
1111573Srgrimes
1121573Srgrimes	if (status == RET_SUCCESS) {
11314272Spst		__bt_setcur(t, e.page->pgno, e.index);
1141573Srgrimes
11514272Spst		status =
11614272Spst		    __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0);
1171573Srgrimes
1181573Srgrimes		/*
1191573Srgrimes		 * If the user is doing concurrent access, we copied the
1201573Srgrimes		 * key/data, toss the page.
1211573Srgrimes		 */
12214272Spst		if (F_ISSET(t, B_DB_LOCK))
1231573Srgrimes			mpool_put(t->bt_mp, e.page, 0);
1241573Srgrimes		else
1251573Srgrimes			t->bt_pinned = e.page;
1261573Srgrimes	}
1271573Srgrimes	return (status);
1281573Srgrimes}
1291573Srgrimes
1301573Srgrimes/*
13114272Spst * __bt_seqset --
13214272Spst *	Set the sequential scan to a specific key.
1331573Srgrimes *
1341573Srgrimes * Parameters:
1351573Srgrimes *	t:	tree
1361573Srgrimes *	ep:	storage for returned key
1371573Srgrimes *	key:	key for initial scan position
1381573Srgrimes *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
1391573Srgrimes *
1401573Srgrimes * Side effects:
1411573Srgrimes *	Pins the page the cursor references.
1421573Srgrimes *
1431573Srgrimes * Returns:
1441573Srgrimes *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
1451573Srgrimes */
1461573Srgrimesstatic int
147189291Sdelphij__bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags)
1481573Srgrimes{
1491573Srgrimes	PAGE *h;
1501573Srgrimes	pgno_t pg;
1511573Srgrimes	int exact;
1521573Srgrimes
1531573Srgrimes	/*
15414272Spst	 * Find the first, last or specific key in the tree and point the
15514272Spst	 * cursor at it.  The cursor may not be moved until a new key has
15614272Spst	 * been found.
1571573Srgrimes	 */
15814272Spst	switch (flags) {
1591573Srgrimes	case R_CURSOR:				/* Keyed scan. */
1601573Srgrimes		/*
16114272Spst		 * Find the first instance of the key or the smallest key
16214272Spst		 * which is greater than or equal to the specified key.
1631573Srgrimes		 */
1641573Srgrimes		if (key->data == NULL || key->size == 0) {
1651573Srgrimes			errno = EINVAL;
1661573Srgrimes			return (RET_ERROR);
1671573Srgrimes		}
16814272Spst		return (__bt_first(t, key, ep, &exact));
1691573Srgrimes	case R_FIRST:				/* First record. */
1701573Srgrimes	case R_NEXT:
1711573Srgrimes		/* Walk down the left-hand side of the tree. */
1721573Srgrimes		for (pg = P_ROOT;;) {
1731573Srgrimes			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
1741573Srgrimes				return (RET_ERROR);
17514272Spst
17614272Spst			/* Check for an empty tree. */
17714272Spst			if (NEXTINDEX(h) == 0) {
17814272Spst				mpool_put(t->bt_mp, h, 0);
17914272Spst				return (RET_SPECIAL);
18014272Spst			}
18114272Spst
1821573Srgrimes			if (h->flags & (P_BLEAF | P_RLEAF))
1831573Srgrimes				break;
1841573Srgrimes			pg = GETBINTERNAL(h, 0)->pgno;
1851573Srgrimes			mpool_put(t->bt_mp, h, 0);
1861573Srgrimes		}
1871573Srgrimes		ep->page = h;
1881573Srgrimes		ep->index = 0;
1891573Srgrimes		break;
1901573Srgrimes	case R_LAST:				/* Last record. */
1911573Srgrimes	case R_PREV:
1921573Srgrimes		/* Walk down the right-hand side of the tree. */
1931573Srgrimes		for (pg = P_ROOT;;) {
1941573Srgrimes			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
1951573Srgrimes				return (RET_ERROR);
19614272Spst
19714272Spst			/* Check for an empty tree. */
19814272Spst			if (NEXTINDEX(h) == 0) {
19914272Spst				mpool_put(t->bt_mp, h, 0);
20014272Spst				return (RET_SPECIAL);
20114272Spst			}
20214272Spst
2031573Srgrimes			if (h->flags & (P_BLEAF | P_RLEAF))
2041573Srgrimes				break;
2051573Srgrimes			pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
2061573Srgrimes			mpool_put(t->bt_mp, h, 0);
2071573Srgrimes		}
2081573Srgrimes
2091573Srgrimes		ep->page = h;
2101573Srgrimes		ep->index = NEXTINDEX(h) - 1;
2111573Srgrimes		break;
2121573Srgrimes	}
2131573Srgrimes	return (RET_SUCCESS);
2141573Srgrimes}
2151573Srgrimes
2161573Srgrimes/*
21714272Spst * __bt_seqadvance --
21814272Spst *	Advance the sequential scan.
2191573Srgrimes *
2201573Srgrimes * Parameters:
2211573Srgrimes *	t:	tree
2221573Srgrimes *	flags:	R_NEXT, R_PREV
2231573Srgrimes *
2241573Srgrimes * Side effects:
2251573Srgrimes *	Pins the page the new key/data record is on.
2261573Srgrimes *
2271573Srgrimes * Returns:
2281573Srgrimes *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
2291573Srgrimes */
2301573Srgrimesstatic int
231189291Sdelphij__bt_seqadv(BTREE *t, EPG *ep, int flags)
2321573Srgrimes{
23314272Spst	CURSOR *c;
2341573Srgrimes	PAGE *h;
235189292Sdelphij	indx_t idx;
2361573Srgrimes	pgno_t pg;
23714272Spst	int exact;
2381573Srgrimes
23914272Spst	/*
24014272Spst	 * There are a couple of states that we can be in.  The cursor has
24114272Spst	 * been initialized by the time we get here, but that's all we know.
24214272Spst	 */
24314272Spst	c = &t->bt_cursor;
2441573Srgrimes
24514272Spst	/*
24614272Spst	 * The cursor was deleted where there weren't any duplicate records,
24714272Spst	 * so the key was saved.  Find out where that key would go in the
24814272Spst	 * current tree.  It doesn't matter if the returned key is an exact
24914272Spst	 * match or not -- if it's an exact match, the record was added after
25014272Spst	 * the delete so we can just return it.  If not, as long as there's
25114272Spst	 * a record there, return it.
25214272Spst	 */
25314272Spst	if (F_ISSET(c, CURS_ACQUIRE))
25414272Spst		return (__bt_first(t, &c->key, ep, &exact));
25514272Spst
25614272Spst	/* Get the page referenced by the cursor. */
25714272Spst	if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
2581573Srgrimes		return (RET_ERROR);
2591573Srgrimes
2601573Srgrimes	/*
261189327Sdelphij	 * Find the next/previous record in the tree and point the cursor at
26214272Spst	 * it.  The cursor may not be moved until a new key has been found.
2631573Srgrimes	 */
26414272Spst	switch (flags) {
2651573Srgrimes	case R_NEXT:			/* Next record. */
26614272Spst		/*
26714272Spst		 * The cursor was deleted in duplicate records, and moved
26814272Spst		 * forward to a record that has yet to be returned.  Clear
26914272Spst		 * that flag, and return the record.
27014272Spst		 */
27114272Spst		if (F_ISSET(c, CURS_AFTER))
27214272Spst			goto usecurrent;
273189292Sdelphij		idx = c->pg.index;
274189292Sdelphij		if (++idx == NEXTINDEX(h)) {
27514272Spst			pg = h->nextpg;
27614272Spst			mpool_put(t->bt_mp, h, 0);
27714272Spst			if (pg == P_INVALID)
27814272Spst				return (RET_SPECIAL);
27914272Spst			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
28014272Spst				return (RET_ERROR);
281189292Sdelphij			idx = 0;
2821573Srgrimes		}
2831573Srgrimes		break;
2841573Srgrimes	case R_PREV:			/* Previous record. */
28514272Spst		/*
28614272Spst		 * The cursor was deleted in duplicate records, and moved
28714272Spst		 * backward to a record that has yet to be returned.  Clear
28814272Spst		 * that flag, and return the record.
28914272Spst		 */
29014272Spst		if (F_ISSET(c, CURS_BEFORE)) {
29114272Spstusecurrent:		F_CLR(c, CURS_AFTER | CURS_BEFORE);
29214272Spst			ep->page = h;
29314272Spst			ep->index = c->pg.index;
29414272Spst			return (RET_SUCCESS);
29514272Spst		}
296189292Sdelphij		idx = c->pg.index;
297189292Sdelphij		if (idx == 0) {
29814272Spst			pg = h->prevpg;
29914272Spst			mpool_put(t->bt_mp, h, 0);
30014272Spst			if (pg == P_INVALID)
30114272Spst				return (RET_SPECIAL);
30214272Spst			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
30314272Spst				return (RET_ERROR);
304189292Sdelphij			idx = NEXTINDEX(h) - 1;
30514272Spst		} else
306189292Sdelphij			--idx;
3071573Srgrimes		break;
3081573Srgrimes	}
3091573Srgrimes
31014272Spst	ep->page = h;
311189292Sdelphij	ep->index = idx;
31214272Spst	return (RET_SUCCESS);
31314272Spst}
3141573Srgrimes
31514272Spst/*
31614272Spst * __bt_first --
31714272Spst *	Find the first entry.
31814272Spst *
31914272Spst * Parameters:
32014272Spst *	t:	the tree
32114272Spst *    key:	the key
32214272Spst *  erval:	return EPG
32314272Spst * exactp:	pointer to exact match flag
32414272Spst *
32514272Spst * Returns:
32614272Spst *	The first entry in the tree greater than or equal to key,
32714272Spst *	or RET_SPECIAL if no such key exists.
32814272Spst */
32914272Spststatic int
330189291Sdelphij__bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp)
33114272Spst{
33214272Spst	PAGE *h;
33314272Spst	EPG *ep, save;
33414272Spst	pgno_t pg;
33514272Spst
3361573Srgrimes	/*
33714272Spst	 * Find any matching record; __bt_search pins the page.
33814272Spst	 *
33914272Spst	 * If it's an exact match and duplicates are possible, walk backwards
34014272Spst	 * in the tree until we find the first one.  Otherwise, make sure it's
34114272Spst	 * a valid key (__bt_search may return an index just past the end of a
34214272Spst	 * page) and return it.
3431573Srgrimes	 */
34414272Spst	if ((ep = __bt_search(t, key, exactp)) == NULL)
34529574Sphk		return (0);
34614272Spst	if (*exactp) {
34714272Spst		if (F_ISSET(t, B_NODUPS)) {
34814272Spst			*erval = *ep;
34914272Spst			return (RET_SUCCESS);
35014272Spst		}
351189327Sdelphij
35214272Spst		/*
35314272Spst		 * Walk backwards, as long as the entry matches and there are
35414272Spst		 * keys left in the tree.  Save a copy of each match in case
35514272Spst		 * we go too far.
35614272Spst		 */
35714272Spst		save = *ep;
35814272Spst		h = ep->page;
35914272Spst		do {
36014272Spst			if (save.page->pgno != ep->page->pgno) {
36114272Spst				mpool_put(t->bt_mp, save.page, 0);
36214272Spst				save = *ep;
36314272Spst			} else
36414272Spst				save.index = ep->index;
36514272Spst
36614272Spst			/*
36714272Spst			 * Don't unpin the page the last (or original) match
36814272Spst			 * was on, but make sure it's unpinned if an error
36914272Spst			 * occurs.
37014272Spst			 */
37114272Spst			if (ep->index == 0) {
37214272Spst				if (h->prevpg == P_INVALID)
37314272Spst					break;
37414272Spst				if (h->pgno != save.page->pgno)
37514272Spst					mpool_put(t->bt_mp, h, 0);
37614272Spst				if ((h = mpool_get(t->bt_mp,
37714272Spst				    h->prevpg, 0)) == NULL) {
37814272Spst					if (h->pgno == save.page->pgno)
37914272Spst						mpool_put(t->bt_mp,
38014272Spst						    save.page, 0);
38114272Spst					return (RET_ERROR);
38214272Spst				}
38314272Spst				ep->page = h;
38414272Spst				ep->index = NEXTINDEX(h);
38514272Spst			}
38614272Spst			--ep->index;
38714272Spst		} while (__bt_cmp(t, key, ep) == 0);
38814272Spst
38914272Spst		/*
39014272Spst		 * Reach here with the last page that was looked at pinned,
39114272Spst		 * which may or may not be the same as the last (or original)
39214272Spst		 * match page.  If it's not useful, release it.
39314272Spst		 */
39414272Spst		if (h->pgno != save.page->pgno)
39514272Spst			mpool_put(t->bt_mp, h, 0);
39614272Spst
39714272Spst		*erval = save;
39814272Spst		return (RET_SUCCESS);
39914272Spst	}
40014272Spst
40114272Spst	/* If at the end of a page, find the next entry. */
40214272Spst	if (ep->index == NEXTINDEX(ep->page)) {
40314272Spst		h = ep->page;
40414272Spst		pg = h->nextpg;
40514272Spst		mpool_put(t->bt_mp, h, 0);
40614272Spst		if (pg == P_INVALID)
40714272Spst			return (RET_SPECIAL);
40814272Spst		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
4091573Srgrimes			return (RET_ERROR);
41014272Spst		ep->index = 0;
41114272Spst		ep->page = h;
4121573Srgrimes	}
41314272Spst	*erval = *ep;
4141573Srgrimes	return (RET_SUCCESS);
4151573Srgrimes}
4161573Srgrimes
4171573Srgrimes/*
41814272Spst * __bt_setcur --
41914272Spst *	Set the cursor to an entry in the tree.
4201573Srgrimes *
4211573Srgrimes * Parameters:
42214272Spst *	t:	the tree
42314272Spst *   pgno:	page number
424189292Sdelphij *    idx:	page index
4251573Srgrimes */
42614272Spstvoid
427189292Sdelphij__bt_setcur(BTREE *t, pgno_t pgno, u_int idx)
4281573Srgrimes{
42914272Spst	/* Lose any already deleted key. */
43014272Spst	if (t->bt_cursor.key.data != NULL) {
43114272Spst		free(t->bt_cursor.key.data);
43214272Spst		t->bt_cursor.key.size = 0;
43314272Spst		t->bt_cursor.key.data = NULL;
43414272Spst	}
43514272Spst	F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE);
4361573Srgrimes
43714272Spst	/* Update the cursor. */
43814272Spst	t->bt_cursor.pg.pgno = pgno;
439189292Sdelphij	t->bt_cursor.pg.index = idx;
44014272Spst	F_SET(&t->bt_cursor, CURS_INIT);
4411573Srgrimes}
442