11573Srgrimes/*- 214272Spst * Copyright (c) 1990, 1993, 1994 31573Srgrimes * The Regents of the University of California. All rights reserved. 41573Srgrimes * 51573Srgrimes * This code is derived from software contributed to Berkeley by 61573Srgrimes * Mike Olson. 71573Srgrimes * 81573Srgrimes * Redistribution and use in source and binary forms, with or without 91573Srgrimes * modification, are permitted provided that the following conditions 101573Srgrimes * are met: 111573Srgrimes * 1. Redistributions of source code must retain the above copyright 121573Srgrimes * notice, this list of conditions and the following disclaimer. 131573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141573Srgrimes * notice, this list of conditions and the following disclaimer in the 151573Srgrimes * documentation and/or other materials provided with the distribution. 161573Srgrimes * 4. Neither the name of the University nor the names of its contributors 171573Srgrimes * may be used to endorse or promote products derived from this software 181573Srgrimes * without specific prior written permission. 191573Srgrimes * 201573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 211573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 221573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 231573Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 241573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 251573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 261573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 271573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 281573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 291573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 301573Srgrimes * SUCH DAMAGE. 311573Srgrimes */ 321573Srgrimes 331573Srgrimes#if defined(LIBC_SCCS) && !defined(lint) 3414272Spststatic char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; 351573Srgrimes#endif /* LIBC_SCCS and not lint */ 3692905Sobrien#include <sys/cdefs.h> 3792905Sobrien__FBSDID("$FreeBSD: releng/10.3/lib/libc/db/btree/bt_seq.c 189327 2009-03-04 00:58:04Z delphij $"); 381573Srgrimes 391573Srgrimes#include <sys/types.h> 401573Srgrimes 411573Srgrimes#include <errno.h> 421573Srgrimes#include <stddef.h> 431573Srgrimes#include <stdio.h> 441573Srgrimes#include <stdlib.h> 451573Srgrimes 461573Srgrimes#include <db.h> 471573Srgrimes#include "btree.h" 481573Srgrimes 4992905Sobrienstatic int __bt_first(BTREE *, const DBT *, EPG *, int *); 5092905Sobrienstatic int __bt_seqadv(BTREE *, EPG *, int); 5192905Sobrienstatic int __bt_seqset(BTREE *, EPG *, DBT *, int); 521573Srgrimes 531573Srgrimes/* 541573Srgrimes * Sequential scan support. 551573Srgrimes * 5614272Spst * The tree can be scanned sequentially, starting from either end of the 5714272Spst * tree or from any specific key. A scan request before any scanning is 5814272Spst * done is initialized as starting from the least node. 591573Srgrimes */ 601573Srgrimes 611573Srgrimes/* 6214272Spst * __bt_seq -- 6314272Spst * Btree sequential scan interface. 641573Srgrimes * 651573Srgrimes * Parameters: 661573Srgrimes * dbp: pointer to access method 671573Srgrimes * key: key for positioning and return value 681573Srgrimes * data: data return value 691573Srgrimes * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 701573Srgrimes * 711573Srgrimes * Returns: 721573Srgrimes * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 731573Srgrimes */ 741573Srgrimesint 75189291Sdelphij__bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags) 761573Srgrimes{ 771573Srgrimes BTREE *t; 781573Srgrimes EPG e; 791573Srgrimes int status; 801573Srgrimes 811573Srgrimes t = dbp->internal; 821573Srgrimes 831573Srgrimes /* Toss any page pinned across calls. */ 841573Srgrimes if (t->bt_pinned != NULL) { 851573Srgrimes mpool_put(t->bt_mp, t->bt_pinned, 0); 861573Srgrimes t->bt_pinned = NULL; 871573Srgrimes } 881573Srgrimes 891573Srgrimes /* 901573Srgrimes * If scan unitialized as yet, or starting at a specific record, set 9114272Spst * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 9214272Spst * the page the cursor references if they're successful. 931573Srgrimes */ 9414272Spst switch (flags) { 951573Srgrimes case R_NEXT: 961573Srgrimes case R_PREV: 9714272Spst if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 9814272Spst status = __bt_seqadv(t, &e, flags); 991573Srgrimes break; 1001573Srgrimes } 1011573Srgrimes /* FALLTHROUGH */ 1021573Srgrimes case R_FIRST: 1031573Srgrimes case R_LAST: 10414272Spst case R_CURSOR: 10514272Spst status = __bt_seqset(t, &e, key, flags); 1061573Srgrimes break; 1071573Srgrimes default: 1081573Srgrimes errno = EINVAL; 1091573Srgrimes return (RET_ERROR); 1101573Srgrimes } 1111573Srgrimes 1121573Srgrimes if (status == RET_SUCCESS) { 11314272Spst __bt_setcur(t, e.page->pgno, e.index); 1141573Srgrimes 11514272Spst status = 11614272Spst __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 1171573Srgrimes 1181573Srgrimes /* 1191573Srgrimes * If the user is doing concurrent access, we copied the 1201573Srgrimes * key/data, toss the page. 1211573Srgrimes */ 12214272Spst if (F_ISSET(t, B_DB_LOCK)) 1231573Srgrimes mpool_put(t->bt_mp, e.page, 0); 1241573Srgrimes else 1251573Srgrimes t->bt_pinned = e.page; 1261573Srgrimes } 1271573Srgrimes return (status); 1281573Srgrimes} 1291573Srgrimes 1301573Srgrimes/* 13114272Spst * __bt_seqset -- 13214272Spst * Set the sequential scan to a specific key. 1331573Srgrimes * 1341573Srgrimes * Parameters: 1351573Srgrimes * t: tree 1361573Srgrimes * ep: storage for returned key 1371573Srgrimes * key: key for initial scan position 1381573Srgrimes * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 1391573Srgrimes * 1401573Srgrimes * Side effects: 1411573Srgrimes * Pins the page the cursor references. 1421573Srgrimes * 1431573Srgrimes * Returns: 1441573Srgrimes * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 1451573Srgrimes */ 1461573Srgrimesstatic int 147189291Sdelphij__bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags) 1481573Srgrimes{ 1491573Srgrimes PAGE *h; 1501573Srgrimes pgno_t pg; 1511573Srgrimes int exact; 1521573Srgrimes 1531573Srgrimes /* 15414272Spst * Find the first, last or specific key in the tree and point the 15514272Spst * cursor at it. The cursor may not be moved until a new key has 15614272Spst * been found. 1571573Srgrimes */ 15814272Spst switch (flags) { 1591573Srgrimes case R_CURSOR: /* Keyed scan. */ 1601573Srgrimes /* 16114272Spst * Find the first instance of the key or the smallest key 16214272Spst * which is greater than or equal to the specified key. 1631573Srgrimes */ 1641573Srgrimes if (key->data == NULL || key->size == 0) { 1651573Srgrimes errno = EINVAL; 1661573Srgrimes return (RET_ERROR); 1671573Srgrimes } 16814272Spst return (__bt_first(t, key, ep, &exact)); 1691573Srgrimes case R_FIRST: /* First record. */ 1701573Srgrimes case R_NEXT: 1711573Srgrimes /* Walk down the left-hand side of the tree. */ 1721573Srgrimes for (pg = P_ROOT;;) { 1731573Srgrimes if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 1741573Srgrimes return (RET_ERROR); 17514272Spst 17614272Spst /* Check for an empty tree. */ 17714272Spst if (NEXTINDEX(h) == 0) { 17814272Spst mpool_put(t->bt_mp, h, 0); 17914272Spst return (RET_SPECIAL); 18014272Spst } 18114272Spst 1821573Srgrimes if (h->flags & (P_BLEAF | P_RLEAF)) 1831573Srgrimes break; 1841573Srgrimes pg = GETBINTERNAL(h, 0)->pgno; 1851573Srgrimes mpool_put(t->bt_mp, h, 0); 1861573Srgrimes } 1871573Srgrimes ep->page = h; 1881573Srgrimes ep->index = 0; 1891573Srgrimes break; 1901573Srgrimes case R_LAST: /* Last record. */ 1911573Srgrimes case R_PREV: 1921573Srgrimes /* Walk down the right-hand side of the tree. */ 1931573Srgrimes for (pg = P_ROOT;;) { 1941573Srgrimes if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 1951573Srgrimes return (RET_ERROR); 19614272Spst 19714272Spst /* Check for an empty tree. */ 19814272Spst if (NEXTINDEX(h) == 0) { 19914272Spst mpool_put(t->bt_mp, h, 0); 20014272Spst return (RET_SPECIAL); 20114272Spst } 20214272Spst 2031573Srgrimes if (h->flags & (P_BLEAF | P_RLEAF)) 2041573Srgrimes break; 2051573Srgrimes pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 2061573Srgrimes mpool_put(t->bt_mp, h, 0); 2071573Srgrimes } 2081573Srgrimes 2091573Srgrimes ep->page = h; 2101573Srgrimes ep->index = NEXTINDEX(h) - 1; 2111573Srgrimes break; 2121573Srgrimes } 2131573Srgrimes return (RET_SUCCESS); 2141573Srgrimes} 2151573Srgrimes 2161573Srgrimes/* 21714272Spst * __bt_seqadvance -- 21814272Spst * Advance the sequential scan. 2191573Srgrimes * 2201573Srgrimes * Parameters: 2211573Srgrimes * t: tree 2221573Srgrimes * flags: R_NEXT, R_PREV 2231573Srgrimes * 2241573Srgrimes * Side effects: 2251573Srgrimes * Pins the page the new key/data record is on. 2261573Srgrimes * 2271573Srgrimes * Returns: 2281573Srgrimes * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 2291573Srgrimes */ 2301573Srgrimesstatic int 231189291Sdelphij__bt_seqadv(BTREE *t, EPG *ep, int flags) 2321573Srgrimes{ 23314272Spst CURSOR *c; 2341573Srgrimes PAGE *h; 235189292Sdelphij indx_t idx; 2361573Srgrimes pgno_t pg; 23714272Spst int exact; 2381573Srgrimes 23914272Spst /* 24014272Spst * There are a couple of states that we can be in. The cursor has 24114272Spst * been initialized by the time we get here, but that's all we know. 24214272Spst */ 24314272Spst c = &t->bt_cursor; 2441573Srgrimes 24514272Spst /* 24614272Spst * The cursor was deleted where there weren't any duplicate records, 24714272Spst * so the key was saved. Find out where that key would go in the 24814272Spst * current tree. It doesn't matter if the returned key is an exact 24914272Spst * match or not -- if it's an exact match, the record was added after 25014272Spst * the delete so we can just return it. If not, as long as there's 25114272Spst * a record there, return it. 25214272Spst */ 25314272Spst if (F_ISSET(c, CURS_ACQUIRE)) 25414272Spst return (__bt_first(t, &c->key, ep, &exact)); 25514272Spst 25614272Spst /* Get the page referenced by the cursor. */ 25714272Spst if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 2581573Srgrimes return (RET_ERROR); 2591573Srgrimes 2601573Srgrimes /* 261189327Sdelphij * Find the next/previous record in the tree and point the cursor at 26214272Spst * it. The cursor may not be moved until a new key has been found. 2631573Srgrimes */ 26414272Spst switch (flags) { 2651573Srgrimes case R_NEXT: /* Next record. */ 26614272Spst /* 26714272Spst * The cursor was deleted in duplicate records, and moved 26814272Spst * forward to a record that has yet to be returned. Clear 26914272Spst * that flag, and return the record. 27014272Spst */ 27114272Spst if (F_ISSET(c, CURS_AFTER)) 27214272Spst goto usecurrent; 273189292Sdelphij idx = c->pg.index; 274189292Sdelphij if (++idx == NEXTINDEX(h)) { 27514272Spst pg = h->nextpg; 27614272Spst mpool_put(t->bt_mp, h, 0); 27714272Spst if (pg == P_INVALID) 27814272Spst return (RET_SPECIAL); 27914272Spst if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 28014272Spst return (RET_ERROR); 281189292Sdelphij idx = 0; 2821573Srgrimes } 2831573Srgrimes break; 2841573Srgrimes case R_PREV: /* Previous record. */ 28514272Spst /* 28614272Spst * The cursor was deleted in duplicate records, and moved 28714272Spst * backward to a record that has yet to be returned. Clear 28814272Spst * that flag, and return the record. 28914272Spst */ 29014272Spst if (F_ISSET(c, CURS_BEFORE)) { 29114272Spstusecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 29214272Spst ep->page = h; 29314272Spst ep->index = c->pg.index; 29414272Spst return (RET_SUCCESS); 29514272Spst } 296189292Sdelphij idx = c->pg.index; 297189292Sdelphij if (idx == 0) { 29814272Spst pg = h->prevpg; 29914272Spst mpool_put(t->bt_mp, h, 0); 30014272Spst if (pg == P_INVALID) 30114272Spst return (RET_SPECIAL); 30214272Spst if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 30314272Spst return (RET_ERROR); 304189292Sdelphij idx = NEXTINDEX(h) - 1; 30514272Spst } else 306189292Sdelphij --idx; 3071573Srgrimes break; 3081573Srgrimes } 3091573Srgrimes 31014272Spst ep->page = h; 311189292Sdelphij ep->index = idx; 31214272Spst return (RET_SUCCESS); 31314272Spst} 3141573Srgrimes 31514272Spst/* 31614272Spst * __bt_first -- 31714272Spst * Find the first entry. 31814272Spst * 31914272Spst * Parameters: 32014272Spst * t: the tree 32114272Spst * key: the key 32214272Spst * erval: return EPG 32314272Spst * exactp: pointer to exact match flag 32414272Spst * 32514272Spst * Returns: 32614272Spst * The first entry in the tree greater than or equal to key, 32714272Spst * or RET_SPECIAL if no such key exists. 32814272Spst */ 32914272Spststatic int 330189291Sdelphij__bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp) 33114272Spst{ 33214272Spst PAGE *h; 33314272Spst EPG *ep, save; 33414272Spst pgno_t pg; 33514272Spst 3361573Srgrimes /* 33714272Spst * Find any matching record; __bt_search pins the page. 33814272Spst * 33914272Spst * If it's an exact match and duplicates are possible, walk backwards 34014272Spst * in the tree until we find the first one. Otherwise, make sure it's 34114272Spst * a valid key (__bt_search may return an index just past the end of a 34214272Spst * page) and return it. 3431573Srgrimes */ 34414272Spst if ((ep = __bt_search(t, key, exactp)) == NULL) 34529574Sphk return (0); 34614272Spst if (*exactp) { 34714272Spst if (F_ISSET(t, B_NODUPS)) { 34814272Spst *erval = *ep; 34914272Spst return (RET_SUCCESS); 35014272Spst } 351189327Sdelphij 35214272Spst /* 35314272Spst * Walk backwards, as long as the entry matches and there are 35414272Spst * keys left in the tree. Save a copy of each match in case 35514272Spst * we go too far. 35614272Spst */ 35714272Spst save = *ep; 35814272Spst h = ep->page; 35914272Spst do { 36014272Spst if (save.page->pgno != ep->page->pgno) { 36114272Spst mpool_put(t->bt_mp, save.page, 0); 36214272Spst save = *ep; 36314272Spst } else 36414272Spst save.index = ep->index; 36514272Spst 36614272Spst /* 36714272Spst * Don't unpin the page the last (or original) match 36814272Spst * was on, but make sure it's unpinned if an error 36914272Spst * occurs. 37014272Spst */ 37114272Spst if (ep->index == 0) { 37214272Spst if (h->prevpg == P_INVALID) 37314272Spst break; 37414272Spst if (h->pgno != save.page->pgno) 37514272Spst mpool_put(t->bt_mp, h, 0); 37614272Spst if ((h = mpool_get(t->bt_mp, 37714272Spst h->prevpg, 0)) == NULL) { 37814272Spst if (h->pgno == save.page->pgno) 37914272Spst mpool_put(t->bt_mp, 38014272Spst save.page, 0); 38114272Spst return (RET_ERROR); 38214272Spst } 38314272Spst ep->page = h; 38414272Spst ep->index = NEXTINDEX(h); 38514272Spst } 38614272Spst --ep->index; 38714272Spst } while (__bt_cmp(t, key, ep) == 0); 38814272Spst 38914272Spst /* 39014272Spst * Reach here with the last page that was looked at pinned, 39114272Spst * which may or may not be the same as the last (or original) 39214272Spst * match page. If it's not useful, release it. 39314272Spst */ 39414272Spst if (h->pgno != save.page->pgno) 39514272Spst mpool_put(t->bt_mp, h, 0); 39614272Spst 39714272Spst *erval = save; 39814272Spst return (RET_SUCCESS); 39914272Spst } 40014272Spst 40114272Spst /* If at the end of a page, find the next entry. */ 40214272Spst if (ep->index == NEXTINDEX(ep->page)) { 40314272Spst h = ep->page; 40414272Spst pg = h->nextpg; 40514272Spst mpool_put(t->bt_mp, h, 0); 40614272Spst if (pg == P_INVALID) 40714272Spst return (RET_SPECIAL); 40814272Spst if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 4091573Srgrimes return (RET_ERROR); 41014272Spst ep->index = 0; 41114272Spst ep->page = h; 4121573Srgrimes } 41314272Spst *erval = *ep; 4141573Srgrimes return (RET_SUCCESS); 4151573Srgrimes} 4161573Srgrimes 4171573Srgrimes/* 41814272Spst * __bt_setcur -- 41914272Spst * Set the cursor to an entry in the tree. 4201573Srgrimes * 4211573Srgrimes * Parameters: 42214272Spst * t: the tree 42314272Spst * pgno: page number 424189292Sdelphij * idx: page index 4251573Srgrimes */ 42614272Spstvoid 427189292Sdelphij__bt_setcur(BTREE *t, pgno_t pgno, u_int idx) 4281573Srgrimes{ 42914272Spst /* Lose any already deleted key. */ 43014272Spst if (t->bt_cursor.key.data != NULL) { 43114272Spst free(t->bt_cursor.key.data); 43214272Spst t->bt_cursor.key.size = 0; 43314272Spst t->bt_cursor.key.data = NULL; 43414272Spst } 43514272Spst F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 4361573Srgrimes 43714272Spst /* Update the cursor. */ 43814272Spst t->bt_cursor.pg.pgno = pgno; 439189292Sdelphij t->bt_cursor.pg.index = idx; 44014272Spst F_SET(&t->bt_cursor, CURS_INIT); 4411573Srgrimes} 442