bt_seq.c revision 1.4
1/* $OpenBSD: bt_seq.c,v 1.4 1999/02/15 05:11:23 millert Exp $ */ 2 3/*- 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Mike Olson. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39#if defined(LIBC_SCCS) && !defined(lint) 40#if 0 41static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; 42#else 43static char rcsid[] = "$OpenBSD: bt_seq.c,v 1.4 1999/02/15 05:11:23 millert Exp $"; 44#endif 45#endif /* LIBC_SCCS and not lint */ 46 47#include <sys/types.h> 48 49#include <errno.h> 50#include <stddef.h> 51#include <stdio.h> 52#include <stdlib.h> 53 54#include <db.h> 55#include "btree.h" 56 57static int __bt_first __P((BTREE *, const DBT *, EPG *, int *)); 58static int __bt_seqadv __P((BTREE *, EPG *, int)); 59static int __bt_seqset __P((BTREE *, EPG *, DBT *, int)); 60 61/* 62 * Sequential scan support. 63 * 64 * The tree can be scanned sequentially, starting from either end of the 65 * tree or from any specific key. A scan request before any scanning is 66 * done is initialized as starting from the least node. 67 */ 68 69/* 70 * __bt_seq -- 71 * Btree sequential scan interface. 72 * 73 * Parameters: 74 * dbp: pointer to access method 75 * key: key for positioning and return value 76 * data: data return value 77 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 78 * 79 * Returns: 80 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 81 */ 82int 83__bt_seq(dbp, key, data, flags) 84 const DB *dbp; 85 DBT *key, *data; 86 u_int flags; 87{ 88 BTREE *t; 89 EPG e; 90 int status; 91 92 t = dbp->internal; 93 94 /* Toss any page pinned across calls. */ 95 if (t->bt_pinned != NULL) { 96 mpool_put(t->bt_mp, t->bt_pinned, 0); 97 t->bt_pinned = NULL; 98 } 99 100 /* 101 * If scan unitialized as yet, or starting at a specific record, set 102 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 103 * the page the cursor references if they're successful. 104 */ 105 switch (flags) { 106 case R_NEXT: 107 case R_PREV: 108 if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 109 status = __bt_seqadv(t, &e, flags); 110 break; 111 } 112 /* FALLTHROUGH */ 113 case R_FIRST: 114 case R_LAST: 115 case R_CURSOR: 116 status = __bt_seqset(t, &e, key, flags); 117 break; 118 default: 119 errno = EINVAL; 120 return (RET_ERROR); 121 } 122 123 if (status == RET_SUCCESS) { 124 __bt_setcur(t, e.page->pgno, e.index); 125 126 status = 127 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 128 129 /* 130 * If the user is doing concurrent access, we copied the 131 * key/data, toss the page. 132 */ 133 if (F_ISSET(t, B_DB_LOCK)) 134 mpool_put(t->bt_mp, e.page, 0); 135 else 136 t->bt_pinned = e.page; 137 } 138 return (status); 139} 140 141/* 142 * __bt_seqset -- 143 * Set the sequential scan to a specific key. 144 * 145 * Parameters: 146 * t: tree 147 * ep: storage for returned key 148 * key: key for initial scan position 149 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 150 * 151 * Side effects: 152 * Pins the page the cursor references. 153 * 154 * Returns: 155 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 156 */ 157static int 158__bt_seqset(t, ep, key, flags) 159 BTREE *t; 160 EPG *ep; 161 DBT *key; 162 int flags; 163{ 164 PAGE *h; 165 pgno_t pg; 166 int exact; 167 168 /* 169 * Find the first, last or specific key in the tree and point the 170 * cursor at it. The cursor may not be moved until a new key has 171 * been found. 172 */ 173 switch (flags) { 174 case R_CURSOR: /* Keyed scan. */ 175 /* 176 * Find the first instance of the key or the smallest key 177 * which is greater than or equal to the specified key. 178 */ 179 if (key->data == NULL || key->size == 0) { 180 errno = EINVAL; 181 return (RET_ERROR); 182 } 183 return (__bt_first(t, key, ep, &exact)); 184 case R_FIRST: /* First record. */ 185 case R_NEXT: 186 /* Walk down the left-hand side of the tree. */ 187 for (pg = P_ROOT;;) { 188 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 189 return (RET_ERROR); 190 191 /* Check for an empty tree. */ 192 if (NEXTINDEX(h) == 0) { 193 mpool_put(t->bt_mp, h, 0); 194 return (RET_SPECIAL); 195 } 196 197 if (h->flags & (P_BLEAF | P_RLEAF)) 198 break; 199 pg = GETBINTERNAL(h, 0)->pgno; 200 mpool_put(t->bt_mp, h, 0); 201 } 202 ep->page = h; 203 ep->index = 0; 204 break; 205 case R_LAST: /* Last record. */ 206 case R_PREV: 207 /* Walk down the right-hand side of the tree. */ 208 for (pg = P_ROOT;;) { 209 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 210 return (RET_ERROR); 211 212 /* Check for an empty tree. */ 213 if (NEXTINDEX(h) == 0) { 214 mpool_put(t->bt_mp, h, 0); 215 return (RET_SPECIAL); 216 } 217 218 if (h->flags & (P_BLEAF | P_RLEAF)) 219 break; 220 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 221 mpool_put(t->bt_mp, h, 0); 222 } 223 224 ep->page = h; 225 ep->index = NEXTINDEX(h) - 1; 226 break; 227 } 228 return (RET_SUCCESS); 229} 230 231/* 232 * __bt_seqadvance -- 233 * Advance the sequential scan. 234 * 235 * Parameters: 236 * t: tree 237 * flags: R_NEXT, R_PREV 238 * 239 * Side effects: 240 * Pins the page the new key/data record is on. 241 * 242 * Returns: 243 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 244 */ 245static int 246__bt_seqadv(t, ep, flags) 247 BTREE *t; 248 EPG *ep; 249 int flags; 250{ 251 CURSOR *c; 252 PAGE *h; 253 indx_t index; 254 pgno_t pg; 255 int exact; 256 257 /* 258 * There are a couple of states that we can be in. The cursor has 259 * been initialized by the time we get here, but that's all we know. 260 */ 261 c = &t->bt_cursor; 262 263 /* 264 * The cursor was deleted where there weren't any duplicate records, 265 * so the key was saved. Find out where that key would go in the 266 * current tree. It doesn't matter if the returned key is an exact 267 * match or not -- if it's an exact match, the record was added after 268 * the delete so we can just return it. If not, as long as there's 269 * a record there, return it. 270 */ 271 if (F_ISSET(c, CURS_ACQUIRE)) 272 return (__bt_first(t, &c->key, ep, &exact)); 273 274 /* Get the page referenced by the cursor. */ 275 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 276 return (RET_ERROR); 277 278 /* 279 * Find the next/previous record in the tree and point the cursor at 280 * it. The cursor may not be moved until a new key has been found. 281 */ 282 switch (flags) { 283 case R_NEXT: /* Next record. */ 284 /* 285 * The cursor was deleted in duplicate records, and moved 286 * forward to a record that has yet to be returned. Clear 287 * that flag, and return the record. 288 */ 289 if (F_ISSET(c, CURS_AFTER)) 290 goto usecurrent; 291 index = c->pg.index; 292 if (++index == NEXTINDEX(h)) { 293 pg = h->nextpg; 294 mpool_put(t->bt_mp, h, 0); 295 if (pg == P_INVALID) 296 return (RET_SPECIAL); 297 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 298 return (RET_ERROR); 299 index = 0; 300 } 301 break; 302 case R_PREV: /* Previous record. */ 303 /* 304 * The cursor was deleted in duplicate records, and moved 305 * backward to a record that has yet to be returned. Clear 306 * that flag, and return the record. 307 */ 308 if (F_ISSET(c, CURS_BEFORE)) { 309usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 310 ep->page = h; 311 ep->index = c->pg.index; 312 return (RET_SUCCESS); 313 } 314 index = c->pg.index; 315 if (index == 0) { 316 pg = h->prevpg; 317 mpool_put(t->bt_mp, h, 0); 318 if (pg == P_INVALID) 319 return (RET_SPECIAL); 320 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 321 return (RET_ERROR); 322 index = NEXTINDEX(h) - 1; 323 } else 324 --index; 325 break; 326 } 327 328 ep->page = h; 329 ep->index = index; 330 return (RET_SUCCESS); 331} 332 333/* 334 * __bt_first -- 335 * Find the first entry. 336 * 337 * Parameters: 338 * t: the tree 339 * key: the key 340 * erval: return EPG 341 * exactp: pointer to exact match flag 342 * 343 * Returns: 344 * The first entry in the tree greater than or equal to key, 345 * or RET_SPECIAL if no such key exists. 346 */ 347static int 348__bt_first(t, key, erval, exactp) 349 BTREE *t; 350 const DBT *key; 351 EPG *erval; 352 int *exactp; 353{ 354 PAGE *h; 355 EPG *ep, save; 356 pgno_t pg; 357 358 /* 359 * Find any matching record; __bt_search pins the page. 360 * 361 * If it's an exact match and duplicates are possible, walk backwards 362 * in the tree until we find the first one. Otherwise, make sure it's 363 * a valid key (__bt_search may return an index just past the end of a 364 * page) and return it. 365 */ 366 if ((ep = __bt_search(t, key, exactp)) == NULL) 367 return (NULL); 368 if (*exactp) { 369 if (F_ISSET(t, B_NODUPS)) { 370 *erval = *ep; 371 return (RET_SUCCESS); 372 } 373 374 /* 375 * Walk backwards, as long as the entry matches and there are 376 * keys left in the tree. Save a copy of each match in case 377 * we go too far. 378 */ 379 save = *ep; 380 h = ep->page; 381 do { 382 if (save.page->pgno != ep->page->pgno) { 383 mpool_put(t->bt_mp, save.page, 0); 384 save = *ep; 385 } else 386 save.index = ep->index; 387 388 /* 389 * Don't unpin the page the last (or original) match 390 * was on, but make sure it's unpinned if an error 391 * occurs. 392 */ 393 if (ep->index == 0) { 394 if (h->prevpg == P_INVALID) 395 break; 396 if (h->pgno != save.page->pgno) 397 mpool_put(t->bt_mp, h, 0); 398 if ((h = mpool_get(t->bt_mp, 399 h->prevpg, 0)) == NULL) { 400 if (h->pgno == save.page->pgno) 401 mpool_put(t->bt_mp, 402 save.page, 0); 403 return (RET_ERROR); 404 } 405 ep->page = h; 406 ep->index = NEXTINDEX(h); 407 } 408 --ep->index; 409 } while (__bt_cmp(t, key, ep) == 0); 410 411 /* 412 * Reach here with the last page that was looked at pinned, 413 * which may or may not be the same as the last (or original) 414 * match page. If it's not useful, release it. 415 */ 416 if (h->pgno != save.page->pgno) 417 mpool_put(t->bt_mp, h, 0); 418 419 *erval = save; 420 return (RET_SUCCESS); 421 } 422 423 /* If at the end of a page, find the next entry. */ 424 if (ep->index == NEXTINDEX(ep->page)) { 425 h = ep->page; 426 pg = h->nextpg; 427 mpool_put(t->bt_mp, h, 0); 428 if (pg == P_INVALID) 429 return (RET_SPECIAL); 430 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 431 return (RET_ERROR); 432 ep->index = 0; 433 ep->page = h; 434 } 435 *erval = *ep; 436 return (RET_SUCCESS); 437} 438 439/* 440 * __bt_setcur -- 441 * Set the cursor to an entry in the tree. 442 * 443 * Parameters: 444 * t: the tree 445 * pgno: page number 446 * index: page index 447 */ 448void 449__bt_setcur(t, pgno, index) 450 BTREE *t; 451 pgno_t pgno; 452 u_int index; 453{ 454 /* Lose any already deleted key. */ 455 if (t->bt_cursor.key.data != NULL) { 456 free(t->bt_cursor.key.data); 457 t->bt_cursor.key.size = 0; 458 t->bt_cursor.key.data = NULL; 459 } 460 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 461 462 /* Update the cursor. */ 463 t->bt_cursor.pg.pgno = pgno; 464 t->bt_cursor.pg.index = index; 465 F_SET(&t->bt_cursor, CURS_INIT); 466} 467