1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996,2008 Oracle. All rights reserved. 5 */ 6/* 7 * Copyright (c) 1990, 1993, 1994, 1995, 1996 8 * Keith Bostic. All rights reserved. 9 */ 10/* 11 * Copyright (c) 1990, 1993, 1994, 1995 12 * The Regents of the University of California. All rights reserved. 13 * 14 * This code is derived from software contributed to Berkeley by 15 * Mike Olson. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 3. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 
 *
 * $Id: bt_search.c,v 12.40 2008/05/07 12:27:31 bschmeck Exp $
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/btree.h"
#include "dbinc/lock.h"
#include "dbinc/mp.h"

/*
 * __bam_get_root --
 *	Fetch the root of a tree and see if we want to keep
 * it in the stack.
 *
 * pg is the page number of the root to fetch, slevel is the tree level
 * at which the caller intends to stop, flags are the SR_* search flags,
 * and *stack is set non-zero when the root page is being kept
 * (write-locked) as the first entry of the cursor's page stack.
 *
 * On success the root page and its lock are entered on the cursor stack
 * via BT_STK_ENTER below.
 *
 * PUBLIC: int __bam_get_root __P((DBC *, db_pgno_t, int, u_int32_t, int *));
 */
int
__bam_get_root(dbc, pg, slevel, flags, stack)
	DBC *dbc;
	db_pgno_t pg;
	int slevel;
	u_int32_t flags;
	int *stack;
{
	BTREE_CURSOR *cp;
	DB *dbp;
	DB_LOCK lock;
	DB_MPOOLFILE *mpf;
	PAGE *h;
	db_lockmode_t lock_mode;
	int ret, t_ret;

	dbp = dbc->dbp;
	mpf = dbp->mpf;
	cp = (BTREE_CURSOR *)dbc->internal;
	/*
	 * If write-locking pages, we need to know whether or not to acquire a
	 * write lock on a page before getting it.  This depends on how deep it
	 * is in tree, which we don't know until we acquire the root page.  So,
	 * if we need to lock the root page we may have to upgrade it later,
	 * because we won't get the correct lock initially.
	 *
	 * Retrieve the root page.
	 */
try_again:
	/*
	 * A full stack is needed when SR_STACK is set and record numbers are
	 * being maintained (recno trees, or btrees with C_RECNUM set).
	 */
	*stack = LF_ISSET(SR_STACK) &&
	    (dbc->dbtype == DB_RECNO || F_ISSET(cp, C_RECNUM));
	lock_mode = DB_LOCK_READ;
	if (*stack ||
	    LF_ISSET(SR_DEL) || (LF_ISSET(SR_NEXT) && LF_ISSET(SR_WRITE)))
		lock_mode = DB_LOCK_WRITE;
	if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
		return (ret);
	if ((ret = __memp_fget(mpf, &pg,
	    dbc->thread_info, dbc->txn, 0, &h)) != 0) {
		/* Did not read it, so we can release the lock */
		(void)__LPUT(dbc, lock);
		return (ret);
	}

	/*
	 * Decide if we need to save this page; if we do, write lock it.
	 * We deliberately don't lock-couple on this call.  If the tree
	 * is tiny, i.e., one page, and two threads are busily updating
	 * the root page, we're almost guaranteed deadlocks galore, as
	 * each one gets a read lock and then blocks the other's attempt
	 * for a write lock.
	 */
	if (!*stack &&
	    ((LF_ISSET(SR_PARENT) && (u_int8_t)(slevel + 1) >= LEVEL(h)) ||
	    (LF_ISSET(SR_WRITE) && LEVEL(h) == LEAFLEVEL) ||
	    (LF_ISSET(SR_START) && slevel == LEVEL(h)))) {
		if (!STD_LOCKING(dbc))
			goto no_relock;
		/*
		 * Drop the page and the read lock, then reacquire the page
		 * under a write lock.  The tree may change in between, so
		 * the level tests are repeated below after the re-fetch.
		 */
		ret = __memp_fput(mpf, dbc->thread_info, h, dbc->priority);
		if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0)
			ret = t_ret;
		if (ret != 0)
			return (ret);
		lock_mode = DB_LOCK_WRITE;
		if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
			return (ret);
		if ((ret = __memp_fget(mpf, &pg,
		    dbc->thread_info, dbc->txn, 0, &h)) != 0) {
			/* Did not read it, so we can release the lock */
			(void)__LPUT(dbc, lock);
			return (ret);
		}
		if (!((LF_ISSET(SR_PARENT) &&
		    (u_int8_t)(slevel + 1) >= LEVEL(h)) ||
		    (LF_ISSET(SR_WRITE) && LEVEL(h) == LEAFLEVEL) ||
		    (LF_ISSET(SR_START) && slevel == LEVEL(h)))) {
			/* Someone else split the root, start over. */
			ret = __memp_fput(mpf,
			    dbc->thread_info, h, dbc->priority);
			if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0)
				ret = t_ret;
			if (ret != 0)
				return (ret);
			goto try_again;
		}
no_relock:	*stack = 1;
	}
	/* Enter the root (index 0) as the first cursor-stack entry. */
	BT_STK_ENTER(dbp->env, cp, h, 0, lock, lock_mode, ret);

	return (ret);
}

/*
 * __bam_search --
 *	Search a btree for a key.
 *
 * PUBLIC: int __bam_search __P((DBC *, db_pgno_t,
 * PUBLIC:     const DBT *, u_int32_t, int, db_recno_t *, int *));
 */
int
__bam_search(dbc, root_pgno, key, flags, slevel, recnop, exactp)
	DBC *dbc;
	db_pgno_t root_pgno;	/* Root of the search; PGNO_INVALID: cp->root. */
	const DBT *key;		/* Key to search for. */
	u_int32_t flags;	/* SR_* search flags (see btree.h). */
	int slevel, *exactp;	/* Stop level; *exactp set 1 on exact match. */
	db_recno_t *recnop;	/* If non-NULL, return the record number. */
{
	BTREE *t;
	BTREE_CURSOR *cp;
	DB *dbp;
	DB_LOCK lock;
	DB_MPOOLFILE *mpf;
	ENV *env;
	PAGE *h;
	db_indx_t base, i, indx, *inp, lim;
	db_lockmode_t lock_mode;
	db_pgno_t pg;
	db_recno_t recno;
	int adjust, cmp, deloffset, ret, set_stack, stack, t_ret;
	int (*func) __P((DB *, const DBT *, const DBT *));

	dbp = dbc->dbp;
	env = dbp->env;
	mpf = dbp->mpf;
	cp = (BTREE_CURSOR *)dbc->internal;
	h = NULL;
	t = dbp->bt_internal;
	recno = 0;
	set_stack = 0;

	BT_STK_CLR(cp);

	/*
	 * There are several ways we search a btree tree.  The flags argument
	 * specifies if we're acquiring read or write locks, if we position
	 * to the first or last item in a set of duplicates, if we return
	 * deleted items, and if we are locking pairs of pages.  In addition,
	 * if we're modifying record numbers, we have to lock the entire tree
	 * regardless.  See btree.h for more details.
	 */

	if (root_pgno == PGNO_INVALID)
		root_pgno = cp->root;
	if ((ret = __bam_get_root(dbc, root_pgno, slevel, flags, &stack)) != 0)
		return (ret);
	/* __bam_get_root left the root as the top cursor-stack entry. */
	lock_mode = cp->csp->lock_mode;
	lock = cp->csp->lock;
	h = cp->csp->page;

	BT_STK_CLR(cp);

	/* Choose a comparison function. */
	func = F_ISSET(dbc, DBC_OPD) ?
	    (dbp->dup_compare == NULL ? __bam_defcmp : dbp->dup_compare) :
	    t->bt_compare;

	/* Descend the tree one page per iteration, starting at the root. */
	for (;;) {
		inp = P_INP(dbp, h);
		/*
		 * Leaf btree pages pair key and data items, so indices move
		 * in steps of P_INDX; other page types use single items.
		 */
		adjust = TYPE(h) == P_LBTREE ? P_INDX : O_INDX;
		if (LF_ISSET(SR_MIN | SR_MAX)) {
			/* No key comparison: go to the extreme entry. */
			if (LF_ISSET(SR_MIN) || NUM_ENT(h) == 0)
				indx = 0;
			else if (TYPE(h) == P_LBTREE)
				indx = NUM_ENT(h) - 2;
			else
				indx = NUM_ENT(h) - 1;

			if (LEVEL(h) == LEAFLEVEL ||
			    (!LF_ISSET(SR_START) && LEVEL(h) == slevel)) {
				if (LF_ISSET(SR_NEXT))
					goto get_next;
				goto found;
			}
			goto next;
		}
		/*
		 * Do a binary search on the current page.  If we're searching
		 * a Btree leaf page, we have to walk the indices in groups of
		 * two.  If we're searching an internal page or a off-page dup
		 * page, they're an index per page item.  If we find an exact
		 * match on a leaf page, we're done.
		 */
		DB_BINARY_SEARCH_FOR(base, lim, h, adjust) {
			DB_BINARY_SEARCH_INCR(indx, base, lim, adjust);
			if ((ret = __bam_cmp(dbp, dbc->thread_info,
			    dbc->txn, key, h, indx, func, &cmp)) != 0)
				goto err;
			if (cmp == 0) {
				if (LEVEL(h) == LEAFLEVEL ||
				    (!LF_ISSET(SR_START) &&
				    LEVEL(h) == slevel)) {
					if (LF_ISSET(SR_NEXT))
						goto get_next;
					goto found;
				}
				goto next;
			}
			if (cmp > 0)
				DB_BINARY_SEARCH_SHIFT_BASE(indx, base,
				    lim, adjust);
		}

		/*
		 * No match found.  Base is the smallest index greater than
		 * key and may be zero or a last + O_INDX index.
		 *
		 * If it's a leaf page or the stopping point,
		 * return base as the "found" value.
		 * Delete only deletes exact matches.
		 */
		if (LEVEL(h) == LEAFLEVEL ||
		    (!LF_ISSET(SR_START) && LEVEL(h) == slevel)) {
			*exactp = 0;

			if (LF_ISSET(SR_EXACT)) {
				ret = DB_NOTFOUND;
				goto err;
			}

			if (LF_ISSET(SR_STK_ONLY)) {
				/*
				 * Caller only wants page/index numbers, not
				 * pinned pages: record them and release
				 * everything before returning.
				 */
				BT_STK_NUM(env, cp, h, base, ret);
				if ((t_ret =
				    __LPUT(dbc, lock)) != 0 && ret == 0)
					ret = t_ret;
				if ((t_ret = __memp_fput(mpf, dbc->thread_info,
				    h, dbc->priority)) != 0 && ret == 0)
					ret = t_ret;
				return (ret);
			}
			if (LF_ISSET(SR_NEXT)) {
get_next:			/*
				 * The caller could have asked for a NEXT
				 * at the root if the tree recently collapsed.
				 */
				if (PGNO(h) == root_pgno) {
					ret = DB_NOTFOUND;
					goto err;
				}
				/*
				 * Save the root of the subtree
				 * and drop the rest of the subtree
				 * and search down again starting at
				 * the next child.
				 */
				if ((ret = __LPUT(dbc, lock)) != 0)
					goto err;
				if ((ret = __memp_fput(mpf,
				    dbc->thread_info, h, dbc->priority)) != 0)
					goto err;
				h = NULL;
				LF_SET(SR_MIN);
				LF_CLR(SR_NEXT);
				indx = cp->sp->indx + 1;
				if (indx == NUM_ENT(cp->sp->page)) {
					ret = DB_NOTFOUND;
					cp->csp++;
					goto err;
				}
				/*
				 * Take over the saved subtree root's page and
				 * lock, then release the rest of the stack.
				 */
				h = cp->sp->page;
				cp->sp->page = NULL;
				lock = cp->sp->lock;
				LOCK_INIT(cp->sp->lock);
				if ((ret = __bam_stkrel(dbc, STK_NOLOCK)) != 0)
					goto err;
				stack = 1;
				goto next;
			}

			/*
			 * !!!
			 * Possibly returning a deleted record -- DB_SET_RANGE,
			 * DB_KEYFIRST and DB_KEYLAST don't require an exact
			 * match, and we don't want to walk multiple pages here
			 * to find an undeleted record.  This is handled by the
			 * calling routine.
			 */
			if (LF_ISSET(SR_DEL) && cp->csp == cp->sp)
				cp->csp++;
			BT_STK_ENTER(env, cp, h, base, lock, lock_mode, ret);
			if (ret != 0)
				goto err;
			return (0);
		}

		/*
		 * If it's not a leaf page, record the internal page (which is
		 * a parent page for the key).  Decrement the base by 1 if it's
		 * non-zero so that if a split later occurs, the inserted page
		 * will be to the right of the saved page.
		 */
		indx = base > 0 ? base - O_INDX : base;

		/*
		 * If we're trying to calculate the record number, sum up
		 * all the record numbers on this page up to the indx point.
		 */
next:		if (recnop != NULL)
			for (i = 0; i < indx; ++i)
				recno += GET_BINTERNAL(dbp, h, i)->nrecs;

		pg = GET_BINTERNAL(dbp, h, indx)->pgno;

		/* See if we are at the level to start stacking. */
		if (LF_ISSET(SR_START) && slevel == LEVEL(h))
			stack = 1;

		if (LF_ISSET(SR_STK_ONLY)) {
			if (slevel == LEVEL(h)) {
				BT_STK_NUM(env, cp, h, indx, ret);
				if ((t_ret =
				    __LPUT(dbc, lock)) != 0 && ret == 0)
					ret = t_ret;
				if ((t_ret = __memp_fput(mpf, dbc->thread_info,
				    h, dbc->priority)) != 0 && ret == 0)
					ret = t_ret;
				return (ret);
			}
			BT_STK_NUMPUSH(env, cp, h, indx, ret);
			(void)__memp_fput(mpf,
			    dbc->thread_info, h, dbc->priority);
			h = NULL;
			if ((ret = __db_lget(dbc,
			    LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) {
				/*
				 * Discard our lock and return on failure.  This
				 * is OK because it only happens when descending
				 * the tree holding read-locks.
				 */
				(void)__LPUT(dbc, lock);
				return (ret);
			}
		} else if (stack) {
			/* Return if this is the lowest page wanted. */
			if (LF_ISSET(SR_PARENT) && slevel == LEVEL(h)) {
				BT_STK_ENTER(env,
				    cp, h, indx, lock, lock_mode, ret);
				if (ret != 0)
					goto err;
				return (0);
			}
			if (LF_ISSET(SR_DEL) && NUM_ENT(h) > 1) {
				/*
				 * There was a page with a singleton pointer
				 * to a non-empty subtree.
				 */
				cp->csp--;
				if ((ret = __bam_stkrel(dbc, STK_NOLOCK)) != 0)
					goto err;
				set_stack = stack = 0;
				goto do_del;
			}
			/*
			 * Keep this page pinned and locked on the stack;
			 * descend under a write lock.
			 */
			BT_STK_PUSH(env,
			    cp, h, indx, lock, lock_mode, ret);
			if (ret != 0)
				goto err;
			h = NULL;

			lock_mode = DB_LOCK_WRITE;
			if ((ret =
			    __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
				goto err;
		} else {
			/*
			 * Decide if we want to return a reference to the next
			 * page in the return stack.  If so, lock it and never
			 * unlock it.  We will want to stack things on the
			 * next iteration.  The stack variable cannot be
			 * set until we leave this clause.
			 */
			if ((LF_ISSET(SR_PARENT) &&
			    (u_int8_t)(slevel + 1) >= (LEVEL(h) - 1)) ||
			    (LEVEL(h) - 1) == LEAFLEVEL)
				set_stack = 1;

			/*
			 * Returning a subtree.  See if we have hit the start
			 * point if so save the parent and set stack.
			 * Otherwise free the parent and temporarily
			 * save this one.
			 * For SR_DEL we need to find a page with 1 entry.
			 * For SR_NEXT we want find the minimal subtree
			 * that contains the key and the next page.
			 * We save pages as long as we are at the right
			 * edge of the subtree.  When we leave the right
			 * edge, then drop the subtree.
			 */
			if (!LF_ISSET(SR_DEL | SR_NEXT)) {
				/* Plain descent: lock-couple to the child. */
				if ((ret = __memp_fput(mpf,
				    dbc->thread_info, h, dbc->priority)) != 0)
					goto err;
				goto lock_next;
			}

			if ((LF_ISSET(SR_DEL) && NUM_ENT(h) == 1)) {
				/*
				 * We are pushing the things on the stack,
				 * set the stack variable now to indicate this
				 * has happened.
				 */
				stack = set_stack = 1;
				LF_SET(SR_WRITE);
				/* Push the parent. */
				cp->csp++;
				/* Push this node. */
				BT_STK_PUSH(env, cp, h,
				    indx, lock, lock_mode, ret);
				if (ret != 0)
					goto err;
				LOCK_INIT(lock);
			} else {
				/*
				 * See if we want to save the tree so far.
				 * If we are looking for the next key,
				 * then we must save this node if we are
				 * at the end of the page.  If not then
				 * discard anything we have saved so far.
				 * For delete only keep one node until
				 * we find a singleton.
				 */
do_del:				if (cp->csp->page != NULL) {
					if (LF_ISSET(SR_NEXT) &&
					    indx == NUM_ENT(h) - 1)
						cp->csp++;
					else if ((ret =
					    __bam_stkrel(dbc, STK_NOLOCK)) != 0)
						goto err;
				}
				/* Save this node. */
				BT_STK_ENTER(env, cp,
				    h, indx, lock, lock_mode, ret);
				if (ret != 0)
					goto err;
				LOCK_INIT(lock);
			}

lock_next:		h = NULL;

			if (set_stack && LF_ISSET(SR_WRITE))
				lock_mode = DB_LOCK_WRITE;
			if ((ret = __db_lget(dbc,
			    LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) {
				/*
				 * If we fail, discard the lock we held.  This
				 * is OK because this only happens when we are
				 * descending the tree holding read-locks.
				 */
				(void)__LPUT(dbc, lock);
				if (LF_ISSET(SR_DEL | SR_NEXT) && !stack)
					cp->csp++;
				goto err;
			}
			stack = set_stack;
		}
		/* Fetch the child page chosen above and loop. */
		if ((ret = __memp_fget(mpf, &pg,
		    dbc->thread_info, dbc->txn, 0, &h)) != 0)
			goto err;
	}
	/* NOTREACHED */

found:	*exactp = 1;

	/*
	 * If we got here, we know that we have a Btree leaf or off-page
	 * duplicates page.  If it's a Btree leaf page, we have to handle
	 * on-page duplicates.
	 *
	 * If there are duplicates, go to the first/last one.  This is
	 * safe because we know that we're not going to leave the page,
	 * all duplicate sets that are not on overflow pages exist on a
	 * single leaf page.
	 */
	if (TYPE(h) == P_LBTREE && NUM_ENT(h) > P_INDX) {
		if (LF_ISSET(SR_DUPLAST))
			while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
			    inp[indx] == inp[indx + P_INDX])
				indx += P_INDX;
		else if (LF_ISSET(SR_DUPFIRST))
			while (indx > 0 &&
			    inp[indx] == inp[indx - P_INDX])
				indx -= P_INDX;
	}

	/*
	 * Now check if we are allowed to return deleted items; if not, then
	 * find the next (or previous) non-deleted duplicate entry.  (We do
	 * not move from the original found key on the basis of the SR_DELNO
	 * flag.)
	 */
	DB_ASSERT(env, recnop == NULL || LF_ISSET(SR_DELNO));
	if (LF_ISSET(SR_DELNO)) {
		deloffset = TYPE(h) == P_LBTREE ? O_INDX : 0;
		if (LF_ISSET(SR_DUPLAST))
			while (B_DISSET(GET_BKEYDATA(dbp,
			    h, indx + deloffset)->type) && indx > 0 &&
			    inp[indx] == inp[indx - adjust])
				indx -= adjust;
		else
			while (B_DISSET(GET_BKEYDATA(dbp,
			    h, indx + deloffset)->type) &&
			    indx < (db_indx_t)(NUM_ENT(h) - adjust) &&
			    inp[indx] == inp[indx + adjust])
				indx += adjust;

		/*
		 * If we weren't able to find a non-deleted duplicate, return
		 * DB_NOTFOUND.
		 */
		if (B_DISSET(GET_BKEYDATA(dbp, h, indx + deloffset)->type)) {
			ret = DB_NOTFOUND;
			goto err;
		}

		/*
		 * Increment the record counter to point to the found element.
		 * Ignore any deleted key/data pairs.  There doesn't need to
		 * be any correction for duplicates, as Btree doesn't support
		 * duplicates and record numbers in the same tree.
		 */
		if (recnop != NULL) {
			DB_ASSERT(env, TYPE(h) == P_LBTREE);

			for (i = 0; i < indx; i += P_INDX)
				if (!B_DISSET(
				    GET_BKEYDATA(dbp, h, i + O_INDX)->type))
					++recno;

			/* Correct the number for a 0-base. */
			*recnop = recno + 1;
		}
	}

	if (LF_ISSET(SR_STK_ONLY)) {
		BT_STK_NUM(env, cp, h, indx, ret);
		if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0)
			ret = t_ret;
		if ((t_ret = __memp_fput(mpf,
		    dbc->thread_info, h, dbc->priority)) != 0 && ret == 0)
			ret = t_ret;
	} else {
		if (LF_ISSET(SR_DEL) && cp->csp == cp->sp)
			cp->csp++;
		BT_STK_ENTER(env, cp, h, indx, lock, lock_mode, ret);
	}
	if (ret != 0)
		goto err;

	return (0);

err:	if (h != NULL && (t_ret = __memp_fput(mpf,
	    dbc->thread_info, h, dbc->priority)) != 0 && ret == 0)
		ret = t_ret;

	/* Keep any not-found page locked for serializability. */
	if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0)
		ret = t_ret;

	BT_STK_POP(cp);
	/* NOTE(review): return value ignored; ret carries the primary error. */
	__bam_stkrel(dbc, 0);

	return (ret);
}

/*
 * __bam_stkrel --
 *	Release all pages currently held in the stack.
 *
 * flags may include STK_CLRDBC (also clear the cursor's current page/lock),
 * STK_NOLOCK (discard rather than transactionally release the locks) and
 * STK_PGONLY (release only the page pins, keeping locks and the stack).
 *
 * PUBLIC: int __bam_stkrel __P((DBC *, u_int32_t));
 */
int
__bam_stkrel(dbc, flags)
	DBC *dbc;
	u_int32_t flags;
{
	BTREE_CURSOR *cp;
	DB *dbp;
	DB_MPOOLFILE *mpf;
	EPG *epg;
	int ret, t_ret;

	dbp = dbc->dbp;
	mpf = dbp->mpf;
	cp = (BTREE_CURSOR *)dbc->internal;

	/*
	 * Release inner pages first.
	 *
	 * The caller must be sure that setting STK_NOLOCK will not effect
	 * either serializability or recoverability.
	 */
	for (ret = 0, epg = cp->sp; epg <= cp->csp; ++epg) {
		if (epg->page != NULL) {
			if (LF_ISSET(STK_CLRDBC) && cp->page == epg->page) {
				cp->page = NULL;
				LOCK_INIT(cp->lock);
			}
			if ((t_ret = __memp_fput(mpf, dbc->thread_info,
			    epg->page, dbc->priority)) != 0 && ret == 0)
				ret = t_ret;
			/*
			 * XXX
			 * Temporary fix for #3243 -- under certain deadlock
			 * conditions we call here again and re-free the page.
			 * The correct fix is to never release a stack that
			 * doesn't hold items.
			 */
			epg->page = NULL;
		}
		/*
		 * We set this if we need to release our pins,
		 * but are not logically ready to have the pages
		 * visible.
		 */
		if (LF_ISSET(STK_PGONLY))
			continue;
		if (LF_ISSET(STK_NOLOCK)) {
			if ((t_ret = __LPUT(dbc, epg->lock)) != 0 && ret == 0)
				ret = t_ret;
		} else
			if ((t_ret = __TLPUT(dbc, epg->lock)) != 0 && ret == 0)
				ret = t_ret;
	}

	/* Clear the stack, all pages have been released. */
	if (!LF_ISSET(STK_PGONLY))
		BT_STK_CLR(cp);

	return (ret);
}

/*
 * __bam_stkgrow --
 *	Grow the stack.
 *
 * Doubles the cursor's page stack, copying the existing entries into the
 * newly allocated array and freeing the old one unless it is the cursor's
 * built-in stack.
 *
 * PUBLIC: int __bam_stkgrow __P((ENV *, BTREE_CURSOR *));
 */
int
__bam_stkgrow(env, cp)
	ENV *env;
	BTREE_CURSOR *cp;
{
	EPG *p;
	size_t entries;
	int ret;

	entries = cp->esp - cp->sp;

	if ((ret = __os_calloc(env, entries * 2, sizeof(EPG), &p)) != 0)
		return (ret);
	memcpy(p, cp->sp, entries * sizeof(EPG));
	if (cp->sp != cp->stack)
		__os_free(env, cp->sp);
	cp->sp = p;
	cp->csp = p + entries;
	cp->esp = p + entries * 2;
	return (0);
}