bt_delete.c revision 1.9
1/* $NetBSD: bt_delete.c,v 1.9 1997/07/21 14:06:31 jtc Exp $ */ 2 3/*- 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Mike Olson. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39#include <sys/cdefs.h> 40#if defined(LIBC_SCCS) && !defined(lint) 41#if 0 42static char sccsid[] = "@(#)bt_delete.c 8.13 (Berkeley) 7/28/94"; 43#else 44__RCSID("$NetBSD: bt_delete.c,v 1.9 1997/07/21 14:06:31 jtc Exp $"); 45#endif 46#endif /* LIBC_SCCS and not lint */ 47 48#include "namespace.h" 49#include <sys/types.h> 50 51#include <errno.h> 52#include <stdio.h> 53#include <string.h> 54 55#include <db.h> 56#include "btree.h" 57 58static int __bt_bdelete __P((BTREE *, const DBT *)); 59static int __bt_curdel __P((BTREE *, const DBT *, PAGE *, u_int)); 60static int __bt_pdelete __P((BTREE *, PAGE *)); 61static int __bt_relink __P((BTREE *, PAGE *)); 62static int __bt_stkacq __P((BTREE *, PAGE **, CURSOR *)); 63 64/* 65 * __bt_delete 66 * Delete the item(s) referenced by a key. 67 * 68 * Return RET_SPECIAL if the key is not found. 69 */ 70int 71__bt_delete(dbp, key, flags) 72 const DB *dbp; 73 const DBT *key; 74 u_int flags; 75{ 76 BTREE *t; 77 CURSOR *c; 78 PAGE *h; 79 int status; 80 81 t = dbp->internal; 82 83 /* Toss any page pinned across calls. */ 84 if (t->bt_pinned != NULL) { 85 mpool_put(t->bt_mp, t->bt_pinned, 0); 86 t->bt_pinned = NULL; 87 } 88 89 /* Check for change to a read-only tree. */ 90 if (F_ISSET(t, B_RDONLY)) { 91 errno = EPERM; 92 return (RET_ERROR); 93 } 94 95 switch (flags) { 96 case 0: 97 status = __bt_bdelete(t, key); 98 break; 99 case R_CURSOR: 100 /* 101 * If flags is R_CURSOR, delete the cursor. Must already 102 * have started a scan and not have already deleted it. 103 */ 104 c = &t->bt_cursor; 105 if (F_ISSET(c, CURS_INIT)) { 106 if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE)) 107 return (RET_SPECIAL); 108 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 109 return (RET_ERROR); 110 111 /* 112 * If the page is about to be emptied, we'll need to 113 * delete it, which means we have to acquire a stack. 114 */ 115 if (NEXTINDEX(h) == 1) 116 if (__bt_stkacq(t, &h, &t->bt_cursor)) 117 return (RET_ERROR); 118 119 status = __bt_dleaf(t, NULL, h, c->pg.index); 120 121 if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) { 122 if (__bt_pdelete(t, h)) 123 return (RET_ERROR); 124 } else 125 mpool_put(t->bt_mp, 126 h, status == RET_SUCCESS ? MPOOL_DIRTY : 0); 127 break; 128 } 129 /* FALLTHROUGH */ 130 default: 131 errno = EINVAL; 132 return (RET_ERROR); 133 } 134 if (status == RET_SUCCESS) 135 F_SET(t, B_MODIFIED); 136 return (status); 137} 138 139/* 140 * __bt_stkacq -- 141 * Acquire a stack so we can delete a cursor entry. 142 * 143 * Parameters: 144 * t: tree 145 * hp: pointer to current, pinned PAGE pointer 146 * c: pointer to the cursor 147 * 148 * Returns: 149 * 0 on success, 1 on failure 150 */ 151static int 152__bt_stkacq(t, hp, c) 153 BTREE *t; 154 PAGE **hp; 155 CURSOR *c; 156{ 157 BINTERNAL *bi; 158 EPG *e; 159 EPGNO *parent; 160 PAGE *h; 161 indx_t index = 0; /* Pacify gcc */ 162 pgno_t pgno; 163 recno_t nextpg, prevpg; 164 int exact, level; 165 166 /* 167 * Find the first occurrence of the key in the tree. Toss the 168 * currently locked page so we don't hit an already-locked page. 169 */ 170 h = *hp; 171 mpool_put(t->bt_mp, h, 0); 172 if ((e = __bt_search(t, &c->key, &exact)) == NULL) 173 return (1); 174 h = e->page; 175 176 /* See if we got it in one shot. */ 177 if (h->pgno == c->pg.pgno) 178 goto ret; 179 180 /* 181 * Move right, looking for the page. At each move we have to move 182 * up the stack until we don't have to move to the next page. If 183 * we have to change pages at an internal level, we have to fix the 184 * stack back up. 185 */ 186 while (h->pgno != c->pg.pgno) { 187 if ((nextpg = h->nextpg) == P_INVALID) 188 break; 189 mpool_put(t->bt_mp, h, 0); 190 191 /* Move up the stack. */ 192 for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { 193 /* Get the parent page. */ 194 if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) 195 return (1); 196 197 /* Move to the next index. */ 198 if (parent->index != NEXTINDEX(h) - 1) { 199 index = parent->index + 1; 200 BT_PUSH(t, h->pgno, index); 201 break; 202 } 203 mpool_put(t->bt_mp, h, 0); 204 } 205 206 /* Restore the stack. */ 207 while (level--) { 208 /* Push the next level down onto the stack. */ 209 bi = GETBINTERNAL(h, index); 210 pgno = bi->pgno; 211 BT_PUSH(t, pgno, 0); 212 213 /* Lose the currently pinned page. */ 214 mpool_put(t->bt_mp, h, 0); 215 216 /* Get the next level down. */ 217 if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) 218 return (1); 219 index = 0; 220 } 221 mpool_put(t->bt_mp, h, 0); 222 if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL) 223 return (1); 224 } 225 226 if (h->pgno == c->pg.pgno) 227 goto ret; 228 229 /* Reacquire the original stack. */ 230 mpool_put(t->bt_mp, h, 0); 231 if ((e = __bt_search(t, &c->key, &exact)) == NULL) 232 return (1); 233 h = e->page; 234 235 /* 236 * Move left, looking for the page. At each move we have to move 237 * up the stack until we don't have to change pages to move to the 238 * next page. If we have to change pages at an internal level, we 239 * have to fix the stack back up. 240 */ 241 while (h->pgno != c->pg.pgno) { 242 if ((prevpg = h->prevpg) == P_INVALID) 243 break; 244 mpool_put(t->bt_mp, h, 0); 245 246 /* Move up the stack. */ 247 for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { 248 /* Get the parent page. */ 249 if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) 250 return (1); 251 252 /* Move to the next index. */ 253 if (parent->index != 0) { 254 index = parent->index - 1; 255 BT_PUSH(t, h->pgno, index); 256 break; 257 } 258 mpool_put(t->bt_mp, h, 0); 259 } 260 261 /* Restore the stack. */ 262 while (level--) { 263 /* Push the next level down onto the stack. */ 264 bi = GETBINTERNAL(h, index); 265 pgno = bi->pgno; 266 267 /* Lose the currently pinned page. */ 268 mpool_put(t->bt_mp, h, 0); 269 270 /* Get the next level down. */ 271 if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) 272 return (1); 273 274 index = NEXTINDEX(h) - 1; 275 BT_PUSH(t, pgno, index); 276 } 277 mpool_put(t->bt_mp, h, 0); 278 if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL) 279 return (1); 280 } 281 282 283ret: mpool_put(t->bt_mp, h, 0); 284 return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL); 285} 286 287/* 288 * __bt_bdelete -- 289 * Delete all key/data pairs matching the specified key. 290 * 291 * Parameters: 292 * t: tree 293 * key: key to delete 294 * 295 * Returns: 296 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 297 */ 298static int 299__bt_bdelete(t, key) 300 BTREE *t; 301 const DBT *key; 302{ 303 EPG *e; 304 PAGE *h; 305 int deleted, exact, redo; 306 307 deleted = 0; 308 309 /* Find any matching record; __bt_search pins the page. */ 310loop: if ((e = __bt_search(t, key, &exact)) == NULL) 311 return (deleted ? RET_SUCCESS : RET_ERROR); 312 if (!exact) { 313 mpool_put(t->bt_mp, e->page, 0); 314 return (deleted ? RET_SUCCESS : RET_SPECIAL); 315 } 316 317 /* 318 * Delete forward, then delete backward, from the found key. If 319 * there are duplicates and we reach either side of the page, do 320 * the key search again, so that we get them all. 321 */ 322 redo = 0; 323 h = e->page; 324 do { 325 if (__bt_dleaf(t, key, h, e->index)) { 326 mpool_put(t->bt_mp, h, 0); 327 return (RET_ERROR); 328 } 329 if (F_ISSET(t, B_NODUPS)) { 330 if (NEXTINDEX(h) == 0) { 331 if (__bt_pdelete(t, h)) 332 return (RET_ERROR); 333 } else 334 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 335 return (RET_SUCCESS); 336 } 337 deleted = 1; 338 } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); 339 340 /* Check for right-hand edge of the page. */ 341 if (e->index == NEXTINDEX(h)) 342 redo = 1; 343 344 /* Delete from the key to the beginning of the page. */ 345 while (e->index-- > 0) { 346 if (__bt_cmp(t, key, e) != 0) 347 break; 348 if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) { 349 mpool_put(t->bt_mp, h, 0); 350 return (RET_ERROR); 351 } 352 if (e->index == 0) 353 redo = 1; 354 } 355 356 /* Check for an empty page. */ 357 if (NEXTINDEX(h) == 0) { 358 if (__bt_pdelete(t, h)) 359 return (RET_ERROR); 360 goto loop; 361 } 362 363 /* Put the page. */ 364 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 365 366 if (redo) 367 goto loop; 368 return (RET_SUCCESS); 369} 370 371/* 372 * __bt_pdelete -- 373 * Delete a single page from the tree. 374 * 375 * Parameters: 376 * t: tree 377 * h: leaf page 378 * 379 * Returns: 380 * RET_SUCCESS, RET_ERROR. 381 * 382 * Side-effects: 383 * mpool_put's the page 384 */ 385static int 386__bt_pdelete(t, h) 387 BTREE *t; 388 PAGE *h; 389{ 390 BINTERNAL *bi; 391 PAGE *pg; 392 EPGNO *parent; 393 indx_t cnt, index, *ip, offset; 394 u_int32_t nksize; 395 char *from; 396 397 /* 398 * Walk the parent page stack -- a LIFO stack of the pages that were 399 * traversed when we searched for the page where the delete occurred. 400 * Each stack entry is a page number and a page index offset. The 401 * offset is for the page traversed on the search. We've just deleted 402 * a page, so we have to delete the key from the parent page. 403 * 404 * If the delete from the parent page makes it empty, this process may 405 * continue all the way up the tree. We stop if we reach the root page 406 * (which is never deleted, it's just not worth the effort) or if the 407 * delete does not empty the page. 408 */ 409 while ((parent = BT_POP(t)) != NULL) { 410 /* Get the parent page. */ 411 if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) 412 return (RET_ERROR); 413 414 index = parent->index; 415 bi = GETBINTERNAL(pg, index); 416 417 /* Free any overflow pages. */ 418 if (bi->flags & P_BIGKEY && 419 __ovfl_delete(t, bi->bytes) == RET_ERROR) { 420 mpool_put(t->bt_mp, pg, 0); 421 return (RET_ERROR); 422 } 423 424 /* 425 * Free the parent if it has only the one key and it's not the 426 * root page. If it's the rootpage, turn it back into an empty 427 * leaf page. 428 */ 429 if (NEXTINDEX(pg) == 1) 430 if (pg->pgno == P_ROOT) { 431 pg->lower = BTDATAOFF; 432 pg->upper = t->bt_psize; 433 pg->flags = P_BLEAF; 434 } else { 435 if (__bt_relink(t, pg) || __bt_free(t, pg)) 436 return (RET_ERROR); 437 continue; 438 } 439 else { 440 /* Pack remaining key items at the end of the page. */ 441 nksize = NBINTERNAL(bi->ksize); 442 from = (char *)pg + pg->upper; 443 memmove(from + nksize, from, (char *)bi - from); 444 pg->upper += nksize; 445 446 /* Adjust indices' offsets, shift the indices down. */ 447 offset = pg->linp[index]; 448 for (cnt = index, ip = &pg->linp[0]; cnt--; ++ip) 449 if (ip[0] < offset) 450 ip[0] += nksize; 451 for (cnt = NEXTINDEX(pg) - index; --cnt; ++ip) 452 ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1]; 453 pg->lower -= sizeof(indx_t); 454 } 455 456 mpool_put(t->bt_mp, pg, MPOOL_DIRTY); 457 break; 458 } 459 460 /* Free the leaf page, as long as it wasn't the root. */ 461 if (h->pgno == P_ROOT) { 462 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 463 return (RET_SUCCESS); 464 } 465 return (__bt_relink(t, h) || __bt_free(t, h)); 466} 467 468/* 469 * __bt_dleaf -- 470 * Delete a single record from a leaf page. 471 * 472 * Parameters: 473 * t: tree 474 * key: referenced key 475 * h: page 476 * index: index on page to delete 477 * 478 * Returns: 479 * RET_SUCCESS, RET_ERROR. 480 */ 481int 482__bt_dleaf(t, key, h, index) 483 BTREE *t; 484 const DBT *key; 485 PAGE *h; 486 u_int index; 487{ 488 BLEAF *bl; 489 indx_t cnt, *ip, offset; 490 u_int32_t nbytes; 491 void *to; 492 char *from; 493 494 /* If this record is referenced by the cursor, delete the cursor. */ 495 if (F_ISSET(&t->bt_cursor, CURS_INIT) && 496 !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && 497 t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == index && 498 __bt_curdel(t, key, h, index)) 499 return (RET_ERROR); 500 501 /* If the entry uses overflow pages, make them available for reuse. */ 502 to = bl = GETBLEAF(h, index); 503 if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) 504 return (RET_ERROR); 505 if (bl->flags & P_BIGDATA && 506 __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) 507 return (RET_ERROR); 508 509 /* Pack the remaining key/data items at the end of the page. */ 510 nbytes = NBLEAF(bl); 511 from = (char *)h + h->upper; 512 memmove(from + nbytes, from, (char *)to - from); 513 h->upper += nbytes; 514 515 /* Adjust the indices' offsets, shift the indices down. */ 516 offset = h->linp[index]; 517 for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) 518 if (ip[0] < offset) 519 ip[0] += nbytes; 520 for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) 521 ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; 522 h->lower -= sizeof(indx_t); 523 524 /* If the cursor is on this page, adjust it as necessary. */ 525 if (F_ISSET(&t->bt_cursor, CURS_INIT) && 526 !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && 527 t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > index) 528 --t->bt_cursor.pg.index; 529 530 return (RET_SUCCESS); 531} 532 533/* 534 * __bt_curdel -- 535 * Delete the cursor. 536 * 537 * Parameters: 538 * t: tree 539 * key: referenced key (or NULL) 540 * h: page 541 * index: index on page to delete 542 * 543 * Returns: 544 * RET_SUCCESS, RET_ERROR. 545 */ 546static int 547__bt_curdel(t, key, h, index) 548 BTREE *t; 549 const DBT *key; 550 PAGE *h; 551 u_int index; 552{ 553 CURSOR *c; 554 EPG e; 555 PAGE *pg; 556 int curcopy, status; 557 558 /* 559 * If there are duplicates, move forward or backward to one. 560 * Otherwise, copy the key into the cursor area. 561 */ 562 c = &t->bt_cursor; 563 F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE); 564 565 curcopy = 0; 566 if (!F_ISSET(t, B_NODUPS)) { 567 /* 568 * We're going to have to do comparisons. If we weren't 569 * provided a copy of the key, i.e. the user is deleting 570 * the current cursor position, get one. 571 */ 572 if (key == NULL) { 573 e.page = h; 574 e.index = index; 575 if ((status = __bt_ret(t, &e, 576 &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS) 577 return (status); 578 curcopy = 1; 579 key = &c->key; 580 } 581 /* Check previous key, if not at the beginning of the page. */ 582 if (index > 0) { 583 e.page = h; 584 e.index = index - 1; 585 if (__bt_cmp(t, key, &e) == 0) { 586 F_SET(c, CURS_BEFORE); 587 goto dup2; 588 } 589 } 590 /* Check next key, if not at the end of the page. */ 591 if (index < NEXTINDEX(h) - 1) { 592 e.page = h; 593 e.index = index + 1; 594 if (__bt_cmp(t, key, &e) == 0) { 595 F_SET(c, CURS_AFTER); 596 goto dup2; 597 } 598 } 599 /* Check previous key if at the beginning of the page. */ 600 if (index == 0 && h->prevpg != P_INVALID) { 601 if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) 602 return (RET_ERROR); 603 e.page = pg; 604 e.index = NEXTINDEX(pg) - 1; 605 if (__bt_cmp(t, key, &e) == 0) { 606 F_SET(c, CURS_BEFORE); 607 goto dup1; 608 } 609 mpool_put(t->bt_mp, pg, 0); 610 } 611 /* Check next key if at the end of the page. */ 612 if (index == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) { 613 if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) 614 return (RET_ERROR); 615 e.page = pg; 616 e.index = 0; 617 if (__bt_cmp(t, key, &e) == 0) { 618 F_SET(c, CURS_AFTER); 619dup1: mpool_put(t->bt_mp, pg, 0); 620dup2: c->pg.pgno = e.page->pgno; 621 c->pg.index = e.index; 622 return (RET_SUCCESS); 623 } 624 mpool_put(t->bt_mp, pg, 0); 625 } 626 } 627 e.page = h; 628 e.index = index; 629 if (curcopy || (status = 630 __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) { 631 F_SET(c, CURS_ACQUIRE); 632 return (RET_SUCCESS); 633 } 634 return (status); 635} 636 637/* 638 * __bt_relink -- 639 * Link around a deleted page. 640 * 641 * Parameters: 642 * t: tree 643 * h: page to be deleted 644 */ 645static int 646__bt_relink(t, h) 647 BTREE *t; 648 PAGE *h; 649{ 650 PAGE *pg; 651 652 if (h->nextpg != P_INVALID) { 653 if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) 654 return (RET_ERROR); 655 pg->prevpg = h->prevpg; 656 mpool_put(t->bt_mp, pg, MPOOL_DIRTY); 657 } 658 if (h->prevpg != P_INVALID) { 659 if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) 660 return (RET_ERROR); 661 pg->nextpg = h->nextpg; 662 mpool_put(t->bt_mp, pg, MPOOL_DIRTY); 663 } 664 return (0); 665} 666