bt_delete.c revision 1.7
1/* $NetBSD: bt_delete.c,v 1.7 1996/05/03 21:50:44 cgd Exp $ */ 2 3/*- 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Mike Olson. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39#if defined(LIBC_SCCS) && !defined(lint) 40#if 0 41static char sccsid[] = "@(#)bt_delete.c 8.13 (Berkeley) 7/28/94"; 42#else 43static char rcsid[] = "$NetBSD: bt_delete.c,v 1.7 1996/05/03 21:50:44 cgd Exp $"; 44#endif 45#endif /* LIBC_SCCS and not lint */ 46 47#include <sys/types.h> 48 49#include <errno.h> 50#include <stdio.h> 51#include <string.h> 52 53#include <db.h> 54#include "btree.h" 55 56static int __bt_bdelete __P((BTREE *, const DBT *)); 57static int __bt_curdel __P((BTREE *, const DBT *, PAGE *, u_int)); 58static int __bt_pdelete __P((BTREE *, PAGE *)); 59static int __bt_relink __P((BTREE *, PAGE *)); 60static int __bt_stkacq __P((BTREE *, PAGE **, CURSOR *)); 61 62/* 63 * __bt_delete 64 * Delete the item(s) referenced by a key. 65 * 66 * Return RET_SPECIAL if the key is not found. 67 */ 68int 69__bt_delete(dbp, key, flags) 70 const DB *dbp; 71 const DBT *key; 72 u_int flags; 73{ 74 BTREE *t; 75 CURSOR *c; 76 PAGE *h; 77 int status; 78 79 t = dbp->internal; 80 81 /* Toss any page pinned across calls. */ 82 if (t->bt_pinned != NULL) { 83 mpool_put(t->bt_mp, t->bt_pinned, 0); 84 t->bt_pinned = NULL; 85 } 86 87 /* Check for change to a read-only tree. */ 88 if (F_ISSET(t, B_RDONLY)) { 89 errno = EPERM; 90 return (RET_ERROR); 91 } 92 93 switch (flags) { 94 case 0: 95 status = __bt_bdelete(t, key); 96 break; 97 case R_CURSOR: 98 /* 99 * If flags is R_CURSOR, delete the cursor. Must already 100 * have started a scan and not have already deleted it. 101 */ 102 c = &t->bt_cursor; 103 if (F_ISSET(c, CURS_INIT)) { 104 if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE)) 105 return (RET_SPECIAL); 106 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 107 return (RET_ERROR); 108 109 /* 110 * If the page is about to be emptied, we'll need to 111 * delete it, which means we have to acquire a stack. 112 */ 113 if (NEXTINDEX(h) == 1) 114 if (__bt_stkacq(t, &h, &t->bt_cursor)) 115 return (RET_ERROR); 116 117 status = __bt_dleaf(t, NULL, h, c->pg.index); 118 119 if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) { 120 if (__bt_pdelete(t, h)) 121 return (RET_ERROR); 122 } else 123 mpool_put(t->bt_mp, 124 h, status == RET_SUCCESS ? MPOOL_DIRTY : 0); 125 break; 126 } 127 /* FALLTHROUGH */ 128 default: 129 errno = EINVAL; 130 return (RET_ERROR); 131 } 132 if (status == RET_SUCCESS) 133 F_SET(t, B_MODIFIED); 134 return (status); 135} 136 137/* 138 * __bt_stkacq -- 139 * Acquire a stack so we can delete a cursor entry. 140 * 141 * Parameters: 142 * t: tree 143 * hp: pointer to current, pinned PAGE pointer 144 * c: pointer to the cursor 145 * 146 * Returns: 147 * 0 on success, 1 on failure 148 */ 149static int 150__bt_stkacq(t, hp, c) 151 BTREE *t; 152 PAGE **hp; 153 CURSOR *c; 154{ 155 BINTERNAL *bi; 156 EPG *e; 157 EPGNO *parent; 158 PAGE *h; 159 indx_t index; 160 pgno_t pgno; 161 recno_t nextpg, prevpg; 162 int exact, level; 163 164 /* 165 * Find the first occurrence of the key in the tree. Toss the 166 * currently locked page so we don't hit an already-locked page. 167 */ 168 h = *hp; 169 mpool_put(t->bt_mp, h, 0); 170 if ((e = __bt_search(t, &c->key, &exact)) == NULL) 171 return (1); 172 h = e->page; 173 174 /* See if we got it in one shot. */ 175 if (h->pgno == c->pg.pgno) 176 goto ret; 177 178 /* 179 * Move right, looking for the page. At each move we have to move 180 * up the stack until we don't have to move to the next page. If 181 * we have to change pages at an internal level, we have to fix the 182 * stack back up. 183 */ 184 while (h->pgno != c->pg.pgno) { 185 if ((nextpg = h->nextpg) == P_INVALID) 186 break; 187 mpool_put(t->bt_mp, h, 0); 188 189 /* Move up the stack. */ 190 for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { 191 /* Get the parent page. */ 192 if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) 193 return (1); 194 195 /* Move to the next index. */ 196 if (parent->index != NEXTINDEX(h) - 1) { 197 index = parent->index + 1; 198 BT_PUSH(t, h->pgno, index); 199 break; 200 } 201 mpool_put(t->bt_mp, h, 0); 202 } 203 204 /* Restore the stack. */ 205 while (level--) { 206 /* Push the next level down onto the stack. */ 207 bi = GETBINTERNAL(h, index); 208 pgno = bi->pgno; 209 BT_PUSH(t, pgno, 0); 210 211 /* Lose the currently pinned page. */ 212 mpool_put(t->bt_mp, h, 0); 213 214 /* Get the next level down. */ 215 if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) 216 return (1); 217 index = 0; 218 } 219 mpool_put(t->bt_mp, h, 0); 220 if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL) 221 return (1); 222 } 223 224 if (h->pgno == c->pg.pgno) 225 goto ret; 226 227 /* Reacquire the original stack. */ 228 mpool_put(t->bt_mp, h, 0); 229 if ((e = __bt_search(t, &c->key, &exact)) == NULL) 230 return (1); 231 h = e->page; 232 233 /* 234 * Move left, looking for the page. At each move we have to move 235 * up the stack until we don't have to change pages to move to the 236 * next page. If we have to change pages at an internal level, we 237 * have to fix the stack back up. 238 */ 239 while (h->pgno != c->pg.pgno) { 240 if ((prevpg = h->prevpg) == P_INVALID) 241 break; 242 mpool_put(t->bt_mp, h, 0); 243 244 /* Move up the stack. */ 245 for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { 246 /* Get the parent page. */ 247 if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) 248 return (1); 249 250 /* Move to the next index. */ 251 if (parent->index != 0) { 252 index = parent->index - 1; 253 BT_PUSH(t, h->pgno, index); 254 break; 255 } 256 mpool_put(t->bt_mp, h, 0); 257 } 258 259 /* Restore the stack. */ 260 while (level--) { 261 /* Push the next level down onto the stack. */ 262 bi = GETBINTERNAL(h, index); 263 pgno = bi->pgno; 264 265 /* Lose the currently pinned page. */ 266 mpool_put(t->bt_mp, h, 0); 267 268 /* Get the next level down. */ 269 if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) 270 return (1); 271 272 index = NEXTINDEX(h) - 1; 273 BT_PUSH(t, pgno, index); 274 } 275 mpool_put(t->bt_mp, h, 0); 276 if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL) 277 return (1); 278 } 279 280 281ret: mpool_put(t->bt_mp, h, 0); 282 return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL); 283} 284 285/* 286 * __bt_bdelete -- 287 * Delete all key/data pairs matching the specified key. 288 * 289 * Parameters: 290 * t: tree 291 * key: key to delete 292 * 293 * Returns: 294 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 295 */ 296static int 297__bt_bdelete(t, key) 298 BTREE *t; 299 const DBT *key; 300{ 301 EPG *e; 302 PAGE *h; 303 int deleted, exact, redo; 304 305 deleted = 0; 306 307 /* Find any matching record; __bt_search pins the page. */ 308loop: if ((e = __bt_search(t, key, &exact)) == NULL) 309 return (deleted ? RET_SUCCESS : RET_ERROR); 310 if (!exact) { 311 mpool_put(t->bt_mp, e->page, 0); 312 return (deleted ? RET_SUCCESS : RET_SPECIAL); 313 } 314 315 /* 316 * Delete forward, then delete backward, from the found key. If 317 * there are duplicates and we reach either side of the page, do 318 * the key search again, so that we get them all. 319 */ 320 redo = 0; 321 h = e->page; 322 do { 323 if (__bt_dleaf(t, key, h, e->index)) { 324 mpool_put(t->bt_mp, h, 0); 325 return (RET_ERROR); 326 } 327 if (F_ISSET(t, B_NODUPS)) { 328 if (NEXTINDEX(h) == 0) { 329 if (__bt_pdelete(t, h)) 330 return (RET_ERROR); 331 } else 332 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 333 return (RET_SUCCESS); 334 } 335 deleted = 1; 336 } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); 337 338 /* Check for right-hand edge of the page. */ 339 if (e->index == NEXTINDEX(h)) 340 redo = 1; 341 342 /* Delete from the key to the beginning of the page. */ 343 while (e->index-- > 0) { 344 if (__bt_cmp(t, key, e) != 0) 345 break; 346 if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) { 347 mpool_put(t->bt_mp, h, 0); 348 return (RET_ERROR); 349 } 350 if (e->index == 0) 351 redo = 1; 352 } 353 354 /* Check for an empty page. */ 355 if (NEXTINDEX(h) == 0) { 356 if (__bt_pdelete(t, h)) 357 return (RET_ERROR); 358 goto loop; 359 } 360 361 /* Put the page. */ 362 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 363 364 if (redo) 365 goto loop; 366 return (RET_SUCCESS); 367} 368 369/* 370 * __bt_pdelete -- 371 * Delete a single page from the tree. 372 * 373 * Parameters: 374 * t: tree 375 * h: leaf page 376 * 377 * Returns: 378 * RET_SUCCESS, RET_ERROR. 379 * 380 * Side-effects: 381 * mpool_put's the page 382 */ 383static int 384__bt_pdelete(t, h) 385 BTREE *t; 386 PAGE *h; 387{ 388 BINTERNAL *bi; 389 PAGE *pg; 390 EPGNO *parent; 391 indx_t cnt, index, *ip, offset; 392 u_int32_t nksize; 393 char *from; 394 395 /* 396 * Walk the parent page stack -- a LIFO stack of the pages that were 397 * traversed when we searched for the page where the delete occurred. 398 * Each stack entry is a page number and a page index offset. The 399 * offset is for the page traversed on the search. We've just deleted 400 * a page, so we have to delete the key from the parent page. 401 * 402 * If the delete from the parent page makes it empty, this process may 403 * continue all the way up the tree. We stop if we reach the root page 404 * (which is never deleted, it's just not worth the effort) or if the 405 * delete does not empty the page. 406 */ 407 while ((parent = BT_POP(t)) != NULL) { 408 /* Get the parent page. */ 409 if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) 410 return (RET_ERROR); 411 412 index = parent->index; 413 bi = GETBINTERNAL(pg, index); 414 415 /* Free any overflow pages. */ 416 if (bi->flags & P_BIGKEY && 417 __ovfl_delete(t, bi->bytes) == RET_ERROR) { 418 mpool_put(t->bt_mp, pg, 0); 419 return (RET_ERROR); 420 } 421 422 /* 423 * Free the parent if it has only the one key and it's not the 424 * root page. If it's the rootpage, turn it back into an empty 425 * leaf page. 426 */ 427 if (NEXTINDEX(pg) == 1) 428 if (pg->pgno == P_ROOT) { 429 pg->lower = BTDATAOFF; 430 pg->upper = t->bt_psize; 431 pg->flags = P_BLEAF; 432 } else { 433 if (__bt_relink(t, pg) || __bt_free(t, pg)) 434 return (RET_ERROR); 435 continue; 436 } 437 else { 438 /* Pack remaining key items at the end of the page. */ 439 nksize = NBINTERNAL(bi->ksize); 440 from = (char *)pg + pg->upper; 441 memmove(from + nksize, from, (char *)bi - from); 442 pg->upper += nksize; 443 444 /* Adjust indices' offsets, shift the indices down. */ 445 offset = pg->linp[index]; 446 for (cnt = index, ip = &pg->linp[0]; cnt--; ++ip) 447 if (ip[0] < offset) 448 ip[0] += nksize; 449 for (cnt = NEXTINDEX(pg) - index; --cnt; ++ip) 450 ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1]; 451 pg->lower -= sizeof(indx_t); 452 } 453 454 mpool_put(t->bt_mp, pg, MPOOL_DIRTY); 455 break; 456 } 457 458 /* Free the leaf page, as long as it wasn't the root. */ 459 if (h->pgno == P_ROOT) { 460 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 461 return (RET_SUCCESS); 462 } 463 return (__bt_relink(t, h) || __bt_free(t, h)); 464} 465 466/* 467 * __bt_dleaf -- 468 * Delete a single record from a leaf page. 469 * 470 * Parameters: 471 * t: tree 472 * key: referenced key 473 * h: page 474 * index: index on page to delete 475 * 476 * Returns: 477 * RET_SUCCESS, RET_ERROR. 478 */ 479int 480__bt_dleaf(t, key, h, index) 481 BTREE *t; 482 const DBT *key; 483 PAGE *h; 484 u_int index; 485{ 486 BLEAF *bl; 487 indx_t cnt, *ip, offset; 488 u_int32_t nbytes; 489 void *to; 490 char *from; 491 492 /* If this record is referenced by the cursor, delete the cursor. */ 493 if (F_ISSET(&t->bt_cursor, CURS_INIT) && 494 !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && 495 t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == index && 496 __bt_curdel(t, key, h, index)) 497 return (RET_ERROR); 498 499 /* If the entry uses overflow pages, make them available for reuse. */ 500 to = bl = GETBLEAF(h, index); 501 if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) 502 return (RET_ERROR); 503 if (bl->flags & P_BIGDATA && 504 __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) 505 return (RET_ERROR); 506 507 /* Pack the remaining key/data items at the end of the page. */ 508 nbytes = NBLEAF(bl); 509 from = (char *)h + h->upper; 510 memmove(from + nbytes, from, (char *)to - from); 511 h->upper += nbytes; 512 513 /* Adjust the indices' offsets, shift the indices down. */ 514 offset = h->linp[index]; 515 for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) 516 if (ip[0] < offset) 517 ip[0] += nbytes; 518 for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) 519 ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; 520 h->lower -= sizeof(indx_t); 521 522 /* If the cursor is on this page, adjust it as necessary. */ 523 if (F_ISSET(&t->bt_cursor, CURS_INIT) && 524 !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && 525 t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > index) 526 --t->bt_cursor.pg.index; 527 528 return (RET_SUCCESS); 529} 530 531/* 532 * __bt_curdel -- 533 * Delete the cursor. 534 * 535 * Parameters: 536 * t: tree 537 * key: referenced key (or NULL) 538 * h: page 539 * index: index on page to delete 540 * 541 * Returns: 542 * RET_SUCCESS, RET_ERROR. 543 */ 544static int 545__bt_curdel(t, key, h, index) 546 BTREE *t; 547 const DBT *key; 548 PAGE *h; 549 u_int index; 550{ 551 CURSOR *c; 552 EPG e; 553 PAGE *pg; 554 int curcopy, status; 555 556 /* 557 * If there are duplicates, move forward or backward to one. 558 * Otherwise, copy the key into the cursor area. 559 */ 560 c = &t->bt_cursor; 561 F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE); 562 563 curcopy = 0; 564 if (!F_ISSET(t, B_NODUPS)) { 565 /* 566 * We're going to have to do comparisons. If we weren't 567 * provided a copy of the key, i.e. the user is deleting 568 * the current cursor position, get one. 569 */ 570 if (key == NULL) { 571 e.page = h; 572 e.index = index; 573 if ((status = __bt_ret(t, &e, 574 &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS) 575 return (status); 576 curcopy = 1; 577 key = &c->key; 578 } 579 /* Check previous key, if not at the beginning of the page. */ 580 if (index > 0) { 581 e.page = h; 582 e.index = index - 1; 583 if (__bt_cmp(t, key, &e) == 0) { 584 F_SET(c, CURS_BEFORE); 585 goto dup2; 586 } 587 } 588 /* Check next key, if not at the end of the page. */ 589 if (index < NEXTINDEX(h) - 1) { 590 e.page = h; 591 e.index = index + 1; 592 if (__bt_cmp(t, key, &e) == 0) { 593 F_SET(c, CURS_AFTER); 594 goto dup2; 595 } 596 } 597 /* Check previous key if at the beginning of the page. */ 598 if (index == 0 && h->prevpg != P_INVALID) { 599 if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) 600 return (RET_ERROR); 601 e.page = pg; 602 e.index = NEXTINDEX(pg) - 1; 603 if (__bt_cmp(t, key, &e) == 0) { 604 F_SET(c, CURS_BEFORE); 605 goto dup1; 606 } 607 mpool_put(t->bt_mp, pg, 0); 608 } 609 /* Check next key if at the end of the page. */ 610 if (index == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) { 611 if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) 612 return (RET_ERROR); 613 e.page = pg; 614 e.index = 0; 615 if (__bt_cmp(t, key, &e) == 0) { 616 F_SET(c, CURS_AFTER); 617dup1: mpool_put(t->bt_mp, pg, 0); 618dup2: c->pg.pgno = e.page->pgno; 619 c->pg.index = e.index; 620 return (RET_SUCCESS); 621 } 622 mpool_put(t->bt_mp, pg, 0); 623 } 624 } 625 e.page = h; 626 e.index = index; 627 if (curcopy || (status = 628 __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) { 629 F_SET(c, CURS_ACQUIRE); 630 return (RET_SUCCESS); 631 } 632 return (status); 633} 634 635/* 636 * __bt_relink -- 637 * Link around a deleted page. 638 * 639 * Parameters: 640 * t: tree 641 * h: page to be deleted 642 */ 643static int 644__bt_relink(t, h) 645 BTREE *t; 646 PAGE *h; 647{ 648 PAGE *pg; 649 650 if (h->nextpg != P_INVALID) { 651 if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) 652 return (RET_ERROR); 653 pg->prevpg = h->prevpg; 654 mpool_put(t->bt_mp, pg, MPOOL_DIRTY); 655 } 656 if (h->prevpg != P_INVALID) { 657 if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) 658 return (RET_ERROR); 659 pg->nextpg = h->nextpg; 660 mpool_put(t->bt_mp, pg, MPOOL_DIRTY); 661 } 662 return (0); 663} 664