1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996,2008 Oracle. All rights reserved. 5 * 6 * $Id: bt_rec.c,v 12.39 2008/02/18 06:14:08 mjc Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/db_page.h" 13#include "dbinc/btree.h" 14#include "dbinc/lock.h" 15#include "dbinc/log.h" 16#include "dbinc/mp.h" 17 18#define IS_BTREE_PAGE(pagep) \ 19 (TYPE(pagep) == P_IBTREE || \ 20 TYPE(pagep) == P_LBTREE || TYPE(pagep) == P_LDUP) 21 22/* 23 * __bam_split_recover -- 24 * Recovery function for split. 25 * 26 * PUBLIC: int __bam_split_recover 27 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 28 */ 29int 30__bam_split_recover(env, dbtp, lsnp, op, info) 31 ENV *env; 32 DBT *dbtp; 33 DB_LSN *lsnp; 34 db_recops op; 35 void *info; 36{ 37 __bam_split_args *argp; 38 DB_THREAD_INFO *ip; 39 DB *file_dbp; 40 DBC *dbc; 41 DB_MPOOLFILE *mpf; 42 PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp; 43 db_pgno_t pgno, root_pgno; 44 u_int32_t ptype; 45 int cmp, l_update, p_update, r_update, rc, ret, rootsplit, t_ret; 46 47 ip = ((DB_TXNHEAD *)info)->thread_info; 48 REC_PRINT(__bam_split_print); 49 50 _lp = lp = np = pp = _rp = rp = NULL; 51 sp = NULL; 52 53 REC_INTRO(__bam_split_read, ip, 0); 54 55 /* 56 * There are two kinds of splits that we have to recover from. The 57 * first is a root-page split, where the root page is split from a 58 * leaf page into an internal page and two new leaf pages are created. 59 * The second is where a page is split into two pages, and a new key 60 * is inserted into the parent page. 61 * 62 * DBTs are not aligned in log records, so we need to copy the page 63 * so that we can access fields within it throughout this routine. 64 * Although we could hardcode the unaligned copies in this routine, 65 * we will be calling into regular btree functions with this page, 66 * so it's got to be aligned. Copying it into allocated memory is 67 * the only way to guarantee this. 68 */ 69 if ((ret = __os_malloc(env, argp->pg.size, &sp)) != 0) 70 goto out; 71 memcpy(sp, argp->pg.data, argp->pg.size); 72 73 pgno = PGNO(sp); 74 root_pgno = argp->root_pgno; 75 rootsplit = root_pgno != PGNO_INVALID; 76 REC_FGET(mpf, ip, argp->left, &lp, right); 77right: REC_FGET(mpf, ip, argp->right, &rp, redo); 78 79redo: if (DB_REDO(op)) { 80 l_update = r_update = p_update = 0; 81 /* 82 * Decide if we need to resplit the page. 83 * 84 * If this is a root split, then the root has to exist unless 85 * we have truncated it due to a future deallocation. 86 */ 87 if (rootsplit) { 88 REC_FGET(mpf, ip, root_pgno, &pp, do_left); 89 cmp = LOG_COMPARE(&LSN(pp), &LSN(argp->pg.data)); 90 CHECK_LSN(env, op, 91 cmp, &LSN(pp), &LSN(argp->pg.data)); 92 p_update = cmp == 0; 93 } 94 95do_left: if (lp != NULL) { 96 cmp = LOG_COMPARE(&LSN(lp), &argp->llsn); 97 CHECK_LSN(env, op, cmp, &LSN(lp), &argp->llsn); 98 if (cmp == 0) 99 l_update = 1; 100 } 101 102 if (rp != NULL) { 103 cmp = LOG_COMPARE(&LSN(rp), &argp->rlsn); 104 CHECK_LSN(env, op, cmp, &LSN(rp), &argp->rlsn); 105 if (cmp == 0) 106 r_update = 1; 107 } 108 109 if (!p_update && !l_update && !r_update) 110 goto check_next; 111 112 /* Allocate and initialize new left/right child pages. */ 113 if ((ret = __os_malloc(env, file_dbp->pgsize, &_lp)) != 0 || 114 (ret = __os_malloc(env, file_dbp->pgsize, &_rp)) != 0) 115 goto out; 116 if (rootsplit) { 117 P_INIT(_lp, file_dbp->pgsize, argp->left, 118 PGNO_INVALID, 119 ISINTERNAL(sp) ? PGNO_INVALID : argp->right, 120 LEVEL(sp), TYPE(sp)); 121 P_INIT(_rp, file_dbp->pgsize, argp->right, 122 ISINTERNAL(sp) ? PGNO_INVALID : argp->left, 123 PGNO_INVALID, LEVEL(sp), TYPE(sp)); 124 } else { 125 P_INIT(_lp, file_dbp->pgsize, PGNO(sp), 126 ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp), 127 ISINTERNAL(sp) ? PGNO_INVALID : argp->right, 128 LEVEL(sp), TYPE(sp)); 129 P_INIT(_rp, file_dbp->pgsize, argp->right, 130 ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno, 131 ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp), 132 LEVEL(sp), TYPE(sp)); 133 } 134 135 /* Split the page. */ 136 if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 || 137 (ret = __bam_copy(file_dbp, sp, _rp, argp->indx, 138 NUM_ENT(sp))) != 0) 139 goto out; 140 141 if (l_update) { 142 REC_DIRTY(mpf, ip, file_dbp->priority, &lp); 143 memcpy(lp, _lp, file_dbp->pgsize); 144 lp->lsn = *lsnp; 145 if ((ret = __memp_fput(mpf, 146 ip, lp, file_dbp->priority)) != 0) 147 goto out; 148 lp = NULL; 149 } 150 151 if (r_update) { 152 REC_DIRTY(mpf, ip, file_dbp->priority, &rp); 153 memcpy(rp, _rp, file_dbp->pgsize); 154 rp->lsn = *lsnp; 155 if ((ret = __memp_fput(mpf, 156 ip, rp, file_dbp->priority)) != 0) 157 goto out; 158 rp = NULL; 159 } 160 161 /* 162 * If the parent page is wrong, update it. This is of interest 163 * only if it was a root split, since root splits create parent 164 * pages. All other splits modify a parent page, but those are 165 * separately logged and recovered. 166 */ 167 if (rootsplit && p_update) { 168 if (IS_BTREE_PAGE(sp)) { 169 ptype = P_IBTREE; 170 rc = argp->opflags & SPL_NRECS ? 1 : 0; 171 } else { 172 ptype = P_IRECNO; 173 rc = 1; 174 } 175 176 REC_DIRTY(mpf, ip, file_dbp->priority, &pp); 177 P_INIT(pp, file_dbp->pgsize, root_pgno, 178 PGNO_INVALID, PGNO_INVALID, _lp->level + 1, ptype); 179 RE_NREC_SET(pp, rc ? __bam_total(file_dbp, _lp) + 180 __bam_total(file_dbp, _rp) : 0); 181 182 pp->lsn = *lsnp; 183 if ((ret = __memp_fput(mpf, 184 ip, pp, file_dbp->priority)) != 0) 185 goto out; 186 pp = NULL; 187 } 188 189check_next: /* 190 * Finally, redo the next-page link if necessary. This is of 191 * interest only if it wasn't a root split -- inserting a new 192 * page in the tree requires that any following page have its 193 * previous-page pointer updated to our new page. The next 194 * page must exist because we're redoing the operation. 195 */ 196 if (!rootsplit && argp->npgno != PGNO_INVALID) { 197 if ((ret = __memp_fget(mpf, &argp->npgno, 198 ip, NULL, 0, &np)) != 0) { 199 if (ret != DB_PAGE_NOTFOUND) { 200 ret = __db_pgerr( 201 file_dbp, argp->npgno, ret); 202 goto out; 203 } else 204 goto done; 205 } 206 cmp = LOG_COMPARE(&LSN(np), &argp->nlsn); 207 CHECK_LSN(env, op, cmp, &LSN(np), &argp->nlsn); 208 if (cmp == 0) { 209 REC_DIRTY(mpf, ip, file_dbp->priority, &np); 210 PREV_PGNO(np) = argp->right; 211 np->lsn = *lsnp; 212 if ((ret = __memp_fput(mpf, ip, 213 np, file_dbp->priority)) != 0) 214 goto out; 215 np = NULL; 216 } 217 } 218 } else { 219 /* 220 * If the split page is wrong, replace its contents with the 221 * logged page contents. If the page doesn't exist, it means 222 * that the create of the page never happened, nor did any of 223 * the adds onto the page that caused the split, and there's 224 * really no undo-ing to be done. 225 */ 226 if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 227 DB_MPOOL_EDIT, &pp)) != 0) { 228 pp = NULL; 229 goto lrundo; 230 } 231 if (LOG_COMPARE(lsnp, &LSN(pp)) == 0) { 232 REC_DIRTY(mpf, ip, file_dbp->priority, &pp); 233 memcpy(pp, argp->pg.data, argp->pg.size); 234 if ((ret = __memp_fput(mpf, 235 ip, pp, file_dbp->priority)) != 0) 236 goto out; 237 pp = NULL; 238 } 239 240 /* 241 * If it's a root split and the left child ever existed, update 242 * its LSN. (If it's not a root split, we've updated the left 243 * page already -- it's the same as the split page.) If the 244 * right child ever existed, root split or not, update its LSN. 245 * The undo of the page allocation(s) will restore them to the 246 * free list. 247 */ 248lrundo: if ((rootsplit && lp != NULL) || rp != NULL) { 249 if (rootsplit && lp != NULL && 250 LOG_COMPARE(lsnp, &LSN(lp)) == 0) { 251 REC_DIRTY(mpf, ip, file_dbp->priority, &lp); 252 lp->lsn = argp->llsn; 253 if ((ret = __memp_fput(mpf, ip, 254 lp, file_dbp->priority)) != 0) 255 goto out; 256 lp = NULL; 257 } 258 if (rp != NULL && 259 LOG_COMPARE(lsnp, &LSN(rp)) == 0) { 260 REC_DIRTY(mpf, ip, file_dbp->priority, &rp); 261 rp->lsn = argp->rlsn; 262 if ((ret = __memp_fput(mpf, ip, 263 rp, file_dbp->priority)) != 0) 264 goto out; 265 rp = NULL; 266 } 267 } 268 269 /* 270 * Finally, undo the next-page link if necessary. This is of 271 * interest only if it wasn't a root split -- inserting a new 272 * page in the tree requires that any following page have its 273 * previous-page pointer updated to our new page. Since it's 274 * possible that the next-page never existed, we ignore it as 275 * if there's nothing to undo. 276 */ 277 if (!rootsplit && argp->npgno != PGNO_INVALID) { 278 if ((ret = __memp_fget(mpf, &argp->npgno, 279 ip, NULL, DB_MPOOL_EDIT, &np)) != 0) { 280 np = NULL; 281 goto done; 282 } 283 if (LOG_COMPARE(lsnp, &LSN(np)) == 0) { 284 REC_DIRTY(mpf, ip, file_dbp->priority, &np); 285 PREV_PGNO(np) = argp->left; 286 np->lsn = argp->nlsn; 287 if (__memp_fput(mpf, 288 ip, np, file_dbp->priority)) 289 goto out; 290 np = NULL; 291 } 292 } 293 } 294 295done: *lsnp = argp->prev_lsn; 296 ret = 0; 297 298out: /* Free any pages that weren't dirtied. */ 299 if (pp != NULL && (t_ret = __memp_fput(mpf, 300 ip, pp, file_dbp->priority)) != 0 && ret == 0) 301 ret = t_ret; 302 if (lp != NULL && (t_ret = __memp_fput(mpf, 303 ip, lp, file_dbp->priority)) != 0 && ret == 0) 304 ret = t_ret; 305 if (np != NULL && (t_ret = __memp_fput(mpf, 306 ip, np, file_dbp->priority)) != 0 && ret == 0) 307 ret = t_ret; 308 if (rp != NULL && (t_ret = __memp_fput(mpf, 309 ip, rp, file_dbp->priority)) != 0 && ret == 0) 310 ret = t_ret; 311 312 /* Free any allocated space. */ 313 if (_lp != NULL) 314 __os_free(env, _lp); 315 if (_rp != NULL) 316 __os_free(env, _rp); 317 if (sp != NULL) 318 __os_free(env, sp); 319 320 REC_CLOSE; 321} 322 323/* 324 * __bam_rsplit_recover -- 325 * Recovery function for a reverse split. 326 * 327 * PUBLIC: int __bam_rsplit_recover 328 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 329 */ 330int 331__bam_rsplit_recover(env, dbtp, lsnp, op, info) 332 ENV *env; 333 DBT *dbtp; 334 DB_LSN *lsnp; 335 db_recops op; 336 void *info; 337{ 338 __bam_rsplit_args *argp; 339 DB_THREAD_INFO *ip; 340 DB *file_dbp; 341 DBC *dbc; 342 DB_LSN copy_lsn; 343 DB_MPOOLFILE *mpf; 344 PAGE *pagep; 345 db_pgno_t pgno, root_pgno; 346 db_recno_t rcnt; 347 int cmp_n, cmp_p, ret; 348 349 ip = ((DB_TXNHEAD *)info)->thread_info; 350 pagep = NULL; 351 REC_PRINT(__bam_rsplit_print); 352 REC_INTRO(__bam_rsplit_read, ip, 1); 353 354 /* Fix the root page. */ 355 pgno = root_pgno = argp->root_pgno; 356 if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &pagep)) != 0) { 357 if (ret != DB_PAGE_NOTFOUND) { 358 ret = __db_pgerr(file_dbp, pgno, ret); 359 goto out; 360 } else 361 goto do_page; 362 } 363 364 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 365 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->rootlsn); 366 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->rootlsn); 367 if (cmp_p == 0 && DB_REDO(op)) { 368 /* 369 * Copy the new data to the root page. If it is not now a 370 * leaf page we need to restore the record number. We could 371 * try to determine if C_RECNUM was set in the btree, but 372 * that's not really necessary since the field is not used 373 * otherwise. 374 */ 375 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 376 rcnt = RE_NREC(pagep); 377 memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size); 378 if (LEVEL(pagep) > LEAFLEVEL) 379 RE_NREC_SET(pagep, rcnt); 380 pagep->pgno = root_pgno; 381 pagep->lsn = *lsnp; 382 } else if (cmp_n == 0 && DB_UNDO(op)) { 383 /* Need to undo update described. */ 384 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 385 P_INIT(pagep, file_dbp->pgsize, root_pgno, 386 argp->nrec, PGNO_INVALID, pagep->level + 1, 387 IS_BTREE_PAGE(pagep) ? P_IBTREE : P_IRECNO); 388 if ((ret = __db_pitem(dbc, pagep, 0, 389 argp->rootent.size, &argp->rootent, NULL)) != 0) 390 goto out; 391 pagep->lsn = argp->rootlsn; 392 } 393 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) 394 goto out; 395 396do_page: 397 /* 398 * Fix the page copied over the root page. It's possible that the 399 * page never made it to disk, or was truncated so if the page 400 * doesn't exist, it's okay and there's nothing further to do. 401 */ 402 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { 403 if (ret != DB_PAGE_NOTFOUND) { 404 ret = __db_pgerr(file_dbp, argp->pgno, ret); 405 goto out; 406 } else 407 goto done; 408 } 409 (void)__ua_memcpy(©_lsn, &LSN(argp->pgdbt.data), sizeof(DB_LSN)); 410 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 411 cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn); 412 CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn); 413 if (cmp_p == 0 && DB_REDO(op)) { 414 /* Need to redo update described. */ 415 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 416 pagep->lsn = *lsnp; 417 } else if (cmp_n == 0 && DB_UNDO(op)) { 418 /* Need to undo update described. */ 419 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 420 memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size); 421 } 422 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) 423 goto out; 424 pagep = NULL; 425 426done: *lsnp = argp->prev_lsn; 427 ret = 0; 428 429out: if (pagep != NULL) 430 (void)__memp_fput(mpf, ip, pagep, dbc->priority); 431 REC_CLOSE; 432} 433 434/* 435 * __bam_adj_recover -- 436 * Recovery function for adj. 437 * 438 * PUBLIC: int __bam_adj_recover 439 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 440 */ 441int 442__bam_adj_recover(env, dbtp, lsnp, op, info) 443 ENV *env; 444 DBT *dbtp; 445 DB_LSN *lsnp; 446 db_recops op; 447 void *info; 448{ 449 __bam_adj_args *argp; 450 DB_THREAD_INFO *ip; 451 DB *file_dbp; 452 DBC *dbc; 453 DB_MPOOLFILE *mpf; 454 PAGE *pagep; 455 int cmp_n, cmp_p, ret; 456 457 ip = ((DB_TXNHEAD *)info)->thread_info; 458 pagep = NULL; 459 REC_PRINT(__bam_adj_print); 460 REC_INTRO(__bam_adj_read, ip, 1); 461 462 /* Get the page; if it never existed and we're undoing, we're done. */ 463 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { 464 if (ret != DB_PAGE_NOTFOUND) { 465 ret = __db_pgerr(file_dbp, argp->pgno, ret); 466 goto out; 467 } else 468 goto done; 469 } 470 471 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 472 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); 473 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn); 474 if (cmp_p == 0 && DB_REDO(op)) { 475 /* Need to redo update described. */ 476 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 477 if ((ret = __bam_adjindx(dbc, 478 pagep, argp->indx, argp->indx_copy, argp->is_insert)) != 0) 479 goto out; 480 481 LSN(pagep) = *lsnp; 482 } else if (cmp_n == 0 && DB_UNDO(op)) { 483 /* Need to undo update described. */ 484 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 485 if ((ret = __bam_adjindx(dbc, 486 pagep, argp->indx, argp->indx_copy, !argp->is_insert)) != 0) 487 goto out; 488 489 LSN(pagep) = argp->lsn; 490 } 491 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) 492 goto out; 493 pagep = NULL; 494 495done: *lsnp = argp->prev_lsn; 496 ret = 0; 497 498out: if (pagep != NULL) 499 (void)__memp_fput(mpf, ip, pagep, dbc->priority); 500 REC_CLOSE; 501} 502 503/* 504 * __bam_cadjust_recover -- 505 * Recovery function for the adjust of a count change in an internal 506 * page. 507 * 508 * PUBLIC: int __bam_cadjust_recover 509 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 510 */ 511int 512__bam_cadjust_recover(env, dbtp, lsnp, op, info) 513 ENV *env; 514 DBT *dbtp; 515 DB_LSN *lsnp; 516 db_recops op; 517 void *info; 518{ 519 __bam_cadjust_args *argp; 520 DB_THREAD_INFO *ip; 521 DB *file_dbp; 522 DBC *dbc; 523 DB_MPOOLFILE *mpf; 524 PAGE *pagep; 525 int cmp_n, cmp_p, ret; 526 527 ip = ((DB_TXNHEAD *)info)->thread_info; 528 pagep = NULL; 529 REC_PRINT(__bam_cadjust_print); 530 REC_INTRO(__bam_cadjust_read, ip, 0); 531 532 /* Get the page; if it never existed and we're undoing, we're done. */ 533 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { 534 if (ret != DB_PAGE_NOTFOUND) { 535 ret = __db_pgerr(file_dbp, argp->pgno, ret); 536 goto out; 537 } else 538 goto done; 539 } 540 541 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 542 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); 543 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn); 544 if (cmp_p == 0 && DB_REDO(op)) { 545 /* Need to redo update described. */ 546 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 547 if (IS_BTREE_PAGE(pagep)) { 548 GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs += 549 argp->adjust; 550 if (argp->opflags & CAD_UPDATEROOT) 551 RE_NREC_ADJ(pagep, argp->adjust); 552 } else { 553 GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs += 554 argp->adjust; 555 if (argp->opflags & CAD_UPDATEROOT) 556 RE_NREC_ADJ(pagep, argp->adjust); 557 } 558 559 LSN(pagep) = *lsnp; 560 } else if (cmp_n == 0 && DB_UNDO(op)) { 561 /* Need to undo update described. */ 562 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 563 if (IS_BTREE_PAGE(pagep)) { 564 GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs -= 565 argp->adjust; 566 if (argp->opflags & CAD_UPDATEROOT) 567 RE_NREC_ADJ(pagep, -(argp->adjust)); 568 } else { 569 GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs -= 570 argp->adjust; 571 if (argp->opflags & CAD_UPDATEROOT) 572 RE_NREC_ADJ(pagep, -(argp->adjust)); 573 } 574 LSN(pagep) = argp->lsn; 575 } 576 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) 577 goto out; 578 pagep = NULL; 579 580done: *lsnp = argp->prev_lsn; 581 ret = 0; 582 583out: if (pagep != NULL) 584 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); 585 REC_CLOSE; 586} 587 588/* 589 * __bam_cdel_recover -- 590 * Recovery function for the intent-to-delete of a cursor record. 591 * 592 * PUBLIC: int __bam_cdel_recover 593 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 594 */ 595int 596__bam_cdel_recover(env, dbtp, lsnp, op, info) 597 ENV *env; 598 DBT *dbtp; 599 DB_LSN *lsnp; 600 db_recops op; 601 void *info; 602{ 603 __bam_cdel_args *argp; 604 DB_THREAD_INFO *ip; 605 DB *file_dbp; 606 DBC *dbc; 607 DB_MPOOLFILE *mpf; 608 PAGE *pagep; 609 u_int32_t indx; 610 int cmp_n, cmp_p, ret; 611 612 ip = ((DB_TXNHEAD *)info)->thread_info; 613 pagep = NULL; 614 REC_PRINT(__bam_cdel_print); 615 REC_INTRO(__bam_cdel_read, ip, 0); 616 617 /* Get the page; if it never existed and we're undoing, we're done. */ 618 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { 619 if (ret != DB_PAGE_NOTFOUND) { 620 ret = __db_pgerr(file_dbp, argp->pgno, ret); 621 goto out; 622 } else 623 goto done; 624 } 625 626 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 627 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); 628 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn); 629 if (cmp_p == 0 && DB_REDO(op)) { 630 /* Need to redo update described. */ 631 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 632 indx = argp->indx + (TYPE(pagep) == P_LBTREE ? O_INDX : 0); 633 B_DSET(GET_BKEYDATA(file_dbp, pagep, indx)->type); 634 635 LSN(pagep) = *lsnp; 636 } else if (cmp_n == 0 && DB_UNDO(op)) { 637 /* Need to undo update described. */ 638 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 639 indx = argp->indx + (TYPE(pagep) == P_LBTREE ? O_INDX : 0); 640 B_DCLR(GET_BKEYDATA(file_dbp, pagep, indx)->type); 641 642 if ((ret = __bam_ca_delete( 643 file_dbp, argp->pgno, argp->indx, 0, NULL)) != 0) 644 goto out; 645 646 LSN(pagep) = argp->lsn; 647 } 648 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) 649 goto out; 650 pagep = NULL; 651 652done: *lsnp = argp->prev_lsn; 653 ret = 0; 654 655out: if (pagep != NULL) 656 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); 657 REC_CLOSE; 658} 659 660/* 661 * __bam_repl_recover -- 662 * Recovery function for page item replacement. 663 * 664 * PUBLIC: int __bam_repl_recover 665 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 666 */ 667int 668__bam_repl_recover(env, dbtp, lsnp, op, info) 669 ENV *env; 670 DBT *dbtp; 671 DB_LSN *lsnp; 672 db_recops op; 673 void *info; 674{ 675 __bam_repl_args *argp; 676 DB_THREAD_INFO *ip; 677 BKEYDATA *bk; 678 DB *file_dbp; 679 DBC *dbc; 680 DBT dbt; 681 DB_MPOOLFILE *mpf; 682 PAGE *pagep; 683 int cmp_n, cmp_p, ret; 684 u_int8_t *p; 685 686 ip = ((DB_TXNHEAD *)info)->thread_info; 687 pagep = NULL; 688 REC_PRINT(__bam_repl_print); 689 REC_INTRO(__bam_repl_read, ip, 1); 690 691 /* Get the page; if it never existed and we're undoing, we're done. */ 692 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { 693 if (ret != DB_PAGE_NOTFOUND) { 694 ret = __db_pgerr(file_dbp, argp->pgno, ret); 695 goto out; 696 } else 697 goto done; 698 } 699 bk = GET_BKEYDATA(file_dbp, pagep, argp->indx); 700 701 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 702 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); 703 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn); 704 if (cmp_p == 0 && DB_REDO(op)) { 705 /* 706 * Need to redo update described. 707 * 708 * Re-build the replacement item. 709 */ 710 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 711 memset(&dbt, 0, sizeof(dbt)); 712 dbt.size = argp->prefix + argp->suffix + argp->repl.size; 713 if ((ret = __os_malloc(env, dbt.size, &dbt.data)) != 0) 714 goto out; 715 p = dbt.data; 716 memcpy(p, bk->data, argp->prefix); 717 p += argp->prefix; 718 memcpy(p, argp->repl.data, argp->repl.size); 719 p += argp->repl.size; 720 memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix); 721 722 ret = __bam_ritem(dbc, pagep, argp->indx, &dbt); 723 __os_free(env, dbt.data); 724 if (ret != 0) 725 goto out; 726 727 LSN(pagep) = *lsnp; 728 } else if (cmp_n == 0 && DB_UNDO(op)) { 729 /* 730 * Need to undo update described. 731 * 732 * Re-build the original item. 733 */ 734 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 735 memset(&dbt, 0, sizeof(dbt)); 736 dbt.size = argp->prefix + argp->suffix + argp->orig.size; 737 if ((ret = __os_malloc(env, dbt.size, &dbt.data)) != 0) 738 goto out; 739 p = dbt.data; 740 memcpy(p, bk->data, argp->prefix); 741 p += argp->prefix; 742 memcpy(p, argp->orig.data, argp->orig.size); 743 p += argp->orig.size; 744 memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix); 745 746 ret = __bam_ritem(dbc, pagep, argp->indx, &dbt); 747 __os_free(env, dbt.data); 748 if (ret != 0) 749 goto out; 750 751 /* Reset the deleted flag, if necessary. */ 752 if (argp->isdeleted) 753 B_DSET(GET_BKEYDATA(file_dbp, pagep, argp->indx)->type); 754 755 LSN(pagep) = argp->lsn; 756 } 757 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) 758 goto out; 759 pagep = NULL; 760 761done: *lsnp = argp->prev_lsn; 762 ret = 0; 763 764out: if (pagep != NULL) 765 (void)__memp_fput(mpf, ip, pagep, dbc->priority); 766 REC_CLOSE; 767} 768 769/* 770 * __bam_root_recover -- 771 * Recovery function for setting the root page on the meta-data page. 772 * 773 * PUBLIC: int __bam_root_recover 774 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 775 */ 776int 777__bam_root_recover(env, dbtp, lsnp, op, info) 778 ENV *env; 779 DBT *dbtp; 780 DB_LSN *lsnp; 781 db_recops op; 782 void *info; 783{ 784 __bam_root_args *argp; 785 DB_THREAD_INFO *ip; 786 BTMETA *meta; 787 DB *file_dbp; 788 DBC *dbc; 789 DB_MPOOLFILE *mpf; 790 int cmp_n, cmp_p, ret; 791 792 ip = ((DB_TXNHEAD *)info)->thread_info; 793 meta = NULL; 794 REC_PRINT(__bam_root_print); 795 REC_INTRO(__bam_root_read, ip, 0); 796 797 if ((ret = __memp_fget(mpf, &argp->meta_pgno, ip, NULL, 798 0, &meta)) != 0) { 799 if (ret != DB_PAGE_NOTFOUND) { 800 ret = __db_pgerr(file_dbp, argp->meta_pgno, ret); 801 goto out; 802 } else 803 goto done; 804 } 805 806 cmp_n = LOG_COMPARE(lsnp, &LSN(meta)); 807 cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn); 808 CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn); 809 if (cmp_p == 0 && DB_REDO(op)) { 810 /* Need to redo update described. */ 811 REC_DIRTY(mpf, ip, file_dbp->priority, &meta); 812 meta->root = argp->root_pgno; 813 meta->dbmeta.lsn = *lsnp; 814 ((BTREE *)file_dbp->bt_internal)->bt_root = meta->root; 815 } else if (cmp_n == 0 && DB_UNDO(op)) { 816 /* Nothing to undo except lsn. */ 817 REC_DIRTY(mpf, ip, file_dbp->priority, &meta); 818 meta->dbmeta.lsn = argp->meta_lsn; 819 } 820 if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) 821 goto out; 822 meta = NULL; 823 824done: *lsnp = argp->prev_lsn; 825 ret = 0; 826 827out: if (meta != NULL) 828 (void)__memp_fput(mpf, ip, meta, file_dbp->priority); 829 REC_CLOSE; 830} 831 832/* 833 * __bam_curadj_recover -- 834 * Transaction abort function to undo cursor adjustments. 835 * This should only be triggered by subtransaction aborts. 836 * 837 * PUBLIC: int __bam_curadj_recover 838 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 839 */ 840int 841__bam_curadj_recover(env, dbtp, lsnp, op, info) 842 ENV *env; 843 DBT *dbtp; 844 DB_LSN *lsnp; 845 db_recops op; 846 void *info; 847{ 848 __bam_curadj_args *argp; 849 DB_THREAD_INFO *ip; 850 DB *file_dbp; 851 DBC *dbc; 852 DB_MPOOLFILE *mpf; 853 int ret; 854 855 COMPQUIET(mpf, NULL); 856 857 ip = ((DB_TXNHEAD *)info)->thread_info; 858 REC_PRINT(__bam_curadj_print); 859 REC_INTRO(__bam_curadj_read, ip, 1); 860 861 ret = 0; 862 if (op != DB_TXN_ABORT) 863 goto done; 864 865 switch (argp->mode) { 866 case DB_CA_DI: 867 if ((ret = __bam_ca_di(dbc, argp->from_pgno, 868 argp->from_indx, -(int)argp->first_indx)) != 0) 869 goto out; 870 break; 871 case DB_CA_DUP: 872 if ((ret = __bam_ca_undodup(file_dbp, argp->first_indx, 873 argp->from_pgno, argp->from_indx, argp->to_indx)) != 0) 874 goto out; 875 break; 876 877 case DB_CA_RSPLIT: 878 if ((ret = 879 __bam_ca_rsplit(dbc, argp->to_pgno, argp->from_pgno)) != 0) 880 goto out; 881 break; 882 883 case DB_CA_SPLIT: 884 if ((ret = __bam_ca_undosplit(file_dbp, argp->from_pgno, 885 argp->to_pgno, argp->left_pgno, argp->from_indx)) != 0) 886 goto out; 887 break; 888 } 889 890done: *lsnp = argp->prev_lsn; 891out: REC_CLOSE; 892} 893 894/* 895 * __bam_rcuradj_recover -- 896 * Transaction abort function to undo cursor adjustments in rrecno. 897 * This should only be triggered by subtransaction aborts. 898 * 899 * PUBLIC: int __bam_rcuradj_recover 900 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 901 */ 902int 903__bam_rcuradj_recover(env, dbtp, lsnp, op, info) 904 ENV *env; 905 DBT *dbtp; 906 DB_LSN *lsnp; 907 db_recops op; 908 void *info; 909{ 910 __bam_rcuradj_args *argp; 911 DB_THREAD_INFO *ip; 912 BTREE_CURSOR *cp; 913 DB *file_dbp; 914 DBC *dbc, *rdbc; 915 DB_MPOOLFILE *mpf; 916 int ret, t_ret; 917 918 COMPQUIET(mpf, NULL); 919 920 ip = ((DB_TXNHEAD *)info)->thread_info; 921 rdbc = NULL; 922 REC_PRINT(__bam_rcuradj_print); 923 REC_INTRO(__bam_rcuradj_read, ip, 1); 924 925 ret = t_ret = 0; 926 927 if (op != DB_TXN_ABORT) 928 goto done; 929 930 /* 931 * We don't know whether we're in an offpage dup set, and 932 * thus don't know whether the dbc REC_INTRO has handed us is 933 * of a reasonable type. It's certainly unset, so if this is 934 * an offpage dup set, we don't have an OPD cursor. The 935 * simplest solution is just to allocate a whole new cursor 936 * for our use; we're only really using it to hold pass some 937 * state into __ram_ca, and this way we don't need to make 938 * this function know anything about how offpage dups work. 939 */ 940 if ((ret = __db_cursor_int(file_dbp, NULL, 941 NULL, DB_RECNO, argp->root, 0, NULL, &rdbc)) != 0) 942 goto out; 943 944 cp = (BTREE_CURSOR *)rdbc->internal; 945 F_SET(cp, C_RENUMBER); 946 cp->recno = argp->recno; 947 948 switch (argp->mode) { 949 case CA_DELETE: 950 /* 951 * The way to undo a delete is with an insert. Since 952 * we're undoing it, the delete flag must be set. 953 */ 954 F_SET(cp, C_DELETED); 955 F_SET(cp, C_RENUMBER); /* Just in case. */ 956 cp->order = argp->order; 957 if ((ret = __ram_ca(rdbc, CA_ICURRENT, NULL)) != 0) 958 goto out; 959 break; 960 case CA_IAFTER: 961 case CA_IBEFORE: 962 case CA_ICURRENT: 963 /* 964 * The way to undo an insert is with a delete. The delete 965 * flag is unset to start with. 966 */ 967 F_CLR(cp, C_DELETED); 968 cp->order = INVALID_ORDER; 969 if ((ret = __ram_ca(rdbc, CA_DELETE, NULL)) != 0) 970 goto out; 971 break; 972 } 973 974done: *lsnp = argp->prev_lsn; 975out: if (rdbc != NULL && (t_ret = __dbc_close(rdbc)) != 0 && ret == 0) 976 ret = t_ret; 977 REC_CLOSE; 978} 979 980/* 981 * __bam_relink_recover -- 982 * Recovery function for relink. 983 * 984 * PUBLIC: int __bam_relink_recover 985 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 986 */ 987int 988__bam_relink_recover(env, dbtp, lsnp, op, info) 989 ENV *env; 990 DBT *dbtp; 991 DB_LSN *lsnp; 992 db_recops op; 993 void *info; 994{ 995 __bam_relink_args *argp; 996 DB_THREAD_INFO *ip; 997 DB *file_dbp; 998 DBC *dbc; 999 DB_MPOOLFILE *mpf; 1000 PAGE *pagep; 1001 int cmp_n, cmp_p, ret; 1002 1003 ip = ((DB_TXNHEAD *)info)->thread_info; 1004 pagep = NULL; 1005 REC_PRINT(__bam_relink_print); 1006 REC_INTRO(__bam_relink_read, ip, 0); 1007 1008 /* 1009 * There are up to three pages we need to check -- the page, and the 1010 * previous and next pages, if they existed. For a page add operation, 1011 * the current page is the result of a split and is being recovered 1012 * elsewhere, so all we need do is recover the next page. 1013 */ 1014 if ((ret = __memp_fget(mpf, &argp->next, ip, NULL, 0, &pagep)) != 0) { 1015 if (ret != DB_PAGE_NOTFOUND) { 1016 ret = __db_pgerr(file_dbp, argp->next, ret); 1017 goto out; 1018 } else 1019 goto prev; 1020 } 1021 1022 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 1023 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next); 1024 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next); 1025 if (cmp_p == 0 && DB_REDO(op)) { 1026 /* Redo the remove or replace. */ 1027 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1028 if (argp->new_pgno == PGNO_INVALID) 1029 pagep->prev_pgno = argp->prev; 1030 else 1031 pagep->prev_pgno = argp->new_pgno; 1032 1033 pagep->lsn = *lsnp; 1034 } else if (cmp_n == 0 && DB_UNDO(op)) { 1035 /* Undo the remove or replace. */ 1036 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1037 pagep->prev_pgno = argp->pgno; 1038 1039 pagep->lsn = argp->lsn_next; 1040 } 1041 1042 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) 1043 goto out; 1044 pagep = NULL; 1045 1046prev: if ((ret = __memp_fget(mpf, &argp->prev, ip, NULL, 0, &pagep)) != 0) { 1047 if (ret != DB_PAGE_NOTFOUND) { 1048 ret = __db_pgerr(file_dbp, argp->prev, ret); 1049 goto out; 1050 } else 1051 goto done; 1052 } 1053 1054 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev); 1055 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev); 1056 if (cmp_p == 0 && DB_REDO(op)) { 1057 /* Redo the relink. */ 1058 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1059 if (argp->new_pgno == PGNO_INVALID) 1060 pagep->next_pgno = argp->next; 1061 else 1062 pagep->next_pgno = argp->new_pgno; 1063 1064 pagep->lsn = *lsnp; 1065 } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) { 1066 /* Undo the relink. */ 1067 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1068 pagep->next_pgno = argp->pgno; 1069 pagep->lsn = argp->lsn_prev; 1070 } 1071 1072 if ((ret = __memp_fput(mpf, 1073 ip, pagep, file_dbp->priority)) != 0) 1074 goto out; 1075 pagep = NULL; 1076 1077done: *lsnp = argp->prev_lsn; 1078 ret = 0; 1079 1080out: if (pagep != NULL) 1081 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); 1082 REC_CLOSE; 1083} 1084 1085/* 1086 * __bam_merge_44_recover -- 1087 * Recovery function for merge. 1088 * 1089 * PUBLIC: int __bam_merge_44_recover 1090 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 1091 */ 1092int 1093__bam_merge_44_recover(env, dbtp, lsnp, op, info) 1094 ENV *env; 1095 DBT *dbtp; 1096 DB_LSN *lsnp; 1097 db_recops op; 1098 void *info; 1099{ 1100 __bam_merge_44_args *argp; 1101 DB_THREAD_INFO *ip; 1102 BKEYDATA *bk; 1103 DB *file_dbp; 1104 DBC *dbc; 1105 DB_MPOOLFILE *mpf; 1106 PAGE *pagep; 1107 db_indx_t indx, *ninp, *pinp; 1108 u_int32_t size; 1109 u_int8_t *bp; 1110 int cmp_n, cmp_p, i, ret; 1111 1112 ip = ((DB_TXNHEAD *)info)->thread_info; 1113 REC_PRINT(__bam_merge_44_print); 1114 REC_INTRO(__bam_merge_44_read, ip, 1); 1115 1116 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { 1117 if (ret != DB_PAGE_NOTFOUND) { 1118 ret = __db_pgerr(file_dbp, argp->pgno, ret); 1119 goto out; 1120 } else 1121 goto next; 1122 } 1123 1124 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 1125 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); 1126 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn); 1127 1128 if (cmp_p == 0 && DB_REDO(op)) { 1129 /* 1130 * If the header is provided the page is empty, copy the 1131 * needed data. 1132 */ 1133 DB_ASSERT(env, argp->hdr.size == 0 || NUM_ENT(pagep) == 0); 1134 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 1135 if (argp->hdr.size != 0) { 1136 P_INIT(pagep, file_dbp->pgsize, pagep->pgno, 1137 PREV_PGNO(argp->hdr.data), 1138 NEXT_PGNO(argp->hdr.data), 1139 LEVEL(argp->hdr.data), TYPE(argp->hdr.data)); 1140 } 1141 if (TYPE(pagep) == P_OVERFLOW) { 1142 OV_REF(pagep) = OV_REF(argp->hdr.data); 1143 OV_LEN(pagep) = OV_LEN(argp->hdr.data); 1144 bp = (u_int8_t *) pagep + P_OVERHEAD(file_dbp); 1145 memcpy(bp, argp->data.data, argp->data.size); 1146 } else { 1147 /* Copy the data segment. */ 1148 bp = (u_int8_t *)pagep + 1149 (db_indx_t)(HOFFSET(pagep) - argp->data.size); 1150 memcpy(bp, argp->data.data, argp->data.size); 1151 1152 /* Copy index table offset past the current entries. */ 1153 pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep); 1154 ninp = argp->ind.data; 1155 for (i = 0; 1156 i < (int)(argp->ind.size / sizeof(*ninp)); i++) 1157 *pinp++ = *ninp++ 1158 - (file_dbp->pgsize - HOFFSET(pagep)); 1159 HOFFSET(pagep) -= argp->data.size; 1160 NUM_ENT(pagep) += i; 1161 } 1162 pagep->lsn = *lsnp; 1163 } else if (cmp_n == 0 && !DB_REDO(op)) { 1164 /* 1165 * Since logging is logical at the page level 1166 * we cannot just truncate the data space. Delete 1167 * the proper number of items from the logical end 1168 * of the page. 1169 */ 1170 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 1171 for (i = 0; i < (int)(argp->ind.size / sizeof(*ninp)); i++) { 1172 indx = NUM_ENT(pagep) - 1; 1173 if (P_INP(file_dbp, pagep)[indx] == 1174 P_INP(file_dbp, pagep)[indx - P_INDX]) { 1175 NUM_ENT(pagep)--; 1176 continue; 1177 } 1178 switch (TYPE(pagep)) { 1179 case P_LBTREE: 1180 case P_LRECNO: 1181 case P_LDUP: 1182 bk = GET_BKEYDATA(file_dbp, pagep, indx); 1183 size = BITEM_SIZE(bk); 1184 break; 1185 1186 case P_IBTREE: 1187 size = BINTERNAL_SIZE( 1188 GET_BINTERNAL(file_dbp, pagep, indx)->len); 1189 break; 1190 case P_IRECNO: 1191 size = RINTERNAL_SIZE; 1192 break; 1193 1194 default: 1195 ret = __db_pgfmt(env, PGNO(pagep)); 1196 goto out; 1197 } 1198 if ((ret = 1199 __db_ditem(dbc, pagep, indx, size)) != 0) 1200 goto out; 1201 } 1202 if (argp->ind.size == 0) 1203 HOFFSET(pagep) = file_dbp->pgsize; 1204 pagep->lsn = argp->lsn; 1205 } 1206 1207 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) 1208 goto out; 1209 1210next: if ((ret = __memp_fget(mpf, &argp->npgno, ip, NULL, 0, &pagep)) != 0) { 1211 if (ret != DB_PAGE_NOTFOUND) { 1212 ret = __db_pgerr(file_dbp, argp->pgno, ret); 1213 goto out; 1214 } else 1215 goto done; 1216 } 1217 1218 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 1219 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nlsn); 1220 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->nlsn); 1221 1222 if (cmp_p == 0 && DB_REDO(op)) { 1223 /* Need to truncate the page. */ 1224 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 1225 HOFFSET(pagep) = file_dbp->pgsize; 1226 NUM_ENT(pagep) = 0; 1227 pagep->lsn = *lsnp; 1228 } else if (cmp_n == 0 && !DB_REDO(op)) { 1229 /* Need to put the data back on the page. */ 1230 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 1231 if (TYPE(pagep) == P_OVERFLOW) { 1232 OV_REF(pagep) = OV_REF(argp->hdr.data); 1233 OV_LEN(pagep) = OV_LEN(argp->hdr.data); 1234 bp = (u_int8_t *) pagep + P_OVERHEAD(file_dbp); 1235 memcpy(bp, argp->data.data, argp->data.size); 1236 } else { 1237 bp = (u_int8_t *)pagep + 1238 (db_indx_t)(HOFFSET(pagep) - argp->data.size); 1239 memcpy(bp, argp->data.data, argp->data.size); 1240 1241 /* Copy index table. */ 1242 pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep); 1243 ninp = argp->ind.data; 1244 for (i = 0; 1245 i < (int)(argp->ind.size / sizeof(*ninp)); i++) 1246 *pinp++ = *ninp++; 1247 HOFFSET(pagep) -= argp->data.size; 1248 NUM_ENT(pagep) = i; 1249 } 1250 pagep->lsn = argp->nlsn; 1251 } 1252 1253 if ((ret = __memp_fput(mpf, 1254 ip, pagep, dbc->priority)) != 0) 1255 goto out; 1256done: 1257 *lsnp = argp->prev_lsn; 1258 ret = 0; 1259 1260out: REC_CLOSE; 1261} 1262 1263/* 1264 * __bam_merge_recover -- 1265 * Recovery function for merge. 1266 * 1267 * PUBLIC: int __bam_merge_recover 1268 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 1269 */ 1270int 1271__bam_merge_recover(env, dbtp, lsnp, op, info) 1272 ENV *env; 1273 DBT *dbtp; 1274 DB_LSN *lsnp; 1275 db_recops op; 1276 void *info; 1277{ 1278 __bam_merge_args *argp; 1279 DB_THREAD_INFO *ip; 1280 BKEYDATA *bk; 1281 DB *file_dbp; 1282 DBC *dbc; 1283 DB_MPOOLFILE *mpf; 1284 PAGE *pagep; 1285 db_indx_t indx, *ninp, *pinp; 1286 u_int32_t size; 1287 u_int8_t *bp; 1288 int cmp_n, cmp_p, i, ret; 1289 1290 ip = ((DB_TXNHEAD *)info)->thread_info; 1291 REC_PRINT(__bam_merge_print); 1292 REC_INTRO(__bam_merge_read, ip, 1); 1293 1294 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { 1295 if (ret != DB_PAGE_NOTFOUND) { 1296 ret = __db_pgerr(file_dbp, argp->pgno, ret); 1297 goto out; 1298 } else 1299 goto next; 1300 } 1301 1302 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 1303 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); 1304 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn); 1305 1306 if (cmp_p == 0 && DB_REDO(op)) { 1307 /* 1308 * When pg_copy is set, we are copying onto a new page. 1309 */ 1310 DB_ASSERT(env, !argp->pg_copy || NUM_ENT(pagep) == 0); 1311 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 1312 if (argp->pg_copy) { 1313 P_INIT(pagep, file_dbp->pgsize, pagep->pgno, 1314 PREV_PGNO(argp->hdr.data), 1315 NEXT_PGNO(argp->hdr.data), 1316 LEVEL(argp->hdr.data), TYPE(argp->hdr.data)); 1317 } 1318 if (TYPE(pagep) == P_OVERFLOW) { 1319 OV_REF(pagep) = OV_REF(argp->hdr.data); 1320 OV_LEN(pagep) = OV_LEN(argp->hdr.data); 1321 bp = (u_int8_t *)pagep + P_OVERHEAD(file_dbp); 1322 memcpy(bp, argp->data.data, argp->data.size); 1323 } else { 1324 /* Copy the data segment. */ 1325 bp = (u_int8_t *)pagep + 1326 (db_indx_t)(HOFFSET(pagep) - argp->data.size); 1327 memcpy(bp, argp->data.data, argp->data.size); 1328 1329 /* Copy index table offset past the current entries. */ 1330 pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep); 1331 ninp = P_INP(file_dbp, argp->hdr.data); 1332 for (i = 0; i < NUM_ENT(argp->hdr.data); i++) 1333 *pinp++ = *ninp++ 1334 - (file_dbp->pgsize - HOFFSET(pagep)); 1335 HOFFSET(pagep) -= argp->data.size; 1336 NUM_ENT(pagep) += i; 1337 } 1338 pagep->lsn = *lsnp; 1339 } else if (cmp_n == 0 && !DB_REDO(op)) { 1340 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 1341 if (TYPE(pagep) == P_OVERFLOW) { 1342 HOFFSET(pagep) = file_dbp->pgsize; 1343 goto setlsn; 1344 } 1345 1346 /* 1347 * Since logging is logical at the page level we cannot just 1348 * truncate the data space. Delete the proper number of items 1349 * from the logical end of the page. 1350 */ 1351 for (i = 0; i < NUM_ENT(argp->hdr.data); i++) { 1352 indx = NUM_ENT(pagep) - 1; 1353 if (P_INP(file_dbp, pagep)[indx] == 1354 P_INP(file_dbp, pagep)[indx - P_INDX]) { 1355 NUM_ENT(pagep)--; 1356 continue; 1357 } 1358 switch (TYPE(pagep)) { 1359 case P_LBTREE: 1360 case P_LRECNO: 1361 case P_LDUP: 1362 bk = GET_BKEYDATA(file_dbp, pagep, indx); 1363 size = BITEM_SIZE(bk); 1364 break; 1365 1366 case P_IBTREE: 1367 size = BINTERNAL_SIZE( 1368 GET_BINTERNAL(file_dbp, pagep, indx)->len); 1369 break; 1370 case P_IRECNO: 1371 size = RINTERNAL_SIZE; 1372 break; 1373 1374 default: 1375 ret = __db_pgfmt(env, PGNO(pagep)); 1376 goto out; 1377 } 1378 if ((ret = __db_ditem(dbc, pagep, indx, size)) != 0) 1379 goto out; 1380 } 1381setlsn: pagep->lsn = argp->lsn; 1382 } 1383 1384 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) 1385 goto out; 1386 1387next: if ((ret = __memp_fget(mpf, &argp->npgno, ip, NULL, 0, &pagep)) != 0) { 1388 if (ret != DB_PAGE_NOTFOUND) { 1389 ret = __db_pgerr(file_dbp, argp->pgno, ret); 1390 goto out; 1391 } else 1392 goto done; 1393 } 1394 1395 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 1396 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nlsn); 1397 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->nlsn); 1398 1399 if (cmp_p == 0 && DB_REDO(op)) { 1400 /* Need to truncate the page. */ 1401 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 1402 HOFFSET(pagep) = file_dbp->pgsize; 1403 NUM_ENT(pagep) = 0; 1404 pagep->lsn = *lsnp; 1405 } else if (cmp_n == 0 && !DB_REDO(op)) { 1406 /* Need to put the data back on the page. */ 1407 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 1408 if (TYPE(pagep) == P_OVERFLOW) { 1409 OV_REF(pagep) = OV_REF(argp->hdr.data); 1410 OV_LEN(pagep) = OV_LEN(argp->hdr.data); 1411 bp = (u_int8_t *)pagep + P_OVERHEAD(file_dbp); 1412 memcpy(bp, argp->data.data, argp->data.size); 1413 } else { 1414 bp = (u_int8_t *)pagep + 1415 (db_indx_t)(HOFFSET(pagep) - argp->data.size); 1416 memcpy(bp, argp->data.data, argp->data.size); 1417 1418 /* Copy index table. */ 1419 pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep); 1420 ninp = P_INP(file_dbp, argp->hdr.data); 1421 for (i = 0; i < NUM_ENT(argp->hdr.data); i++) 1422 *pinp++ = *ninp++; 1423 HOFFSET(pagep) -= argp->data.size; 1424 NUM_ENT(pagep) += i; 1425 } 1426 pagep->lsn = argp->nlsn; 1427 } 1428 1429 if ((ret = __memp_fput(mpf, 1430 ip, pagep, dbc->priority)) != 0) 1431 goto out; 1432done: 1433 *lsnp = argp->prev_lsn; 1434 ret = 0; 1435 1436out: REC_CLOSE; 1437} 1438 1439/* 1440 * __bam_pgno_recover -- 1441 * Recovery function for page number replacment. 1442 * 1443 * PUBLIC: int __bam_pgno_recover 1444 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 1445 */ 1446int 1447__bam_pgno_recover(env, dbtp, lsnp, op, info) 1448 ENV *env; 1449 DBT *dbtp; 1450 DB_LSN *lsnp; 1451 db_recops op; 1452 void *info; 1453{ 1454 BINTERNAL *bi; 1455 __bam_pgno_args *argp; 1456 DB_THREAD_INFO *ip; 1457 DB *file_dbp; 1458 DBC *dbc; 1459 DB_MPOOLFILE *mpf; 1460 PAGE *pagep, *npagep; 1461 db_pgno_t *pgnop; 1462 int cmp_n, cmp_p, ret; 1463 1464 ip = ((DB_TXNHEAD *)info)->thread_info; 1465 REC_PRINT(__bam_pgno_print); 1466 REC_INTRO(__bam_pgno_read, ip, 0); 1467 1468 REC_FGET(mpf, ip, argp->pgno, &pagep, done); 1469 1470 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 1471 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); 1472 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn); 1473 1474 if ((cmp_p == 0 && DB_REDO(op)) || (cmp_n == 0 && !DB_REDO(op))) { 1475 switch (TYPE(pagep)) { 1476 case P_IBTREE: 1477 /* 1478 * An internal record can have both a overflow 1479 * and child pointer. Fetch the page to see 1480 * which it is. 1481 */ 1482 bi = GET_BINTERNAL(file_dbp, pagep, argp->indx); 1483 if (B_TYPE(bi->type) == B_OVERFLOW) { 1484 REC_FGET(mpf, ip, argp->npgno, &npagep, out); 1485 1486 if (TYPE(npagep) == P_OVERFLOW) 1487 pgnop = 1488 &((BOVERFLOW *)(bi->data))->pgno; 1489 else 1490 pgnop = &bi->pgno; 1491 if ((ret = __memp_fput(mpf, ip, 1492 npagep, file_dbp->priority)) != 0) 1493 goto out; 1494 break; 1495 } 1496 pgnop = &bi->pgno; 1497 break; 1498 case P_IRECNO: 1499 pgnop = 1500 &GET_RINTERNAL(file_dbp, pagep, argp->indx)->pgno; 1501 break; 1502 default: 1503 pgnop = 1504 &GET_BOVERFLOW(file_dbp, pagep, argp->indx)->pgno; 1505 break; 1506 } 1507 1508 if (DB_REDO(op)) { 1509 /* Need to redo update described. */ 1510 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1511 *pgnop = argp->npgno; 1512 pagep->lsn = *lsnp; 1513 } else { 1514 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1515 *pgnop = argp->opgno; 1516 pagep->lsn = argp->lsn; 1517 } 1518 } 1519 1520 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) 1521 goto out; 1522 1523done: 1524 *lsnp = argp->prev_lsn; 1525 ret = 0; 1526 1527out: REC_CLOSE; 1528} 1529 1530/* 1531 * __bam_relink_43_recover -- 1532 * Recovery function for relink. 1533 * 1534 * PUBLIC: int __bam_relink_43_recover 1535 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 1536 */ 1537int 1538__bam_relink_43_recover(env, dbtp, lsnp, op, info) 1539 ENV *env; 1540 DBT *dbtp; 1541 DB_LSN *lsnp; 1542 db_recops op; 1543 void *info; 1544{ 1545 __bam_relink_43_args *argp; 1546 DB_THREAD_INFO *ip; 1547 DB *file_dbp; 1548 DBC *dbc; 1549 DB_MPOOLFILE *mpf; 1550 PAGE *pagep; 1551 int cmp_n, cmp_p, modified, ret; 1552 1553 ip = ((DB_TXNHEAD *)info)->thread_info; 1554 pagep = NULL; 1555 REC_PRINT(__bam_relink_43_print); 1556 REC_INTRO(__bam_relink_43_read, ip, 0); 1557 1558 /* 1559 * There are up to three pages we need to check -- the page, and the 1560 * previous and next pages, if they existed. For a page add operation, 1561 * the current page is the result of a split and is being recovered 1562 * elsewhere, so all we need do is recover the next page. 1563 */ 1564 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { 1565 if (ret != DB_PAGE_NOTFOUND) { 1566 ret = __db_pgerr(file_dbp, argp->pgno, ret); 1567 goto out; 1568 } else 1569 goto next2; 1570 } 1571 1572 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); 1573 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn); 1574 if (cmp_p == 0 && DB_REDO(op)) { 1575 /* Redo the relink. */ 1576 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1577 pagep->lsn = *lsnp; 1578 } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) { 1579 /* Undo the relink. */ 1580 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1581 pagep->next_pgno = argp->next; 1582 pagep->prev_pgno = argp->prev; 1583 pagep->lsn = argp->lsn; 1584 } 1585 if ((ret = __memp_fput(mpf, 1586 ip, pagep, file_dbp->priority)) != 0) 1587 goto out; 1588 pagep = NULL; 1589 1590next2: if ((ret = __memp_fget(mpf, &argp->next, ip, NULL, 0, &pagep)) != 0) { 1591 if (ret != DB_PAGE_NOTFOUND) { 1592 ret = __db_pgerr(file_dbp, argp->next, ret); 1593 goto out; 1594 } else 1595 goto prev; 1596 } 1597 1598 modified = 0; 1599 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 1600 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next); 1601 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next); 1602 if (cmp_p == 0 && DB_REDO(op)) { 1603 /* Redo the remove or undo the add. */ 1604 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1605 pagep->prev_pgno = argp->prev; 1606 modified = 1; 1607 } else if (cmp_n == 0 && DB_UNDO(op)) { 1608 /* Undo the remove or redo the add. */ 1609 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1610 pagep->prev_pgno = argp->pgno; 1611 modified = 1; 1612 } 1613 if (modified) { 1614 if (DB_UNDO(op)) 1615 pagep->lsn = argp->lsn_next; 1616 else 1617 pagep->lsn = *lsnp; 1618 } 1619 if ((ret = __memp_fput(mpf, 1620 ip, pagep, file_dbp->priority)) != 0) 1621 goto out; 1622 pagep = NULL; 1623 1624prev: if ((ret = __memp_fget(mpf, &argp->prev, ip, NULL, 0, &pagep)) != 0) { 1625 if (ret != DB_PAGE_NOTFOUND) { 1626 ret = __db_pgerr(file_dbp, argp->prev, ret); 1627 goto out; 1628 } else 1629 goto done; 1630 } 1631 1632 modified = 0; 1633 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev); 1634 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev); 1635 if (cmp_p == 0 && DB_REDO(op)) { 1636 /* Redo the relink. */ 1637 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1638 pagep->next_pgno = argp->next; 1639 modified = 1; 1640 } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) { 1641 /* Undo the relink. */ 1642 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1643 pagep->next_pgno = argp->pgno; 1644 modified = 1; 1645 } 1646 if (modified) { 1647 if (DB_UNDO(op)) 1648 pagep->lsn = argp->lsn_prev; 1649 else 1650 pagep->lsn = *lsnp; 1651 } 1652 if ((ret = __memp_fput(mpf, 1653 ip, pagep, file_dbp->priority)) != 0) 1654 goto out; 1655 pagep = NULL; 1656 1657done: *lsnp = argp->prev_lsn; 1658 ret = 0; 1659 1660out: if (pagep != NULL) 1661 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); 1662 REC_CLOSE; 1663} 1664