1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996,2008 Oracle. All rights reserved. 5 * 6 * $Id: db_rec.c,v 12.53 2008/03/12 20:33:03 mbrey Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/db_page.h" 13#include "dbinc/log.h" 14#include "dbinc/mp.h" 15#include "dbinc/hash.h" 16 17static int __db_pg_free_recover_int __P((ENV *, DB_THREAD_INFO *, 18 __db_pg_freedata_args *, DB *, DB_LSN *, DB_MPOOLFILE *, db_recops, int)); 19static int __db_pg_free_recover_42_int __P((ENV *, DB_THREAD_INFO *, 20 __db_pg_freedata_42_args *, 21 DB *, DB_LSN *, DB_MPOOLFILE *, db_recops, int)); 22 23/* 24 * PUBLIC: int __db_addrem_recover 25 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 26 * 27 * This log message is generated whenever we add or remove a duplicate 28 * to/from a duplicate page. On recover, we just do the opposite. 29 */ 30int 31__db_addrem_recover(env, dbtp, lsnp, op, info) 32 ENV *env; 33 DBT *dbtp; 34 DB_LSN *lsnp; 35 db_recops op; 36 void *info; 37{ 38 __db_addrem_args *argp; 39 DB_THREAD_INFO *ip; 40 DB *file_dbp; 41 DBC *dbc; 42 DB_MPOOLFILE *mpf; 43 PAGE *pagep; 44 int cmp_n, cmp_p, modified, ret; 45 46 ip = ((DB_TXNHEAD *)info)->thread_info; 47 pagep = NULL; 48 REC_PRINT(__db_addrem_print); 49 REC_INTRO(__db_addrem_read, ip, 1); 50 51 REC_FGET(mpf, ip, argp->pgno, &pagep, done); 52 modified = 0; 53 54 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 55 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); 56 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); 57 if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_DUP) || 58 (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_DUP)) { 59 /* Need to redo an add, or undo a delete. */ 60 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 61 if ((ret = __db_pitem(dbc, pagep, argp->indx, argp->nbytes, 62 argp->hdr.size == 0 ? NULL : &argp->hdr, 63 argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0) 64 goto out; 65 modified = 1; 66 67 } else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_DUP) || 68 (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_DUP)) { 69 /* Need to undo an add, or redo a delete. */ 70 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 71 if ((ret = __db_ditem(dbc, 72 pagep, argp->indx, argp->nbytes)) != 0) 73 goto out; 74 modified = 1; 75 } 76 77 if (modified) { 78 if (DB_REDO(op)) 79 LSN(pagep) = *lsnp; 80 else 81 LSN(pagep) = argp->pagelsn; 82 } 83 84 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) 85 goto out; 86 pagep = NULL; 87 88done: *lsnp = argp->prev_lsn; 89 ret = 0; 90 91out: if (pagep != NULL) 92 (void)__memp_fput(mpf, ip, pagep, dbc->priority); 93 REC_CLOSE; 94} 95 96/* 97 * PUBLIC: int __db_big_recover 98 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 99 */ 100int 101__db_big_recover(env, dbtp, lsnp, op, info) 102 ENV *env; 103 DBT *dbtp; 104 DB_LSN *lsnp; 105 db_recops op; 106 void *info; 107{ 108 __db_big_args *argp; 109 DB_THREAD_INFO *ip; 110 DB *file_dbp; 111 DBC *dbc; 112 DB_MPOOLFILE *mpf; 113 PAGE *pagep; 114 int cmp_n, cmp_p, modified, ret; 115 116 ip = ((DB_TXNHEAD *)info)->thread_info; 117 pagep = NULL; 118 REC_PRINT(__db_big_print); 119 REC_INTRO(__db_big_read, ip, 0); 120 121 REC_FGET(mpf, ip, argp->pgno, &pagep, ppage); 122 modified = 0; 123 124 /* 125 * There are three pages we need to check. The one on which we are 126 * adding data, the previous one whose next_pointer may have 127 * been updated, and the next one whose prev_pointer may have 128 * been updated. 129 */ 130 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 131 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); 132 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); 133 if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) || 134 (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_BIG)) { 135 /* We are either redo-ing an add, or undoing a delete. */ 136 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 137 P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno, 138 argp->next_pgno, 0, P_OVERFLOW); 139 OV_LEN(pagep) = argp->dbt.size; 140 OV_REF(pagep) = 1; 141 memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp), argp->dbt.data, 142 argp->dbt.size); 143 PREV_PGNO(pagep) = argp->prev_pgno; 144 modified = 1; 145 } else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_BIG) || 146 (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_BIG)) { 147 /* 148 * We are either undo-ing an add or redo-ing a delete. 149 * The page is about to be reclaimed in either case, so 150 * there really isn't anything to do here. 151 */ 152 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 153 modified = 1; 154 } 155 if (modified) 156 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; 157 158 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); 159 pagep = NULL; 160 if (ret != 0) 161 goto out; 162 163 /* 164 * We only delete a whole chain of overflow. 165 * Each page is handled individually 166 */ 167 if (argp->opcode == DB_REM_BIG) 168 goto done; 169 170 /* Now check the previous page. */ 171ppage: if (argp->prev_pgno != PGNO_INVALID) { 172 REC_FGET(mpf, ip, argp->prev_pgno, &pagep, npage); 173 modified = 0; 174 175 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 176 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn); 177 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn); 178 179 if (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) { 180 /* Redo add, undo delete. */ 181 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 182 NEXT_PGNO(pagep) = argp->pgno; 183 modified = 1; 184 } else if (cmp_n == 0 && 185 DB_UNDO(op) && argp->opcode == DB_ADD_BIG) { 186 /* Redo delete, undo add. */ 187 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 188 NEXT_PGNO(pagep) = argp->next_pgno; 189 modified = 1; 190 } 191 if (modified) 192 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn; 193 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); 194 pagep = NULL; 195 if (ret != 0) 196 goto out; 197 } 198 pagep = NULL; 199 200 /* Now check the next page. Can only be set on a delete. */ 201npage: if (argp->next_pgno != PGNO_INVALID) { 202 REC_FGET(mpf, ip, argp->next_pgno, &pagep, done); 203 modified = 0; 204 205 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 206 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn); 207 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn); 208 if (cmp_p == 0 && DB_REDO(op)) { 209 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 210 PREV_PGNO(pagep) = PGNO_INVALID; 211 modified = 1; 212 } else if (cmp_n == 0 && DB_UNDO(op)) { 213 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 214 PREV_PGNO(pagep) = argp->pgno; 215 modified = 1; 216 } 217 if (modified) 218 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn; 219 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); 220 pagep = NULL; 221 if (ret != 0) 222 goto out; 223 } 224 pagep = NULL; 225 226done: *lsnp = argp->prev_lsn; 227 ret = 0; 228 229out: if (pagep != NULL) 230 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); 231 REC_CLOSE; 232} 233 234/* 235 * __db_ovref_recover -- 236 * Recovery function for __db_ovref(). 237 * 238 * PUBLIC: int __db_ovref_recover 239 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 240 */ 241int 242__db_ovref_recover(env, dbtp, lsnp, op, info) 243 ENV *env; 244 DBT *dbtp; 245 DB_LSN *lsnp; 246 db_recops op; 247 void *info; 248{ 249 __db_ovref_args *argp; 250 DB_THREAD_INFO *ip; 251 DB *file_dbp; 252 DBC *dbc; 253 DB_MPOOLFILE *mpf; 254 PAGE *pagep; 255 int cmp, ret; 256 257 ip = ((DB_TXNHEAD *)info)->thread_info; 258 pagep = NULL; 259 REC_PRINT(__db_ovref_print); 260 REC_INTRO(__db_ovref_read, ip, 0); 261 262 REC_FGET(mpf, ip, argp->pgno, &pagep, done); 263 264 cmp = LOG_COMPARE(&LSN(pagep), &argp->lsn); 265 CHECK_LSN(env, op, cmp, &LSN(pagep), &argp->lsn); 266 if (cmp == 0 && DB_REDO(op)) { 267 /* Need to redo update described. */ 268 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 269 OV_REF(pagep) += argp->adjust; 270 pagep->lsn = *lsnp; 271 } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) { 272 /* Need to undo update described. */ 273 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 274 OV_REF(pagep) -= argp->adjust; 275 pagep->lsn = argp->lsn; 276 } 277 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); 278 pagep = NULL; 279 if (ret != 0) 280 goto out; 281 pagep = NULL; 282 283done: *lsnp = argp->prev_lsn; 284 ret = 0; 285 286out: if (pagep != NULL) 287 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); 288 REC_CLOSE; 289} 290 291/* 292 * __db_debug_recover -- 293 * Recovery function for debug. 294 * 295 * PUBLIC: int __db_debug_recover __P((ENV *, 296 * PUBLIC: DBT *, DB_LSN *, db_recops, void *)); 297 */ 298int 299__db_debug_recover(env, dbtp, lsnp, op, info) 300 ENV *env; 301 DBT *dbtp; 302 DB_LSN *lsnp; 303 db_recops op; 304 void *info; 305{ 306 __db_debug_args *argp; 307 int ret; 308 309 COMPQUIET(op, DB_TXN_ABORT); 310 COMPQUIET(info, NULL); 311 312 REC_PRINT(__db_debug_print); 313 REC_NOOP_INTRO(__db_debug_read); 314 315 *lsnp = argp->prev_lsn; 316 ret = 0; 317 318 REC_NOOP_CLOSE; 319} 320 321/* 322 * __db_noop_recover -- 323 * Recovery function for noop. 324 * 325 * PUBLIC: int __db_noop_recover __P((ENV *, 326 * PUBLIC: DBT *, DB_LSN *, db_recops, void *)); 327 */ 328int 329__db_noop_recover(env, dbtp, lsnp, op, info) 330 ENV *env; 331 DBT *dbtp; 332 DB_LSN *lsnp; 333 db_recops op; 334 void *info; 335{ 336 __db_noop_args *argp; 337 DB_THREAD_INFO *ip; 338 DB *file_dbp; 339 DBC *dbc; 340 DB_MPOOLFILE *mpf; 341 PAGE *pagep; 342 int cmp_n, cmp_p, ret; 343 344 ip = ((DB_TXNHEAD *)info)->thread_info; 345 pagep = NULL; 346 REC_PRINT(__db_noop_print); 347 REC_INTRO(__db_noop_read, ip, 0); 348 349 REC_FGET(mpf, ip, argp->pgno, &pagep, done); 350 351 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 352 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn); 353 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn); 354 if (cmp_p == 0 && DB_REDO(op)) { 355 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 356 LSN(pagep) = *lsnp; 357 } else if (cmp_n == 0 && DB_UNDO(op)) { 358 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 359 LSN(pagep) = argp->prevlsn; 360 } 361 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); 362 pagep = NULL; 363 364done: *lsnp = argp->prev_lsn; 365out: if (pagep != NULL) 366 (void)__memp_fput(mpf, 367 ip, pagep, file_dbp->priority); 368 REC_CLOSE; 369} 370 371/* 372 * __db_pg_alloc_recover -- 373 * Recovery function for pg_alloc. 374 * 375 * PUBLIC: int __db_pg_alloc_recover 376 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 377 */ 378int 379__db_pg_alloc_recover(env, dbtp, lsnp, op, info) 380 ENV *env; 381 DBT *dbtp; 382 DB_LSN *lsnp; 383 db_recops op; 384 void *info; 385{ 386 __db_pg_alloc_args *argp; 387 DB_THREAD_INFO *ip; 388 DB *file_dbp; 389 DBC *dbc; 390 DBMETA *meta; 391 DB_MPOOLFILE *mpf; 392 PAGE *pagep; 393 db_pgno_t pgno; 394 int cmp_n, cmp_p, created, level, ret; 395 396 ip = ((DB_TXNHEAD *)info)->thread_info; 397 meta = NULL; 398 pagep = NULL; 399 created = 0; 400 REC_PRINT(__db_pg_alloc_print); 401 REC_INTRO(__db_pg_alloc_read, ip, 0); 402 403 /* 404 * Fix up the metadata page. If we're redoing the operation, we have 405 * to get the metadata page and update its LSN and its free pointer. 406 * If we're undoing the operation and the page was ever created, we put 407 * it on the freelist. 408 */ 409 pgno = PGNO_BASE_MD; 410 if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &meta)) != 0) { 411 /* The metadata page must always exist on redo. */ 412 if (DB_REDO(op)) { 413 ret = __db_pgerr(file_dbp, pgno, ret); 414 goto out; 415 } else 416 goto done; 417 } 418 cmp_n = LOG_COMPARE(lsnp, &LSN(meta)); 419 cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn); 420 CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn); 421 if (cmp_p == 0 && DB_REDO(op)) { 422 /* Need to redo update described. */ 423 REC_DIRTY(mpf, ip, file_dbp->priority, &meta); 424 LSN(meta) = *lsnp; 425 meta->free = argp->next; 426 if (argp->pgno > meta->last_pgno) 427 meta->last_pgno = argp->pgno; 428 } else if (cmp_n == 0 && DB_UNDO(op)) { 429 /* Need to undo update described. */ 430 REC_DIRTY(mpf, ip, file_dbp->priority, &meta); 431 LSN(meta) = argp->meta_lsn; 432 /* 433 * If the page has a zero LSN then its newly created and 434 * will be truncated rather than go on the free list. 435 */ 436 if (!IS_ZERO_LSN(argp->page_lsn)) 437 meta->free = argp->pgno; 438 meta->last_pgno = argp->last_pgno; 439 } 440 441#ifdef HAVE_FTRUNCATE 442 /* 443 * Check to see if we are keeping a sorted 444 * freelist, if so put this back in the in 445 * memory list. It must be the first element. 446 */ 447 if (op == DB_TXN_ABORT && !IS_ZERO_LSN(argp->page_lsn)) { 448 db_pgno_t *list; 449 u_int32_t nelem; 450 451 if ((ret = __memp_get_freelist(mpf, &nelem, &list)) != 0) 452 goto out; 453 if (list != NULL) { 454 if ((ret = 455 __memp_extend_freelist(mpf, nelem + 1, &list)) != 0) 456 goto out; 457 if (nelem != 0) 458 memmove(list + 1, list, nelem * sizeof(list)); 459 *list = argp->pgno; 460 } 461 } 462#endif 463 464 /* 465 * Fix up the allocated page. If the page does not exist 466 * and we can truncate it then don't create it. 467 * Otherwise if we're redoing the operation, we have 468 * to get the page (creating it if it doesn't exist), and update its 469 * LSN. If we're undoing the operation, we have to reset the page's 470 * LSN and put it on the free list. 471 */ 472 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { 473 /* 474 * We have to be able to identify if a page was newly 475 * created so we can recover it properly. We cannot simply 476 * look for an empty header, because hash uses a pgin 477 * function that will set the header. Instead, we explicitly 478 * try for the page without CREATE and if that fails, then 479 * create it. 480 */ 481 if (DB_UNDO(op)) 482 goto do_truncate; 483 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 484 DB_MPOOL_CREATE, &pagep)) != 0) { 485 if (DB_UNDO(op) && ret == ENOSPC) 486 goto do_truncate; 487 ret = __db_pgerr(file_dbp, argp->pgno, ret); 488 goto out; 489 } 490 created = 1; 491 } 492 493 /* Fix up the allocated page. */ 494 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 495 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn); 496 497 /* 498 * If an initial allocation is aborted and then reallocated during 499 * an archival restore the log record will have an LSN for the page 500 * but the page will be empty. 501 */ 502 if (IS_ZERO_LSN(LSN(pagep))) 503 cmp_p = 0; 504 505 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn); 506 /* 507 * Another special case we have to handle is if we ended up with a 508 * page of all 0's which can happen if we abort between allocating a 509 * page in mpool and initializing it. In that case, even if we're 510 * undoing, we need to re-initialize the page. 511 */ 512 if (DB_REDO(op) && cmp_p == 0) { 513 /* Need to redo update described. */ 514 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 515 switch (argp->ptype) { 516 case P_LBTREE: 517 case P_LRECNO: 518 case P_LDUP: 519 level = LEAFLEVEL; 520 break; 521 default: 522 level = 0; 523 break; 524 } 525 P_INIT(pagep, file_dbp->pgsize, 526 argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype); 527 528 pagep->lsn = *lsnp; 529 } else if (DB_UNDO(op) && (cmp_n == 0 || created)) { 530 /* 531 * This is where we handle the case of a 0'd page (pagep->pgno 532 * is equal to PGNO_INVALID). 533 * Undo the allocation, reinitialize the page and 534 * link its next pointer to the free list. 535 */ 536 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 537 P_INIT(pagep, file_dbp->pgsize, 538 argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID); 539 540 pagep->lsn = argp->page_lsn; 541 } 542 543do_truncate: 544 /* 545 * If the page was newly created, give it back. 546 */ 547 if ((pagep == NULL || IS_ZERO_LSN(LSN(pagep))) && 548 IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) { 549 /* Discard the page. */ 550 if (pagep != NULL) { 551 if ((ret = __memp_fput(mpf, ip, 552 pagep, DB_PRIORITY_VERY_LOW)) != 0) 553 goto out; 554 pagep = NULL; 555 } 556 /* Give the page back to the OS. */ 557 if (meta->last_pgno <= argp->pgno && (ret = __memp_ftruncate( 558 mpf, ip, argp->pgno, MP_TRUNC_RECOVER)) != 0) 559 goto out; 560 } 561 562 if (pagep != NULL) { 563 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); 564 pagep = NULL; 565 if (ret != 0) 566 goto out; 567 } 568 569 ret = __memp_fput(mpf, ip, meta, file_dbp->priority); 570 meta = NULL; 571 if (ret != 0) 572 goto out; 573 574done: *lsnp = argp->prev_lsn; 575 ret = 0; 576 577out: if (pagep != NULL) 578 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); 579 if (meta != NULL) 580 (void)__memp_fput(mpf, ip, meta, file_dbp->priority); 581 REC_CLOSE; 582} 583 584/* 585 * __db_pg_free_recover_int -- 586 */ 587static int 588__db_pg_free_recover_int(env, ip, argp, file_dbp, lsnp, mpf, op, data) 589 ENV *env; 590 DB_THREAD_INFO *ip; 591 __db_pg_freedata_args *argp; 592 DB *file_dbp; 593 DB_LSN *lsnp; 594 DB_MPOOLFILE *mpf; 595 db_recops op; 596 int data; 597{ 598 DBMETA *meta; 599 DB_LSN copy_lsn; 600 PAGE *pagep, *prevp; 601 int cmp_n, cmp_p, is_meta, ret; 602 603 meta = NULL; 604 pagep = prevp = NULL; 605 606 /* 607 * Get the "metapage". This will either be the metapage 608 * or the previous page in the free list if we are doing 609 * sorted allocations. If its a previous page then 610 * we will not be truncating. 611 */ 612 is_meta = argp->meta_pgno == PGNO_BASE_MD; 613 614 REC_FGET(mpf, ip, argp->meta_pgno, &meta, check_meta); 615 616 if (argp->meta_pgno != PGNO_BASE_MD) 617 prevp = (PAGE *)meta; 618 619 cmp_n = LOG_COMPARE(lsnp, &LSN(meta)); 620 cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn); 621 CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn); 622 623 /* 624 * Fix up the metadata page. If we're redoing or undoing the operation 625 * we get the page and update its LSN, last and free pointer. 626 */ 627 if (cmp_p == 0 && DB_REDO(op)) { 628 REC_DIRTY(mpf, ip, file_dbp->priority, &meta); 629 /* 630 * If we are at the end of the file truncate, otherwise 631 * put on the free list. 632 */ 633 if (argp->pgno == argp->last_pgno) 634 meta->last_pgno = argp->pgno - 1; 635 else if (is_meta) 636 meta->free = argp->pgno; 637 else 638 NEXT_PGNO(prevp) = argp->pgno; 639 LSN(meta) = *lsnp; 640 } else if (cmp_n == 0 && DB_UNDO(op)) { 641 /* Need to undo the deallocation. */ 642 REC_DIRTY(mpf, ip, file_dbp->priority, &meta); 643 if (is_meta) { 644 if (meta->last_pgno < argp->pgno) 645 meta->last_pgno = argp->pgno; 646 meta->free = argp->next; 647 } else 648 NEXT_PGNO(prevp) = argp->next; 649 LSN(meta) = argp->meta_lsn; 650 } 651 652check_meta: 653 if (ret != 0 && is_meta) { 654 /* The metadata page must always exist. */ 655 ret = __db_pgerr(file_dbp, argp->meta_pgno, ret); 656 goto out; 657 } 658 659 /* 660 * Get the freed page. Don't create the page if we are going to 661 * free it. If we're redoing the operation we get the page and 662 * explicitly discard its contents, then update its LSN. If we're 663 * undoing the operation, we get the page and restore its header. 664 */ 665 if (DB_REDO(op) || (is_meta && meta->last_pgno < argp->pgno)) { 666 if ((ret = __memp_fget(mpf, &argp->pgno, 667 ip, NULL, 0, &pagep)) != 0) { 668 if (ret != DB_PAGE_NOTFOUND) 669 goto out; 670 if (is_meta && 671 DB_REDO(op) && meta->last_pgno <= argp->pgno) 672 goto trunc; 673 goto done; 674 } 675 } else if ((ret = __memp_fget(mpf, &argp->pgno, 676 ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) 677 goto out; 678 679 (void)__ua_memcpy(©_lsn, &LSN(argp->header.data), sizeof(DB_LSN)); 680 cmp_n = IS_ZERO_LSN(LSN(pagep)) ? 0 : LOG_COMPARE(lsnp, &LSN(pagep)); 681 cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn); 682 683 /* 684 * This page got extended by a later allocation, 685 * but its allocation was not in the scope of this 686 * recovery pass. 687 */ 688 if (IS_ZERO_LSN(LSN(pagep))) 689 cmp_p = 0; 690 691 CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn); 692 if (DB_REDO(op) && 693 (cmp_p == 0 || 694 (IS_ZERO_LSN(copy_lsn) && 695 LOG_COMPARE(&LSN(pagep), &argp->meta_lsn) <= 0))) { 696 /* Need to redo the deallocation. */ 697 /* 698 * The page can be truncated if it was truncated at runtime 699 * and the current metapage reflects the truncation. 700 */ 701 if (is_meta && meta->last_pgno <= argp->pgno && 702 argp->last_pgno <= argp->pgno) { 703 if ((ret = __memp_fput(mpf, ip, 704 pagep, DB_PRIORITY_VERY_LOW)) != 0) 705 goto out; 706 pagep = NULL; 707trunc: if ((ret = __memp_ftruncate(mpf, ip, 708 argp->pgno, MP_TRUNC_RECOVER)) != 0) 709 goto out; 710 } else if (argp->last_pgno == argp->pgno) { 711 /* The page was truncated at runtime, zero it out. */ 712 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 713 P_INIT(pagep, 0, PGNO_INVALID, 714 PGNO_INVALID, PGNO_INVALID, 0, P_INVALID); 715 ZERO_LSN(pagep->lsn); 716 } else { 717 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 718 P_INIT(pagep, file_dbp->pgsize, 719 argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID); 720 pagep->lsn = *lsnp; 721 722 } 723 } else if (cmp_n == 0 && DB_UNDO(op)) { 724 /* Need to reallocate the page. */ 725 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 726 memcpy(pagep, argp->header.data, argp->header.size); 727 if (data) 728 memcpy((u_int8_t*)pagep + HOFFSET(pagep), 729 argp->data.data, argp->data.size); 730 } 731 if (pagep != NULL && 732 (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) 733 goto out; 734 735 pagep = NULL; 736#ifdef HAVE_FTRUNCATE 737 /* 738 * If we are keeping an in memory free list remove this 739 * element from the list. 740 */ 741 if (op == DB_TXN_ABORT && argp->pgno != argp->last_pgno) { 742 db_pgno_t *lp; 743 u_int32_t nelem, pos; 744 745 if ((ret = __memp_get_freelist(mpf, &nelem, &lp)) != 0) 746 goto out; 747 if (lp != NULL) { 748 pos = 0; 749 if (!is_meta) { 750 __db_freelist_pos(argp->pgno, lp, nelem, &pos); 751 752 DB_ASSERT(env, argp->pgno == lp[pos]); 753 DB_ASSERT(env, 754 argp->meta_pgno == lp[pos - 1]); 755 } 756 757 if (pos < nelem) 758 memmove(&lp[pos], &lp[pos + 1], 759 ((nelem - pos) - 1) * sizeof(*lp)); 760 761 /* Shrink the list */ 762 if ((ret = 763 __memp_extend_freelist(mpf, nelem - 1, &lp)) != 0) 764 goto out; 765 } 766 } 767#endif 768done: 769 if (meta != NULL && 770 (ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) 771 goto out; 772 meta = NULL; 773 ret = 0; 774 775out: if (pagep != NULL) 776 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); 777 if (meta != NULL) 778 (void)__memp_fput(mpf, ip, meta, file_dbp->priority); 779 780 return (ret); 781} 782 783/* 784 * __db_pg_free_recover -- 785 * Recovery function for pg_free. 786 * 787 * PUBLIC: int __db_pg_free_recover 788 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 789 */ 790int 791__db_pg_free_recover(env, dbtp, lsnp, op, info) 792 ENV *env; 793 DBT *dbtp; 794 DB_LSN *lsnp; 795 db_recops op; 796 void *info; 797{ 798 __db_pg_free_args *argp; 799 DB *file_dbp; 800 DBC *dbc; 801 DB_MPOOLFILE *mpf; 802 DB_THREAD_INFO *ip; 803 int ret; 804 805 ip = ((DB_TXNHEAD *)info)->thread_info; 806 REC_PRINT(__db_pg_free_print); 807 REC_INTRO(__db_pg_free_read, ip, 0); 808 809 ret = __db_pg_free_recover_int(env, ip, 810 (__db_pg_freedata_args *)argp, file_dbp, lsnp, mpf, op, 0); 811 812done: *lsnp = argp->prev_lsn; 813out: 814 REC_CLOSE; 815} 816 817/* 818 * __db_pg_freedata_recover -- 819 * Recovery function for pg_freedata. 820 * 821 * PUBLIC: int __db_pg_freedata_recover 822 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 823 */ 824int 825__db_pg_freedata_recover(env, dbtp, lsnp, op, info) 826 ENV *env; 827 DBT *dbtp; 828 DB_LSN *lsnp; 829 db_recops op; 830 void *info; 831{ 832 __db_pg_freedata_args *argp; 833 DB *file_dbp; 834 DBC *dbc; 835 DB_MPOOLFILE *mpf; 836 DB_THREAD_INFO *ip; 837 int ret; 838 839 ip = ((DB_TXNHEAD *)info)->thread_info; 840 REC_PRINT(__db_pg_freedata_print); 841 REC_INTRO(__db_pg_freedata_read, ip, 0); 842 843 ret = __db_pg_free_recover_int(env, 844 ip, argp, file_dbp, lsnp, mpf, op, 1); 845 846done: *lsnp = argp->prev_lsn; 847out: 848 REC_CLOSE; 849} 850 851/* 852 * __db_cksum_recover -- 853 * Recovery function for checksum failure log record. 854 * 855 * PUBLIC: int __db_cksum_recover __P((ENV *, 856 * PUBLIC: DBT *, DB_LSN *, db_recops, void *)); 857 */ 858int 859__db_cksum_recover(env, dbtp, lsnp, op, info) 860 ENV *env; 861 DBT *dbtp; 862 DB_LSN *lsnp; 863 db_recops op; 864 void *info; 865{ 866 __db_cksum_args *argp; 867 int ret; 868 869 COMPQUIET(info, NULL); 870 COMPQUIET(lsnp, NULL); 871 COMPQUIET(op, DB_TXN_ABORT); 872 873 REC_PRINT(__db_cksum_print); 874 875 if ((ret = __db_cksum_read(env, dbtp->data, &argp)) != 0) 876 return (ret); 877 878 /* 879 * We had a checksum failure -- the only option is to run catastrophic 880 * recovery. 881 */ 882 if (F_ISSET(env, ENV_RECOVER_FATAL)) 883 ret = 0; 884 else { 885 __db_errx(env, 886 "Checksum failure requires catastrophic recovery"); 887 ret = __env_panic(env, DB_RUNRECOVERY); 888 } 889 890 __os_free(env, argp); 891 return (ret); 892} 893 894/* 895 * __db_pg_init_recover -- 896 * Recovery function to reinit pages after truncation. 897 * 898 * PUBLIC: int __db_pg_init_recover 899 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 900 */ 901int 902__db_pg_init_recover(env, dbtp, lsnp, op, info) 903 ENV *env; 904 DBT *dbtp; 905 DB_LSN *lsnp; 906 db_recops op; 907 void *info; 908{ 909 __db_pg_init_args *argp; 910 DB_THREAD_INFO *ip; 911 DB *file_dbp; 912 DBC *dbc; 913 DB_LSN copy_lsn; 914 DB_MPOOLFILE *mpf; 915 PAGE *pagep; 916 int cmp_n, cmp_p, ret, type; 917 918 ip = ((DB_TXNHEAD *)info)->thread_info; 919 REC_PRINT(__db_pg_init_print); 920 REC_INTRO(__db_pg_init_read, ip, 0); 921 922 mpf = file_dbp->mpf; 923 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { 924 if (DB_UNDO(op)) { 925 if (ret == DB_PAGE_NOTFOUND) 926 goto done; 927 else { 928 ret = __db_pgerr(file_dbp, argp->pgno, ret); 929 goto out; 930 } 931 } 932 933 /* 934 * This page was truncated and may simply not have 935 * had an item written to it yet. This should only 936 * happen on hash databases, so confirm that. 937 */ 938 DB_ASSERT(env, file_dbp->type == DB_HASH); 939 if ((ret = __memp_fget(mpf, &argp->pgno, 940 ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) { 941 ret = __db_pgerr(file_dbp, argp->pgno, ret); 942 goto out; 943 } 944 } 945 946 (void)__ua_memcpy(©_lsn, &LSN(argp->header.data), sizeof(DB_LSN)); 947 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 948 cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn); 949 CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn); 950 951 if (cmp_p == 0 && DB_REDO(op)) { 952 if (TYPE(pagep) == P_HASH) 953 type = P_HASH; 954 else 955 type = file_dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE; 956 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 957 P_INIT(pagep, file_dbp->pgsize, PGNO(pagep), PGNO_INVALID, 958 PGNO_INVALID, TYPE(pagep) == P_HASH ? 0 : 1, type); 959 pagep->lsn = *lsnp; 960 } else if (cmp_n == 0 && DB_UNDO(op)) { 961 /* Put the data back on the page. */ 962 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 963 memcpy(pagep, argp->header.data, argp->header.size); 964 if (argp->data.size > 0) 965 memcpy((u_int8_t*)pagep + HOFFSET(pagep), 966 argp->data.data, argp->data.size); 967 } 968 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) 969 goto out; 970 971done: *lsnp = argp->prev_lsn; 972out: 973 REC_CLOSE; 974} 975 976/* 977 * __db_pg_sort_recover -- 978 * Recovery function for pg_sort. 979 * 980 * PUBLIC: int __db_pg_sort_recover 981 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 982 */ 983int 984__db_pg_sort_recover(env, dbtp, lsnp, op, info) 985 ENV *env; 986 DBT *dbtp; 987 DB_LSN *lsnp; 988 db_recops op; 989 void *info; 990{ 991#ifdef HAVE_FTRUNCATE 992 __db_pg_sort_args *argp; 993 DB_THREAD_INFO *ip; 994 DB *file_dbp; 995 DBC *dbc; 996 DBMETA *meta; 997 DB_MPOOLFILE *mpf; 998 PAGE *pagep; 999 db_pglist_t *pglist, *lp; 1000 db_pgno_t pgno, *list; 1001 u_int32_t felem, nelem; 1002 int ret; 1003 1004 ip = ((DB_TXNHEAD *)info)->thread_info; 1005 REC_PRINT(__db_pg_sort_print); 1006 REC_INTRO(__db_pg_sort_read, ip, 1); 1007 1008 pglist = (db_pglist_t *) argp->list.data; 1009 nelem = argp->list.size / sizeof(db_pglist_t); 1010 if (DB_REDO(op)) { 1011 pgno = argp->last_pgno; 1012 if ((ret = __db_pg_truncate(dbc, NULL, 1013 pglist, NULL, &nelem, &pgno, lsnp, 1)) != 0) 1014 goto out; 1015 1016 if (argp->last_free != PGNO_INVALID) { 1017 if ((ret = __memp_fget(mpf, 1018 &argp->last_free, ip, NULL, 0, &meta)) == 0) { 1019 if (LOG_COMPARE(&LSN(meta), 1020 &argp->last_lsn) == 0) { 1021 REC_DIRTY(mpf, 1022 ip, dbc->priority, &meta); 1023 NEXT_PGNO(meta) = PGNO_INVALID; 1024 LSN(meta) = *lsnp; 1025 } 1026 if ((ret = __memp_fput(mpf, ip, 1027 meta, file_dbp->priority)) != 0) 1028 goto out; 1029 meta = NULL; 1030 } else if (ret != DB_PAGE_NOTFOUND) 1031 goto out; 1032 } 1033 if ((ret = __memp_fget(mpf, &argp->meta, ip, NULL, 1034 0, &meta)) != 0) 1035 goto out; 1036 if (LOG_COMPARE(&LSN(meta), &argp->meta_lsn) == 0) { 1037 REC_DIRTY(mpf, ip, dbc->priority, &meta); 1038 if (argp->last_free == PGNO_INVALID) { 1039 if (nelem == 0) 1040 meta->free = PGNO_INVALID; 1041 else 1042 meta->free = pglist->pgno; 1043 } 1044 meta->last_pgno = pgno; 1045 LSN(meta) = *lsnp; 1046 } 1047 } else { 1048 /* Put the free list back in its original order. */ 1049 for (lp = pglist; lp < &pglist[nelem]; lp++) { 1050 if ((ret = __memp_fget(mpf, &lp->pgno, ip, 1051 NULL, DB_MPOOL_CREATE, &pagep)) != 0) 1052 goto out; 1053 if (IS_ZERO_LSN(LSN(pagep)) || 1054 LOG_COMPARE(&LSN(pagep), lsnp) == 0) { 1055 REC_DIRTY(mpf, ip, dbc->priority, &pagep); 1056 if (lp == &pglist[nelem - 1]) 1057 pgno = PGNO_INVALID; 1058 else 1059 pgno = lp[1].pgno; 1060 1061 P_INIT(pagep, file_dbp->pgsize, 1062 lp->pgno, PGNO_INVALID, pgno, 0, P_INVALID); 1063 LSN(pagep) = lp->lsn; 1064 } 1065 if ((ret = __memp_fput(mpf, 1066 ip, pagep, file_dbp->priority)) != 0) 1067 goto out; 1068 } 1069 if (argp->last_free != PGNO_INVALID) { 1070 if ((ret = __memp_fget(mpf, &argp->last_free, 1071 ip, NULL, DB_MPOOL_EDIT, &meta)) == 0) { 1072 if (LOG_COMPARE(&LSN(meta), lsnp) == 0) { 1073 REC_DIRTY(mpf, 1074 ip, dbc->priority, &pagep); 1075 NEXT_PGNO(meta) = pglist->pgno; 1076 LSN(meta) = argp->last_lsn; 1077 } 1078 if ((ret = __memp_fput(mpf, ip, 1079 meta, file_dbp->priority)) != 0) 1080 goto out; 1081 } else if (ret != DB_PAGE_NOTFOUND) 1082 goto out; 1083 meta = NULL; 1084 } 1085 if ((ret = __memp_fget(mpf, &argp->meta, 1086 ip, NULL, DB_MPOOL_EDIT, &meta)) != 0) 1087 goto out; 1088 if (LOG_COMPARE(&LSN(meta), lsnp) == 0) { 1089 REC_DIRTY(mpf, ip, dbc->priority, &meta); 1090 meta->last_pgno = argp->last_pgno; 1091 if (argp->last_free == PGNO_INVALID) 1092 meta->free = pglist->pgno; 1093 LSN(meta) = argp->meta_lsn; 1094 } 1095 } 1096 if (op == DB_TXN_ABORT) { 1097 if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0) 1098 goto out; 1099 if (list != NULL) { 1100 DB_ASSERT(env, felem == 0 || 1101 argp->last_free == list[felem - 1]); 1102 if ((ret = __memp_extend_freelist( 1103 mpf, felem + nelem, &list)) != 0) 1104 goto out; 1105 for (lp = pglist; lp < &pglist[nelem]; lp++) 1106 list[felem++] = lp->pgno; 1107 } 1108 } 1109 1110 if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) 1111 goto out; 1112 1113done: *lsnp = argp->prev_lsn; 1114 ret = 0; 1115 1116out: REC_CLOSE; 1117#else 1118 /* 1119 * If HAVE_FTRUNCATE is not defined, we'll never see pg_sort records 1120 * to recover. 1121 */ 1122 COMPQUIET(env, NULL); 1123 COMPQUIET(dbtp, NULL); 1124 COMPQUIET(lsnp, NULL); 1125 COMPQUIET(op, DB_TXN_ABORT); 1126 COMPQUIET(info, NULL); 1127 return (EINVAL); 1128#endif 1129} 1130 1131/* 1132 * __db_pg_alloc_42_recover -- 1133 * Recovery function for pg_alloc. 1134 * 1135 * PUBLIC: int __db_pg_alloc_42_recover 1136 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 1137 */ 1138int 1139__db_pg_alloc_42_recover(env, dbtp, lsnp, op, info) 1140 ENV *env; 1141 DBT *dbtp; 1142 DB_LSN *lsnp; 1143 db_recops op; 1144 void *info; 1145{ 1146 __db_pg_alloc_42_args *argp; 1147 DB_THREAD_INFO *ip; 1148 DB *file_dbp; 1149 DBC *dbc; 1150 DBMETA *meta; 1151 DB_MPOOLFILE *mpf; 1152 PAGE *pagep; 1153 db_pgno_t pgno; 1154 int cmp_n, cmp_p, created, level, ret; 1155 1156 ip = ((DB_TXNHEAD *)info)->thread_info; 1157 meta = NULL; 1158 pagep = NULL; 1159 created = 0; 1160 REC_PRINT(__db_pg_alloc_42_print); 1161 REC_INTRO(__db_pg_alloc_42_read, ip, 0); 1162 1163 /* 1164 * Fix up the metadata page. If we're redoing the operation, we have 1165 * to get the metadata page and update its LSN and its free pointer. 1166 * If we're undoing the operation and the page was ever created, we put 1167 * it on the freelist. 1168 */ 1169 pgno = PGNO_BASE_MD; 1170 if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &meta)) != 0) { 1171 /* The metadata page must always exist on redo. */ 1172 if (DB_REDO(op)) { 1173 ret = __db_pgerr(file_dbp, pgno, ret); 1174 goto out; 1175 } else 1176 goto done; 1177 } 1178 cmp_n = LOG_COMPARE(lsnp, &LSN(meta)); 1179 cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn); 1180 CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn); 1181 if (cmp_p == 0 && DB_REDO(op)) { 1182 /* Need to redo update described. */ 1183 REC_DIRTY(mpf, ip, file_dbp->priority, &meta); 1184 LSN(meta) = *lsnp; 1185 meta->free = argp->next; 1186 if (argp->pgno > meta->last_pgno) 1187 meta->last_pgno = argp->pgno; 1188 } else if (cmp_n == 0 && DB_UNDO(op)) { 1189 goto no_rollback; 1190 } 1191 1192 /* 1193 * Fix up the allocated page. If the page does not exist 1194 * and we can truncate it then don't create it. 1195 * Otherwise if we're redoing the operation, we have 1196 * to get the page (creating it if it doesn't exist), and update its 1197 * LSN. If we're undoing the operation, we have to reset the page's 1198 * LSN and put it on the free list, or truncate it. 1199 */ 1200 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { 1201 /* 1202 * We have to be able to identify if a page was newly 1203 * created so we can recover it properly. We cannot simply 1204 * look for an empty header, because hash uses a pgin 1205 * function that will set the header. Instead, we explicitly 1206 * try for the page without CREATE and if that fails, then 1207 * create it. 1208 */ 1209 if ((ret = __memp_fget(mpf, &argp->pgno, 1210 ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) { 1211 if (DB_UNDO(op) && ret == ENOSPC) 1212 goto do_truncate; 1213 ret = __db_pgerr(file_dbp, argp->pgno, ret); 1214 goto out; 1215 } 1216 created = 1; 1217 } 1218 1219 /* Fix up the allocated page. */ 1220 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 1221 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn); 1222 1223 /* 1224 * If an initial allocation is aborted and then reallocated during 1225 * an archival restore the log record will have an LSN for the page 1226 * but the page will be empty. 1227 */ 1228 if (IS_ZERO_LSN(LSN(pagep)) || 1229 (IS_ZERO_LSN(argp->page_lsn) && IS_INIT_LSN(LSN(pagep)))) 1230 cmp_p = 0; 1231 1232 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn); 1233 /* 1234 * Another special case we have to handle is if we ended up with a 1235 * page of all 0's which can happen if we abort between allocating a 1236 * page in mpool and initializing it. In that case, even if we're 1237 * undoing, we need to re-initialize the page. 1238 */ 1239 if (DB_REDO(op) && cmp_p == 0) { 1240 /* Need to redo update described. */ 1241 switch (argp->ptype) { 1242 case P_LBTREE: 1243 case P_LRECNO: 1244 case P_LDUP: 1245 level = LEAFLEVEL; 1246 break; 1247 default: 1248 level = 0; 1249 break; 1250 } 1251 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1252 P_INIT(pagep, file_dbp->pgsize, 1253 argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype); 1254 1255 pagep->lsn = *lsnp; 1256 } else if (DB_UNDO(op) && (cmp_n == 0 || created)) { 1257 /* 1258 * This is where we handle the case of a 0'd page (pagep->pgno 1259 * is equal to PGNO_INVALID). 1260 * Undo the allocation, reinitialize the page and 1261 * link its next pointer to the free list. 1262 */ 1263 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1264 P_INIT(pagep, file_dbp->pgsize, 1265 argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID); 1266 1267 pagep->lsn = argp->page_lsn; 1268 } 1269 1270do_truncate: 1271 /* 1272 * We cannot undo things from 4.2 land, because we nolonger 1273 * have limbo processing. 1274 */ 1275 if ((pagep == NULL || IS_ZERO_LSN(LSN(pagep))) && 1276 IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) { 1277no_rollback: __db_errx(env, 1278"Cannot replicate prepared transactions from master running release 4.2 "); 1279 ret = __env_panic(env, EINVAL); 1280 } 1281 1282 if (pagep != NULL && 1283 (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) 1284 goto out; 1285 pagep = NULL; 1286 1287 if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) 1288 goto out; 1289 meta = NULL; 1290 1291done: *lsnp = argp->prev_lsn; 1292 ret = 0; 1293 1294out: if (pagep != NULL) 1295 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); 1296 if (meta != NULL) 1297 (void)__memp_fput(mpf, ip, meta, file_dbp->priority); 1298 REC_CLOSE; 1299} 1300 1301/* 1302 * __db_pg_free_recover_42_int -- 1303 */ 1304static int 1305__db_pg_free_recover_42_int(env, ip, argp, file_dbp, lsnp, mpf, op, data) 1306 ENV *env; 1307 DB_THREAD_INFO *ip; 1308 __db_pg_freedata_42_args *argp; 1309 DB *file_dbp; 1310 DB_LSN *lsnp; 1311 DB_MPOOLFILE *mpf; 1312 db_recops op; 1313 int data; 1314{ 1315 DBMETA *meta; 1316 DB_LSN copy_lsn; 1317 PAGE *pagep, *prevp; 1318 int cmp_n, cmp_p, is_meta, ret; 1319 1320 meta = NULL; 1321 pagep = NULL; 1322 prevp = NULL; 1323 1324 /* 1325 * Get the "metapage". This will either be the metapage 1326 * or the previous page in the free list if we are doing 1327 * sorted allocations. If its a previous page then 1328 * we will not be truncating. 1329 */ 1330 is_meta = argp->meta_pgno == PGNO_BASE_MD; 1331 1332 REC_FGET(mpf, ip, argp->meta_pgno, &meta, check_meta); 1333 1334 if (argp->meta_pgno != PGNO_BASE_MD) 1335 prevp = (PAGE *)meta; 1336 1337 cmp_n = LOG_COMPARE(lsnp, &LSN(meta)); 1338 cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn); 1339 CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn); 1340 1341 /* 1342 * Fix up the metadata page. If we're redoing or undoing the operation 1343 * we get the page and update its LSN, last and free pointer. 1344 */ 1345 if (cmp_p == 0 && DB_REDO(op)) { 1346 /* Need to redo the deallocation. */ 1347 REC_DIRTY(mpf, ip, file_dbp->priority, &meta); 1348 if (prevp == NULL) 1349 meta->free = argp->pgno; 1350 else 1351 NEXT_PGNO(prevp) = argp->pgno; 1352 /* 1353 * If this was a compensating transaction and 1354 * we are a replica, then we never executed the 1355 * original allocation which incremented meta->free. 1356 */ 1357 if (prevp == NULL && meta->last_pgno < meta->free) 1358 meta->last_pgno = meta->free; 1359 LSN(meta) = *lsnp; 1360 } else if (cmp_n == 0 && DB_UNDO(op)) { 1361 /* Need to undo the deallocation. */ 1362 REC_DIRTY(mpf, ip, file_dbp->priority, &meta); 1363 if (prevp == NULL) 1364 meta->free = argp->next; 1365 else 1366 NEXT_PGNO(prevp) = argp->next; 1367 LSN(meta) = argp->meta_lsn; 1368 if (prevp == NULL && meta->last_pgno < argp->pgno) 1369 meta->last_pgno = argp->pgno; 1370 } 1371 1372check_meta: 1373 if (ret != 0 && is_meta) { 1374 /* The metadata page must always exist. */ 1375 ret = __db_pgerr(file_dbp, argp->meta_pgno, ret); 1376 goto out; 1377 } 1378 1379 /* 1380 * Get the freed page. If we support truncate then don't 1381 * create the page if we are going to free it. If we're 1382 * redoing the operation we get the page and explicitly discard 1383 * its contents, then update its LSN. If we're undoing the 1384 * operation, we get the page and restore its header. 1385 * If we don't support truncate, then we must create the page 1386 * and roll it back. 1387 */ 1388 if ((ret = __memp_fget(mpf, &argp->pgno, 1389 ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) 1390 goto out; 1391 1392 (void)__ua_memcpy(©_lsn, &LSN(argp->header.data), sizeof(DB_LSN)); 1393 cmp_n = IS_ZERO_LSN(LSN(pagep)) ? 0 : LOG_COMPARE(lsnp, &LSN(pagep)); 1394 cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn); 1395 1396 CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn); 1397 if (DB_REDO(op) && 1398 (cmp_p == 0 || 1399 (IS_ZERO_LSN(copy_lsn) && 1400 LOG_COMPARE(&LSN(pagep), &argp->meta_lsn) <= 0))) { 1401 /* Need to redo the deallocation. */ 1402 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1403 P_INIT(pagep, file_dbp->pgsize, 1404 argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID); 1405 pagep->lsn = *lsnp; 1406 } else if (cmp_n == 0 && DB_UNDO(op)) { 1407 /* Need to reallocate the page. */ 1408 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1409 memcpy(pagep, argp->header.data, argp->header.size); 1410 if (data) 1411 memcpy((u_int8_t*)pagep + HOFFSET(pagep), 1412 argp->data.data, argp->data.size); 1413 } 1414 if (pagep != NULL && 1415 (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) 1416 goto out; 1417 1418 pagep = NULL; 1419 if (meta != NULL && 1420 (ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) 1421 goto out; 1422 meta = NULL; 1423 1424 ret = 0; 1425 1426out: if (pagep != NULL) 1427 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); 1428 if (meta != NULL) 1429 (void)__memp_fput(mpf, ip, meta, file_dbp->priority); 1430 1431 return (ret); 1432} 1433 1434/* 1435 * __db_pg_free_42_recover -- 1436 * Recovery function for pg_free. 1437 * 1438 * PUBLIC: int __db_pg_free_42_recover 1439 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 1440 */ 1441int 1442__db_pg_free_42_recover(env, dbtp, lsnp, op, info) 1443 ENV *env; 1444 DBT *dbtp; 1445 DB_LSN *lsnp; 1446 db_recops op; 1447 void *info; 1448{ 1449 __db_pg_free_42_args *argp; 1450 DB *file_dbp; 1451 DBC *dbc; 1452 DB_MPOOLFILE *mpf; 1453 DB_THREAD_INFO *ip; 1454 int ret; 1455 1456 ip = ((DB_TXNHEAD *)info)->thread_info; 1457 REC_PRINT(__db_pg_free_42_print); 1458 REC_INTRO(__db_pg_free_42_read, ip, 0); 1459 1460 ret = __db_pg_free_recover_42_int(env, ip, 1461 (__db_pg_freedata_42_args *)argp, file_dbp, lsnp, mpf, op, 0); 1462 1463done: *lsnp = argp->prev_lsn; 1464out: 1465 REC_CLOSE; 1466} 1467 1468/* 1469 * __db_pg_freedata_42_recover -- 1470 * Recovery function for pg_freedata. 1471 * 1472 * PUBLIC: int __db_pg_freedata_42_recover 1473 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 1474 */ 1475int 1476__db_pg_freedata_42_recover(env, dbtp, lsnp, op, info) 1477 ENV *env; 1478 DBT *dbtp; 1479 DB_LSN *lsnp; 1480 db_recops op; 1481 void *info; 1482{ 1483 __db_pg_freedata_42_args *argp; 1484 DB *file_dbp; 1485 DBC *dbc; 1486 DB_MPOOLFILE *mpf; 1487 DB_THREAD_INFO *ip; 1488 int ret; 1489 1490 ip = ((DB_TXNHEAD *)info)->thread_info; 1491 REC_PRINT(__db_pg_freedata_42_print); 1492 REC_INTRO(__db_pg_freedata_42_read, ip, 0); 1493 1494 ret = __db_pg_free_recover_42_int( 1495 env, ip, argp, file_dbp, lsnp, mpf, op, 1); 1496 1497done: *lsnp = argp->prev_lsn; 1498out: 1499 REC_CLOSE; 1500} 1501 1502/* 1503 * __db_relink_42_recover -- 1504 * Recovery function for relink. 1505 * 1506 * PUBLIC: int __db_relink_42_recover 1507 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 1508 */ 1509int 1510__db_relink_42_recover(env, dbtp, lsnp, op, info) 1511 ENV *env; 1512 DBT *dbtp; 1513 DB_LSN *lsnp; 1514 db_recops op; 1515 void *info; 1516{ 1517 __db_relink_42_args *argp; 1518 DB_THREAD_INFO *ip; 1519 DB *file_dbp; 1520 DBC *dbc; 1521 DB_MPOOLFILE *mpf; 1522 PAGE *pagep; 1523 int cmp_n, cmp_p, modified, ret; 1524 1525 ip = ((DB_TXNHEAD *)info)->thread_info; 1526 pagep = NULL; 1527 REC_PRINT(__db_relink_42_print); 1528 REC_INTRO(__db_relink_42_read, ip, 0); 1529 1530 /* 1531 * There are up to three pages we need to check -- the page, and the 1532 * previous and next pages, if they existed. For a page add operation, 1533 * the current page is the result of a split and is being recovered 1534 * elsewhere, so all we need do is recover the next page. 1535 */ 1536 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { 1537 if (DB_REDO(op)) { 1538 ret = __db_pgerr(file_dbp, argp->pgno, ret); 1539 goto out; 1540 } 1541 goto next2; 1542 } 1543 if (argp->opcode == DB_ADD_PAGE_COMPAT) 1544 goto next1; 1545 1546 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); 1547 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn); 1548 if (cmp_p == 0 && DB_REDO(op)) { 1549 /* Redo the relink. */ 1550 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1551 pagep->lsn = *lsnp; 1552 } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) { 1553 /* Undo the relink. */ 1554 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1555 pagep->next_pgno = argp->next; 1556 pagep->prev_pgno = argp->prev; 1557 pagep->lsn = argp->lsn; 1558 } 1559next1: if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) 1560 goto out; 1561 pagep = NULL; 1562 1563next2: if ((ret = __memp_fget(mpf, &argp->next, ip, NULL, 0, &pagep)) != 0) { 1564 if (DB_REDO(op)) { 1565 ret = __db_pgerr(file_dbp, argp->next, ret); 1566 goto out; 1567 } 1568 goto prev; 1569 } 1570 modified = 0; 1571 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); 1572 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next); 1573 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next); 1574 if ((argp->opcode == DB_REM_PAGE_COMPAT && cmp_p == 0 && DB_REDO(op)) || 1575 (argp->opcode == DB_ADD_PAGE_COMPAT && cmp_n == 0 && DB_UNDO(op))) { 1576 /* Redo the remove or undo the add. */ 1577 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1578 pagep->prev_pgno = argp->prev; 1579 modified = 1; 1580 } else if ((argp->opcode == DB_REM_PAGE_COMPAT && 1581 cmp_n == 0 && DB_UNDO(op)) || 1582 (argp->opcode == DB_ADD_PAGE_COMPAT && cmp_p == 0 && DB_REDO(op))) { 1583 /* Undo the remove or redo the add. */ 1584 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1585 pagep->prev_pgno = argp->pgno; 1586 modified = 1; 1587 } 1588 if (modified) { 1589 if (DB_UNDO(op)) 1590 pagep->lsn = argp->lsn_next; 1591 else 1592 pagep->lsn = *lsnp; 1593 } 1594 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) 1595 goto out; 1596 pagep = NULL; 1597 if (argp->opcode == DB_ADD_PAGE_COMPAT) 1598 goto done; 1599 1600prev: if ((ret = __memp_fget(mpf, &argp->prev, ip, NULL, 0, &pagep)) != 0) { 1601 if (DB_REDO(op)) { 1602 ret = __db_pgerr(file_dbp, argp->prev, ret); 1603 goto out; 1604 } 1605 goto done; 1606 } 1607 modified = 0; 1608 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev); 1609 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev); 1610 if (cmp_p == 0 && DB_REDO(op)) { 1611 /* Redo the relink. */ 1612 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1613 pagep->next_pgno = argp->next; 1614 modified = 1; 1615 } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) { 1616 /* Undo the relink. */ 1617 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); 1618 pagep->next_pgno = argp->pgno; 1619 modified = 1; 1620 } 1621 if (modified) { 1622 if (DB_UNDO(op)) 1623 pagep->lsn = argp->lsn_prev; 1624 else 1625 pagep->lsn = *lsnp; 1626 } 1627 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) 1628 goto out; 1629 pagep = NULL; 1630 1631done: *lsnp = argp->prev_lsn; 1632 ret = 0; 1633 1634out: if (pagep != NULL) 1635 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); 1636 REC_CLOSE; 1637} 1638