1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996,2008 Oracle. All rights reserved. 5 * 6 * $Id: env_recover.c,v 12.60 2008/03/12 20:52:53 mbrey Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/db_page.h" 13#include "dbinc/fop.h" 14#include "dbinc/btree.h" 15#include "dbinc/hash.h" 16#include "dbinc/log.h" 17#include "dbinc/mp.h" 18#include "dbinc/qam.h" 19#include "dbinc/txn.h" 20 21#ifndef lint 22static const char copyright[] = 23 "Copyright (c) 1996,2008 Oracle. All rights reserved.\n"; 24#endif 25 26static int __db_log_corrupt __P((ENV *, DB_LSN *)); 27static int __env_init_rec_42 __P((ENV *)); 28static int __env_init_rec_43 __P((ENV *)); 29static int __env_init_rec_46 __P((ENV *)); 30static int __env_init_rec_47 __P((ENV *)); 31static int __log_earliest __P((ENV *, DB_LOGC *, int32_t *, DB_LSN *)); 32 33#ifndef HAVE_BREW 34static double __lsn_diff __P((DB_LSN *, DB_LSN *, DB_LSN *, u_int32_t, int)); 35#endif 36 37/* 38 * __db_apprec -- 39 * Perform recovery. If max_lsn is non-NULL, then we are trying 40 * to synchronize this system up with another system that has a max 41 * LSN of max_lsn, so we need to roll back sufficiently far for that 42 * to work. See __log_backup for details. 43 * 44 * PUBLIC: int __db_apprec __P((ENV *, 45 * PUBLIC: DB_THREAD_INFO *, DB_LSN *, DB_LSN *, int, u_int32_t)); 46 */ 47int 48__db_apprec(env, ip, max_lsn, trunclsn, update, flags) 49 ENV *env; 50 DB_THREAD_INFO *ip; 51 DB_LSN *max_lsn, *trunclsn; 52 int update; 53 u_int32_t flags; 54{ 55 DBT data; 56 DB_ENV *dbenv; 57 DB_LOGC *logc; 58 DB_LSN ckp_lsn, first_lsn, last_lsn, lowlsn, lsn, stop_lsn, tlsn; 59 DB_TXNHEAD *txninfo; 60 DB_TXNREGION *region; 61 REGENV *renv; 62 REGINFO *infop; 63 __txn_ckp_args *ckp_args; 64 time_t now, tlow; 65 double nfiles; 66 u_int32_t hi_txn, log_size, txnid; 67 int32_t low; 68 int have_rec, progress, ret, t_ret; 69 char *p, *pass; 70 char t1[CTIME_BUFLEN], t2[CTIME_BUFLEN], time_buf[CTIME_BUFLEN]; 71 72 COMPQUIET(nfiles, (double)0.001); 73 74 dbenv = env->dbenv; 75 logc = NULL; 76 ckp_args = NULL; 77 hi_txn = TXN_MAXIMUM; 78 txninfo = NULL; 79 pass = "initial"; 80 ZERO_LSN(lsn); 81 82 /* 83 * XXX 84 * Get the log size. No locking required because we're single-threaded 85 * during recovery. 86 */ 87 log_size = ((LOG *)env->lg_handle->reginfo.primary)->log_size; 88 89 /* 90 * If we need to, update the env handle timestamp. 91 */ 92 if (update && REP_ON(env)) { 93 infop = env->reginfo; 94 renv = infop->primary; 95 (void)time(&renv->rep_timestamp); 96 } 97 98 /* Set in-recovery flags. */ 99 F_SET(env->lg_handle, DBLOG_RECOVER); 100 region = env->tx_handle->reginfo.primary; 101 F_SET(region, TXN_IN_RECOVERY); 102 103 /* Allocate a cursor for the log. */ 104 if ((ret = __log_cursor(env, &logc)) != 0) 105 goto err; 106 107 /* 108 * If the user is specifying recovery to a particular point in time 109 * or to a particular LSN, find the point to start recovery from. 110 */ 111 ZERO_LSN(lowlsn); 112 if (max_lsn != NULL) { 113 if ((ret = __log_backup(env, logc, max_lsn, &lowlsn, 114 CKPLSN_CMP)) != 0) 115 goto err; 116 } else if (dbenv->tx_timestamp != 0) { 117 if ((ret = __log_earliest(env, logc, &low, &lowlsn)) != 0) 118 goto err; 119 if ((int32_t)dbenv->tx_timestamp < low) { 120 t1[sizeof(t1) - 1] = '\0'; 121 (void)strncpy(t1, __os_ctime( 122 &dbenv->tx_timestamp, time_buf), sizeof(t1) - 1); 123 if ((p = strchr(t1, '\n')) != NULL) 124 *p = '\0'; 125 126 t2[sizeof(t2) - 1] = '\0'; 127 tlow = (time_t)low; 128 (void)strncpy(t2, __os_ctime( 129 &tlow, time_buf), sizeof(t2) - 1); 130 if ((p = strchr(t2, '\n')) != NULL) 131 *p = '\0'; 132 133 __db_errx(env, 134 "Invalid recovery timestamp %s; earliest time is %s", 135 t1, t2); 136 ret = EINVAL; 137 goto err; 138 } 139 } 140 141 /* 142 * Recovery is done in three passes: 143 * Pass #0: 144 * We need to find the position from which we will open files. 145 * We need to open files beginning with the earlier of the 146 * most recent checkpoint LSN and a checkpoint LSN before the 147 * recovery timestamp, if specified. We need to be before the 148 * most recent checkpoint LSN because we are going to collect 149 * information about which transactions were begun before we 150 * start rolling forward. Those that were should never be undone 151 * because queue cannot use LSNs to determine what operations can 152 * safely be aborted and it cannot rollback operations in 153 * transactions for which there may be records not processed 154 * during recovery. We need to consider earlier points in time 155 * in case we are recovering to a particular timestamp. 156 * 157 * Pass #1: 158 * Read forward through the log from the position found in pass 0 159 * opening and closing files, and recording transactions for which 160 * we've seen their first record (the transaction's prev_lsn is 161 * 0,0). At the end of this pass, we know all transactions for 162 * which we've seen begins and we have the "current" set of files 163 * open. 164 * 165 * Pass #2: 166 * Read backward through the log undoing any uncompleted TXNs. 167 * There are four cases: 168 * 1. If doing catastrophic recovery, we read to the 169 * beginning of the log 170 * 2. If we are doing normal reovery, then we have to roll 171 * back to the most recent checkpoint LSN. 172 * 3. If we are recovering to a point in time, then we have 173 * to roll back to the checkpoint whose ckp_lsn is earlier 174 * than the specified time. __log_earliest will figure 175 * this out for us. 176 * 4. If we are recovering back to a particular LSN, then 177 * we have to roll back to the checkpoint whose ckp_lsn 178 * is earlier than the max_lsn. __log_backup will figure 179 * that out for us. 180 * In case 2, "uncompleted TXNs" include all those who committed 181 * after the user's specified timestamp. 182 * 183 * Pass #3: 184 * Read forward through the log from the LSN found in pass #2, 185 * redoing any committed TXNs (which committed after any user- 186 * specified rollback point). During this pass, checkpoint 187 * file information is ignored, and file openings and closings 188 * are redone. 189 * 190 * ckp_lsn -- lsn of the last checkpoint or the first in the log. 191 * first_lsn -- the lsn where the forward passes begin. 192 * last_lsn -- the last lsn in the log, used for feedback 193 * lowlsn -- the lsn we are rolling back to, if we are recovering 194 * to a point in time. 195 * lsn -- temporary use lsn. 196 * stop_lsn -- the point at which forward roll should stop 197 */ 198 199 /* 200 * Find out the last lsn, so that we can estimate how far along we 201 * are in recovery. This will help us determine how much log there 202 * is between the first LSN that we're going to be working with and 203 * the last one. We assume that each of the three phases takes the 204 * same amount of time (a false assumption) and then use the %-age 205 * of the amount of log traversed to figure out how much of the 206 * pass we've accomplished. 207 * 208 * If we can't find any log records, we're kind of done. 209 */ 210#ifdef UMRW 211 ZERO_LSN(last_lsn); 212#endif 213 memset(&data, 0, sizeof(data)); 214 if ((ret = __logc_get(logc, &last_lsn, &data, DB_LAST)) != 0) { 215 if (ret == DB_NOTFOUND) 216 ret = 0; 217 else 218 __db_errx(env, "Last log record not found"); 219 goto err; 220 } 221 222 do { 223 /* txnid is after rectype, which is a u_int32. */ 224 LOGCOPY_32(env, &txnid, 225 (u_int8_t *)data.data + sizeof(u_int32_t)); 226 227 if (txnid != 0) 228 break; 229 } while ((ret = __logc_get(logc, &lsn, &data, DB_PREV)) == 0); 230 231 /* 232 * There are no transactions, so there is nothing to do unless 233 * we're recovering to an LSN. If we are, we need to proceed since 234 * we'll still need to do a vtruncate based on information we haven't 235 * yet collected. 236 */ 237 if (ret == DB_NOTFOUND) 238 ret = 0; 239 else if (ret != 0) 240 goto err; 241 242 hi_txn = txnid; 243 244 /* 245 * Pass #0 246 * Find the LSN from which we begin OPENFILES. 247 * 248 * If this is a catastrophic recovery, or if no checkpoint exists 249 * in the log, the LSN is the first LSN in the log. 250 * 251 * Otherwise, it is the minimum of (1) the LSN in the last checkpoint 252 * and (2) the LSN in the checkpoint before any specified recovery 253 * timestamp or max_lsn. 254 */ 255 /* 256 * Get the first LSN in the log; it's an initial default 257 * even if this is not a catastrophic recovery. 258 */ 259 if ((ret = __logc_get(logc, &ckp_lsn, &data, DB_FIRST)) != 0) { 260 if (ret == DB_NOTFOUND) 261 ret = 0; 262 else 263 __db_errx(env, "First log record not found"); 264 goto err; 265 } 266 first_lsn = ckp_lsn; 267 have_rec = 1; 268 269 if (!LF_ISSET(DB_RECOVER_FATAL)) { 270 if ((ret = __txn_getckp(env, &ckp_lsn)) == 0 && 271 (ret = __logc_get(logc, &ckp_lsn, &data, DB_SET)) == 0) { 272 /* We have a recent checkpoint. This is LSN (1). */ 273 if ((ret = __txn_ckp_read(env, 274 data.data, &ckp_args)) != 0) { 275 __db_errx(env, 276 "Invalid checkpoint record at [%ld][%ld]", 277 (u_long)ckp_lsn.file, 278 (u_long)ckp_lsn.offset); 279 goto err; 280 } 281 first_lsn = ckp_args->ckp_lsn; 282 __os_free(env, ckp_args); 283 have_rec = 0; 284 } 285 286 /* 287 * If LSN (2) exists, use it if it's before LSN (1). 288 * (If LSN (1) doesn't exist, first_lsn is the 289 * beginning of the log, so will "win" this check.) 290 * 291 * XXX 292 * In the recovery-to-a-timestamp case, lowlsn is chosen by 293 * __log_earliest, and is the checkpoint LSN of the 294 * *earliest* checkpoint in the unreclaimed log. I 295 * (krinsky) believe that we could optimize this by looking 296 * instead for the LSN of the *latest* checkpoint before 297 * the timestamp of interest, but I'm not sure that this 298 * is worth doing right now. (We have to look for lowlsn 299 * and low anyway, to make sure the requested timestamp is 300 * somewhere in the logs we have, and all that's required 301 * is that we pick *some* checkpoint after the beginning of 302 * the logs and before the timestamp. 303 */ 304 if ((dbenv->tx_timestamp != 0 || max_lsn != NULL) && 305 LOG_COMPARE(&lowlsn, &first_lsn) < 0) { 306 DB_ASSERT(env, have_rec == 0); 307 first_lsn = lowlsn; 308 } 309 } 310 311 /* Get the record at first_lsn if we don't have it already. */ 312 if (!have_rec && 313 (ret = __logc_get(logc, &first_lsn, &data, DB_SET)) != 0) { 314 __db_errx(env, "Checkpoint LSN record [%ld][%ld] not found", 315 (u_long)first_lsn.file, (u_long)first_lsn.offset); 316 goto err; 317 } 318 319#ifndef HAVE_BREW 320 if (dbenv->db_feedback != NULL) { 321 if (last_lsn.file == first_lsn.file) 322 nfiles = (double) 323 (last_lsn.offset - first_lsn.offset) / log_size; 324 else 325 nfiles = (double)(last_lsn.file - first_lsn.file) + 326 (double)((log_size - first_lsn.offset) + 327 last_lsn.offset) / log_size; 328 /* We are going to divide by nfiles; make sure it isn't 0. */ 329 if (nfiles < 0.001) 330 nfiles = 0.001; 331 } 332#endif 333 334 /* Find a low txnid. */ 335 ret = 0; 336 if (hi_txn != 0) do { 337 /* txnid is after rectype, which is a u_int32. */ 338 LOGCOPY_32(env, &txnid, 339 (u_int8_t *)data.data + sizeof(u_int32_t)); 340 341 if (txnid != 0) 342 break; 343 } while ((ret = __logc_get(logc, &lsn, &data, DB_NEXT)) == 0); 344 345 /* 346 * There are no transactions and we're not recovering to an LSN (see 347 * above), so there is nothing to do. 348 */ 349 if (ret == DB_NOTFOUND) { 350 if (LOG_COMPARE(&lsn, &last_lsn) != 0) 351 ret = __db_log_corrupt(env, &lsn); 352 else 353 ret = 0; 354 } 355 356 /* Reset to the first lsn. */ 357 if (ret != 0 || 358 (ret = __logc_get(logc, &first_lsn, &data, DB_SET)) != 0) 359 goto err; 360 361 /* Initialize the transaction list. */ 362 if ((ret = __db_txnlist_init(env, ip, 363 txnid, hi_txn, max_lsn, &txninfo)) != 0) 364 goto err; 365 366 /* 367 * Pass #1 368 * Run forward through the log starting at the first relevant lsn. 369 */ 370 if ((ret = __env_openfiles(env, logc, 371 txninfo, &data, &first_lsn, &last_lsn, nfiles, 1)) != 0) 372 goto err; 373 374 /* If there were no transactions, then we can bail out early. */ 375 if (hi_txn == 0 && max_lsn == NULL) 376 goto done; 377 378 /* 379 * Pass #2. 380 * 381 * We used first_lsn to tell us how far back we need to recover, 382 * use it here. 383 */ 384 if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) 385 __db_msg(env, "Recovery starting from [%lu][%lu]", 386 (u_long)first_lsn.file, (u_long)first_lsn.offset); 387 388 pass = "backward"; 389 for (ret = __logc_get(logc, &lsn, &data, DB_LAST); 390 ret == 0 && LOG_COMPARE(&lsn, &first_lsn) >= 0; 391 ret = __logc_get(logc, &lsn, &data, DB_PREV)) { 392#ifdef HAVE_BREW 393 COMPQUIET(progress, 0); 394#else 395 if (dbenv->db_feedback != NULL) { 396 progress = 34 + (int)(33 * (__lsn_diff(&first_lsn, 397 &last_lsn, &lsn, log_size, 0) / nfiles)); 398 dbenv->db_feedback(dbenv, DB_RECOVER, progress); 399 } 400#endif 401 tlsn = lsn; 402 ret = __db_dispatch(env, &env->recover_dtab, 403 &data, &tlsn, DB_TXN_BACKWARD_ROLL, txninfo); 404 if (ret != 0) { 405 if (ret != DB_TXN_CKP) 406 goto msgerr; 407 else 408 ret = 0; 409 } 410 } 411 if (ret == DB_NOTFOUND) { 412 if (LOG_COMPARE(&lsn, &first_lsn) > 0) 413 ret = __db_log_corrupt(env, &lsn); 414 else 415 ret = 0; 416 } 417 if (ret != 0) 418 goto err; 419 420 /* 421 * Pass #3. If we are recovering to a timestamp or to an LSN, 422 * we need to make sure that we don't roll-forward beyond that 423 * point because there may be non-transactional operations (e.g., 424 * closes that would fail). The last_lsn variable is used for 425 * feedback calculations, but use it to set an initial stopping 426 * point for the forward pass, and then reset appropriately to 427 * derive a real stop_lsn that tells how far the forward pass 428 * should go. 429 */ 430 pass = "forward"; 431 stop_lsn = last_lsn; 432 if (max_lsn != NULL || dbenv->tx_timestamp != 0) 433 stop_lsn = ((DB_TXNHEAD *)txninfo)->maxlsn; 434 435 for (ret = __logc_get(logc, &lsn, &data, DB_NEXT); 436 ret == 0; ret = __logc_get(logc, &lsn, &data, DB_NEXT)) { 437#ifndef HAVE_BREW 438 if (dbenv->db_feedback != NULL) { 439 progress = 67 + (int)(33 * (__lsn_diff(&first_lsn, 440 &last_lsn, &lsn, log_size, 1) / nfiles)); 441 dbenv->db_feedback(dbenv, DB_RECOVER, progress); 442 } 443#endif 444 tlsn = lsn; 445 ret = __db_dispatch(env, &env->recover_dtab, 446 &data, &tlsn, DB_TXN_FORWARD_ROLL, txninfo); 447 if (ret != 0) { 448 if (ret != DB_TXN_CKP) 449 goto msgerr; 450 else 451 ret = 0; 452 } 453 /* 454 * If we are recovering to a timestamp or an LSN, 455 * we need to make sure that we don't try to roll 456 * forward beyond the soon-to-be end of log. 457 */ 458 if (LOG_COMPARE(&lsn, &stop_lsn) >= 0) 459 break; 460 461 } 462 if (ret == DB_NOTFOUND) 463 ret = __db_log_corrupt(env, &lsn); 464 if (ret != 0) 465 goto err; 466 467 if (max_lsn == NULL) 468 region->last_txnid = ((DB_TXNHEAD *)txninfo)->maxid; 469 470 if (dbenv->tx_timestamp != 0) { 471 /* We are going to truncate, so we'd best close the cursor. */ 472 if (logc != NULL) { 473 if ((ret = __logc_close(logc)) != 0) 474 goto err; 475 logc = NULL; 476 } 477 478 /* 479 * Flush everything to disk, we are losing the log. It's 480 * recovery, ignore any application max-write configuration. 481 */ 482 if ((ret = __memp_sync_int(env, NULL, 0, 483 DB_SYNC_CACHE | DB_SYNC_SUPPRESS_WRITE, NULL, NULL)) != 0) 484 goto err; 485 region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn; 486 if ((ret = __log_vtruncate(env, 487 &((DB_TXNHEAD *)txninfo)->maxlsn, 488 &((DB_TXNHEAD *)txninfo)->ckplsn, trunclsn)) != 0) 489 goto err; 490 } 491 492done: 493 /* Take a checkpoint here to force any dirty data pages to disk. */ 494 if (!IS_REP_CLIENT(env) && (ret = __txn_checkpoint(env, 0, 0, 495 DB_CKP_INTERNAL | DB_FORCE)) != 0) { 496 /* 497 * If there was no space for the checkpoint we can 498 * still bring the environment up. No updates will 499 * be able to commit either, but the environment can 500 * be used read only. 501 */ 502 if (max_lsn == NULL && ret == ENOSPC) 503 ret = 0; 504 else 505 goto err; 506 } 507 508 if (region->stat.st_nrestores == 0) { 509 /* Close all the db files that are open. */ 510 if ((ret = __dbreg_close_files(env, 0)) != 0) 511 goto err; 512 } else { 513 if ((ret = __dbreg_mark_restored(env)) != 0) 514 goto err; 515 F_SET(env->lg_handle, DBLOG_OPENFILES); 516 } 517 518 if (max_lsn != NULL) { 519 if (!IS_ZERO_LSN(((DB_TXNHEAD *)txninfo)->ckplsn)) 520 region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn; 521 else if ((ret = 522 __txn_findlastckp(env, ®ion->last_ckp, max_lsn)) != 0) 523 goto err; 524 525 /* We are going to truncate, so we'd best close the cursor. */ 526 if (logc != NULL && (ret = __logc_close(logc)) != 0) 527 goto err; 528 logc = NULL; 529 if ((ret = __log_vtruncate(env, 530 max_lsn, &((DB_TXNHEAD *)txninfo)->ckplsn, trunclsn)) != 0) 531 goto err; 532 533 /* 534 * Now we need to open files that should be open in order for 535 * client processing to continue. However, since we've 536 * truncated the log, we need to recompute from where the 537 * openfiles pass should begin. 538 */ 539 if ((ret = __log_cursor(env, &logc)) != 0) 540 goto err; 541 if ((ret = 542 __logc_get(logc, &first_lsn, &data, DB_FIRST)) != 0) { 543 if (ret == DB_NOTFOUND) 544 ret = 0; 545 else 546 __db_errx(env, "First log record not found"); 547 goto err; 548 } 549 if ((ret = __txn_getckp(env, &first_lsn)) == 0 && 550 (ret = __logc_get(logc, &first_lsn, &data, DB_SET)) == 0) { 551 /* We have a recent checkpoint. This is LSN (1). */ 552 if ((ret = __txn_ckp_read(env, 553 data.data, &ckp_args)) != 0) { 554 __db_errx(env, 555 "Invalid checkpoint record at [%ld][%ld]", 556 (u_long)first_lsn.file, 557 (u_long)first_lsn.offset); 558 goto err; 559 } 560 first_lsn = ckp_args->ckp_lsn; 561 __os_free(env, ckp_args); 562 } 563 if ((ret = __logc_get(logc, &first_lsn, &data, DB_SET)) != 0) 564 goto err; 565 if ((ret = __env_openfiles(env, logc, 566 txninfo, &data, &first_lsn, max_lsn, nfiles, 1)) != 0) 567 goto err; 568 } else if (region->stat.st_nrestores == 0) { 569 /* 570 * If there are no prepared transactions that need resolution, 571 * we need to reset the transaction ID space and log this fact. 572 */ 573 if ((ret = __txn_reset(env)) != 0) 574 goto err; 575 } else { 576 if ((ret = __txn_recycle_id(env)) != 0) 577 goto err; 578 } 579 580 /* 581 * We must be sure to zero the tail of the log. Otherwise a partial 582 * record may be at the end of the log and it may never be fully 583 * overwritten. 584 */ 585 if (max_lsn == NULL && dbenv->tx_timestamp == 0) { 586 /* We are going to truncate, so we'd best close the cursor. */ 587 if (logc != NULL && (ret = __logc_close(logc)) != 0) 588 goto err; 589 logc = NULL; 590 591 /* Truncate from beyond the last record in the log. */ 592 if ((ret = 593 __log_current_lsn(env, &last_lsn, NULL, NULL)) != 0) 594 goto err; 595 if ((ret = __log_vtruncate(env, 596 &last_lsn, ®ion->last_ckp, NULL)) != 0) 597 goto err; 598 } 599 600 if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) { 601 (void)time(&now); 602 __db_msg(env, 603 "Recovery complete at %.24s", __os_ctime(&now, time_buf)); 604 __db_msg(env, "%s %lx %s [%lu][%lu]", 605 "Maximum transaction ID", 606 (u_long)(txninfo == NULL ? 607 TXN_MINIMUM : ((DB_TXNHEAD *)txninfo)->maxid), 608 "Recovery checkpoint", 609 (u_long)region->last_ckp.file, 610 (u_long)region->last_ckp.offset); 611 } 612 613 if (0) { 614msgerr: __db_errx(env, 615 "Recovery function for LSN %lu %lu failed on %s pass", 616 (u_long)lsn.file, (u_long)lsn.offset, pass); 617 } 618 619err: if (logc != NULL && (t_ret = __logc_close(logc)) != 0 && ret == 0) 620 ret = t_ret; 621 622 if (txninfo != NULL) 623 __db_txnlist_end(env, txninfo); 624 625 dbenv->tx_timestamp = 0; 626 627 F_CLR(env->lg_handle, DBLOG_RECOVER); 628 F_CLR(region, TXN_IN_RECOVERY); 629 630 return (ret); 631} 632 633#ifndef HAVE_BREW 634/* 635 * Figure out how many logfiles we have processed. If we are moving 636 * forward (is_forward != 0), then we're computing current - low. If 637 * we are moving backward, we are computing high - current. max is 638 * the number of bytes per logfile. 639 */ 640static double 641__lsn_diff(low, high, current, max, is_forward) 642 DB_LSN *low, *high, *current; 643 u_int32_t max; 644 int is_forward; 645{ 646 double nf; 647 648 /* 649 * There are three cases in each direction. If you are in the 650 * same file, then all you need worry about is the difference in 651 * offsets. If you are in different files, then either your offsets 652 * put you either more or less than the integral difference in the 653 * number of files -- we need to handle both of these. 654 */ 655 if (is_forward) { 656 if (current->file == low->file) 657 nf = (double)(current->offset - low->offset) / max; 658 else if (current->offset < low->offset) 659 nf = (double)((current->file - low->file) - 1) + 660 (double)((max - low->offset) + current->offset) / 661 max; 662 else 663 nf = (double)(current->file - low->file) + 664 (double)(current->offset - low->offset) / max; 665 } else { 666 if (current->file == high->file) 667 nf = (double)(high->offset - current->offset) / max; 668 else if (current->offset > high->offset) 669 nf = (double)((high->file - current->file) - 1) + 670 (double) 671 ((max - current->offset) + high->offset) / max; 672 else 673 nf = (double)(high->file - current->file) + 674 (double)(high->offset - current->offset) / max; 675 } 676 return (nf); 677} 678#endif 679 680/* 681 * __log_backup -- 682 * 683 * This is used to find the earliest log record to process when a client 684 * is trying to sync up with a master whose max LSN is less than this 685 * client's max lsn; we want to roll back everything after that. 686 * Also used in the verify phase to walk back via checkpoints. 687 * 688 * Find the latest checkpoint whose ckp_lsn is less than the max lsn. 689 * PUBLIC: int __log_backup __P((ENV *, DB_LOGC *, DB_LSN *, 690 * PUBLIC: DB_LSN *, u_int32_t)); 691 */ 692int 693__log_backup(env, logc, max_lsn, start_lsn, cmp) 694 ENV *env; 695 DB_LOGC *logc; 696 DB_LSN *max_lsn, *start_lsn; 697 u_int32_t cmp; 698{ 699 DBT data; 700 DB_LSN cmp_lsn, lsn; 701 __txn_ckp_args *ckp_args; 702 int lcmp, ret; 703 704 memset(&data, 0, sizeof(data)); 705 ckp_args = NULL; 706 707 if (cmp != CKPLSN_CMP && cmp != LASTCKP_CMP) 708 return (EINVAL); 709 710 if ((ret = __txn_getckp(env, &lsn)) != 0) 711 goto err; 712 /* 713 * Cmp tells us whether to check the ckp_lsn or the last_ckp 714 * fields in the checkpoint record. 715 */ 716 while ((ret = __logc_get(logc, &lsn, &data, DB_SET)) == 0) { 717 if ((ret = __txn_ckp_read( 718 env, data.data, &ckp_args)) != 0) 719 return (ret); 720 if (cmp == CKPLSN_CMP) { 721 /* 722 * Follow checkpoints through the log until 723 * we find one with a ckp_lsn less than 724 * or equal max_lsn. 725 */ 726 cmp_lsn = ckp_args->ckp_lsn; 727 lcmp = (LOG_COMPARE(&cmp_lsn, max_lsn) <= 0); 728 } else { 729 /* 730 * When we're walking back through the checkpoints 731 * we want the LSN of this checkpoint strictly less 732 * than the max_lsn (also a ckp LSN). 733 */ 734 cmp_lsn = lsn; 735 lcmp = (LOG_COMPARE(&cmp_lsn, max_lsn) < 0); 736 } 737 if (lcmp) { 738 *start_lsn = cmp_lsn; 739 break; 740 } 741 742 lsn = ckp_args->last_ckp; 743 /* 744 * If there are no more checkpoints behind us, we're 745 * done. Break with DB_NOTFOUND. 746 */ 747 if (IS_ZERO_LSN(lsn)) { 748 ret = DB_NOTFOUND; 749 break; 750 } 751 __os_free(env, ckp_args); 752 ckp_args = NULL; 753 } 754 755 if (ckp_args != NULL) 756 __os_free(env, ckp_args); 757 /* 758 * For CKPLSN_CMP if we walked back through all the checkpoints, 759 * set the cursor on the first log record. For LASTCKP_CMP 760 * we want to return 0,0 in start_lsn. 761 */ 762err: if (IS_ZERO_LSN(*start_lsn) && cmp == CKPLSN_CMP && 763 (ret == 0 || ret == DB_NOTFOUND)) 764 ret = __logc_get(logc, start_lsn, &data, DB_FIRST); 765 return (ret); 766} 767 768/* 769 * __log_earliest -- 770 * 771 * Return the earliest recovery point for the log files present. The 772 * earliest recovery time is the time stamp of the first checkpoint record 773 * whose checkpoint LSN is greater than the first LSN we process. 774 */ 775static int 776__log_earliest(env, logc, lowtime, lowlsn) 777 ENV *env; 778 DB_LOGC *logc; 779 int32_t *lowtime; 780 DB_LSN *lowlsn; 781{ 782 __txn_ckp_args *ckpargs; 783 DB_LSN first_lsn, lsn; 784 DBT data; 785 u_int32_t rectype; 786 int cmp, ret; 787 788 memset(&data, 0, sizeof(data)); 789 790 /* 791 * Read forward through the log looking for the first checkpoint 792 * record whose ckp_lsn is greater than first_lsn. 793 */ 794 for (ret = __logc_get(logc, &first_lsn, &data, DB_FIRST); 795 ret == 0; ret = __logc_get(logc, &lsn, &data, DB_NEXT)) { 796 LOGCOPY_32(env, &rectype, data.data); 797 if (rectype != DB___txn_ckp) 798 continue; 799 if ((ret = 800 __txn_ckp_read(env, data.data, &ckpargs)) == 0) { 801 cmp = LOG_COMPARE(&ckpargs->ckp_lsn, &first_lsn); 802 *lowlsn = ckpargs->ckp_lsn; 803 *lowtime = ckpargs->timestamp; 804 805 __os_free(env, ckpargs); 806 if (cmp >= 0) 807 break; 808 } 809 } 810 811 return (ret); 812} 813 814/* 815 * __env_openfiles -- 816 * Perform the pass of recovery that opens files. This is used 817 * both during regular recovery and an initial call to txn_recover (since 818 * we need files open in order to abort prepared, but not yet committed 819 * transactions). 820 * 821 * See the comments in db_apprec for a detailed description of the 822 * various recovery passes. 823 * 824 * If we are not doing feedback processing (i.e., we are doing txn_recover 825 * processing and in_recovery is zero), then last_lsn can be NULL. 826 * 827 * PUBLIC: int __env_openfiles __P((ENV *, 828 * PUBLIC: DB_LOGC *, void *, DBT *, DB_LSN *, DB_LSN *, double, int)); 829 */ 830int 831__env_openfiles(env, logc, txninfo, 832 data, open_lsn, last_lsn, nfiles, in_recovery) 833 ENV *env; 834 DB_LOGC *logc; 835 void *txninfo; 836 DBT *data; 837 DB_LSN *open_lsn, *last_lsn; 838 double nfiles; 839 int in_recovery; 840{ 841 DB_ENV *dbenv; 842 DB_LSN lsn, tlsn; 843 u_int32_t log_size; 844 int progress, ret; 845 846 dbenv = env->dbenv; 847 848 /* 849 * XXX 850 * Get the log size. No locking required because we're single-threaded 851 * during recovery. 852 */ 853 log_size = ((LOG *)env->lg_handle->reginfo.primary)->log_size; 854 855 lsn = *open_lsn; 856 for (;;) { 857#ifdef HAVE_BREW 858 COMPQUIET(nfiles, (double)0.001); 859 COMPQUIET(progress, 0); 860#else 861 if (in_recovery && dbenv->db_feedback != NULL) { 862 DB_ASSERT(env, last_lsn != NULL); 863 progress = (int)(33 * (__lsn_diff(open_lsn, 864 last_lsn, &lsn, log_size, 1) / nfiles)); 865 dbenv->db_feedback(dbenv, DB_RECOVER, progress); 866 } 867#endif 868 tlsn = lsn; 869 ret = __db_dispatch(env, &env->recover_dtab, data, &tlsn, 870 in_recovery ? DB_TXN_OPENFILES : DB_TXN_POPENFILES, 871 txninfo); 872 if (ret != 0 && ret != DB_TXN_CKP) { 873 __db_errx(env, 874 "Recovery function for LSN %lu %lu failed", 875 (u_long)lsn.file, (u_long)lsn.offset); 876 break; 877 } 878 if ((ret = __logc_get(logc, &lsn, data, DB_NEXT)) != 0) { 879 if (ret == DB_NOTFOUND) { 880 if (last_lsn != NULL && 881 LOG_COMPARE(&lsn, last_lsn) != 0) 882 ret = __db_log_corrupt(env, &lsn); 883 else 884 ret = 0; 885 } 886 break; 887 } 888 } 889 890 return (ret); 891} 892 893static int 894__db_log_corrupt(env, lsnp) 895 ENV *env; 896 DB_LSN *lsnp; 897{ 898 __db_errx(env, "Log file corrupt at LSN: [%lu][%lu]", 899 (u_long)lsnp->file, (u_long)lsnp->offset); 900 return (EINVAL); 901} 902 903/* 904 * __env_init_rec -- 905 * 906 * PUBLIC: int __env_init_rec __P((ENV *, u_int32_t)); 907 */ 908int 909__env_init_rec(env, version) 910 ENV *env; 911 u_int32_t version; 912{ 913 int ret; 914 915 /* 916 * We need to prime the recovery table with the current recovery 917 * functions. Then we overwrite only specific entries based on 918 * each previous version we support. 919 */ 920 if ((ret = __env_init_rec_47(env)) != 0) 921 return (ret); 922 ret = 0; 923 switch (version) { 924 case DB_LOGVERSION_47: 925 break; 926 /* 927 * There are no log record/recovery differences between 4.4 and 4.5. 928 * The log version changed due to checksum. There are no log recovery 929 * differences between 4.5 and 4.6. The name of the rep_gen in 930 * txn_checkpoint changed (to spare, since we don't use it anymore). 931 */ 932 case DB_LOGVERSION_46: 933 case DB_LOGVERSION_45: 934 case DB_LOGVERSION_44: 935 ret = __env_init_rec_46(env); 936 break; 937 case DB_LOGVERSION_43: 938 ret = __env_init_rec_43(env); 939 break; 940 case DB_LOGVERSION_42: 941 ret = __env_init_rec_42(env); 942 break; 943 default: 944 __db_errx(env, "Unknown version %lu", (u_long)version); 945 ret = EINVAL; 946 break; 947 } 948 return (ret); 949} 950 951static int 952__env_init_rec_42(env) 953 ENV *env; 954{ 955 int ret; 956 957 if ((ret = __db_add_recovery_int(env, &env->recover_dtab, 958 __db_relink_42_recover, DB___db_relink_42)) != 0) 959 goto err; 960 if ((ret = __db_add_recovery_int(env, &env->recover_dtab, 961 __db_pg_alloc_42_recover, DB___db_pg_alloc_42)) != 0) 962 goto err; 963 if ((ret = __db_add_recovery_int(env, &env->recover_dtab, 964 __db_pg_free_42_recover, DB___db_pg_free_42)) != 0) 965 goto err; 966 if ((ret = __db_add_recovery_int(env, &env->recover_dtab, 967 __db_pg_freedata_42_recover, DB___db_pg_freedata_42)) != 0) 968 goto err; 969 if ((ret = __db_add_recovery_int(env, &env->recover_dtab, 970 __ham_metagroup_42_recover, DB___ham_metagroup_42)) != 0) 971 goto err; 972 if ((ret = __db_add_recovery_int(env, &env->recover_dtab, 973 __ham_groupalloc_42_recover, DB___ham_groupalloc_42)) != 0) 974 goto err; 975 if ((ret = __db_add_recovery_int(env, &env->recover_dtab, 976 __txn_ckp_42_recover, DB___txn_ckp_42)) != 0) 977 goto err; 978 if ((ret = __db_add_recovery_int(env, &env->recover_dtab, 979 __txn_regop_42_recover, DB___txn_regop_42)) != 0) 980 goto err; 981err: 982 return (ret); 983} 984 985static int 986__env_init_rec_43(env) 987 ENV *env; 988{ 989 int ret; 990 991 if ((ret = __db_add_recovery_int(env, &env->recover_dtab, 992 __bam_relink_43_recover, DB___bam_relink_43)) != 0) 993 goto err; 994 /* 995 * We want to use the 4.2-based txn_regop record. 996 */ 997 if ((ret = __db_add_recovery_int(env, &env->recover_dtab, 998 __txn_regop_42_recover, DB___txn_regop_42)) != 0) 999 goto err; 1000err: 1001 return (ret); 1002} 1003 1004static int 1005__env_init_rec_46(env) 1006 ENV *env; 1007{ 1008 int ret; 1009 1010 if ((ret = __db_add_recovery_int(env, &env->recover_dtab, 1011 __bam_merge_44_recover, DB___bam_merge_44)) != 0) 1012 goto err; 1013 1014err: return (ret); 1015} 1016 1017static int 1018__env_init_rec_47(env) 1019 ENV *env; 1020{ 1021 int ret; 1022 1023 if ((ret = __bam_init_recover(env, &env->recover_dtab)) != 0) 1024 goto err; 1025 if ((ret = __crdel_init_recover(env, &env->recover_dtab)) != 0) 1026 goto err; 1027 if ((ret = __db_init_recover(env, &env->recover_dtab)) != 0) 1028 goto err; 1029 if ((ret = __dbreg_init_recover(env, &env->recover_dtab)) != 0) 1030 goto err; 1031 if ((ret = __fop_init_recover(env, &env->recover_dtab)) != 0) 1032 goto err; 1033 if ((ret = __ham_init_recover(env, &env->recover_dtab)) != 0) 1034 goto err; 1035 if ((ret = __qam_init_recover(env, &env->recover_dtab)) != 0) 1036 goto err; 1037 if ((ret = __txn_init_recover(env, &env->recover_dtab)) != 0) 1038 goto err; 1039err: 1040 return (ret); 1041} 1042