1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996,2008 Oracle. All rights reserved. 5 * 6 * $Id: log_get.c,v 12.56 2008/05/05 02:01:21 mjc Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/crypto.h" 13#include "dbinc/db_page.h" 14#include "dbinc/hmac.h" 15#include "dbinc/log.h" 16#include "dbinc/hash.h" 17 18typedef enum { L_ALREADY, L_ACQUIRED, L_NONE } RLOCK; 19 20static int __logc_close_pp __P((DB_LOGC *, u_int32_t)); 21static int __logc_get_pp __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t)); 22static int __logc_get_int __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t)); 23static int __logc_hdrchk __P((DB_LOGC *, DB_LSN *, HDR *, int *)); 24static int __logc_incursor __P((DB_LOGC *, DB_LSN *, HDR *, u_int8_t **)); 25static int __logc_inregion __P((DB_LOGC *, 26 DB_LSN *, RLOCK *, DB_LSN *, HDR *, u_int8_t **, int *)); 27static int __logc_io __P((DB_LOGC *, 28 u_int32_t, u_int32_t, void *, size_t *, int *)); 29static int __logc_ondisk __P((DB_LOGC *, 30 DB_LSN *, DB_LSN *, u_int32_t, HDR *, u_int8_t **, int *)); 31static int __logc_set_maxrec __P((DB_LOGC *, char *)); 32static int __logc_shortread __P((DB_LOGC *, DB_LSN *, int)); 33static int __logc_version_pp __P((DB_LOGC *, u_int32_t *, u_int32_t)); 34 35/* 36 * __log_cursor_pp -- 37 * ENV->log_cursor 38 * 39 * PUBLIC: int __log_cursor_pp __P((DB_ENV *, DB_LOGC **, u_int32_t)); 40 */ 41int 42__log_cursor_pp(dbenv, logcp, flags) 43 DB_ENV *dbenv; 44 DB_LOGC **logcp; 45 u_int32_t flags; 46{ 47 DB_THREAD_INFO *ip; 48 ENV *env; 49 int ret; 50 51 env = dbenv->env; 52 53 ENV_REQUIRES_CONFIG(env, 54 env->lg_handle, "DB_ENV->log_cursor", DB_INIT_LOG); 55 56 /* Validate arguments. */ 57 if ((ret = __db_fchk(env, "DB_ENV->log_cursor", flags, 0)) != 0) 58 return (ret); 59 60 ENV_ENTER(env, ip); 61 REPLICATION_WRAP(env, (__log_cursor(env, logcp)), 0, ret); 62 ENV_LEAVE(env, ip); 63 return (ret); 64} 65 66/* 67 * __log_cursor -- 68 * Create a log cursor. 69 * 70 * PUBLIC: int __log_cursor __P((ENV *, DB_LOGC **)); 71 */ 72int 73__log_cursor(env, logcp) 74 ENV *env; 75 DB_LOGC **logcp; 76{ 77 DB_LOGC *logc; 78 int ret; 79 80 *logcp = NULL; 81 82 /* Allocate memory for the cursor. */ 83 if ((ret = __os_calloc(env, 1, sizeof(DB_LOGC), &logc)) != 0) 84 return (ret); 85 86 logc->bp_size = LG_CURSOR_BUF_SIZE; 87 /* 88 * Set this to something positive. 89 */ 90 logc->bp_maxrec = MEGABYTE; 91 if ((ret = __os_malloc(env, logc->bp_size, &logc->bp)) != 0) { 92 __os_free(env, logc); 93 return (ret); 94 } 95 96 logc->env = env; 97 logc->close = __logc_close_pp; 98 logc->get = __logc_get_pp; 99 logc->version = __logc_version_pp; 100 101 *logcp = logc; 102 return (0); 103} 104 105/* 106 * __logc_close_pp -- 107 * DB_LOGC->close pre/post processing. 108 */ 109static int 110__logc_close_pp(logc, flags) 111 DB_LOGC *logc; 112 u_int32_t flags; 113{ 114 DB_THREAD_INFO *ip; 115 ENV *env; 116 int ret; 117 118 env = logc->env; 119 120 if ((ret = __db_fchk(env, "DB_LOGC->close", flags, 0)) != 0) 121 return (ret); 122 123 ENV_ENTER(env, ip); 124 REPLICATION_WRAP(env, (__logc_close(logc)), 0, ret); 125 ENV_LEAVE(env, ip); 126 return (ret); 127} 128 129/* 130 * __logc_close -- 131 * DB_LOGC->close. 132 * 133 * PUBLIC: int __logc_close __P((DB_LOGC *)); 134 */ 135int 136__logc_close(logc) 137 DB_LOGC *logc; 138{ 139 ENV *env; 140 141 env = logc->env; 142 143 if (logc->fhp != NULL) { 144 (void)__os_closehandle(env, logc->fhp); 145 logc->fhp = NULL; 146 } 147 148 if (logc->dbt.data != NULL) 149 __os_free(env, logc->dbt.data); 150 151 __os_free(env, logc->bp); 152 __os_free(env, logc); 153 154 return (0); 155} 156 157/* 158 * __logc_version_pp -- 159 * DB_LOGC->version. 160 */ 161static int 162__logc_version_pp(logc, versionp, flags) 163 DB_LOGC *logc; 164 u_int32_t *versionp; 165 u_int32_t flags; 166{ 167 DB_THREAD_INFO *ip; 168 ENV *env; 169 int ret; 170 171 env = logc->env; 172 173 if ((ret = __db_fchk(env, "DB_LOGC->version", flags, 0)) != 0) 174 return (ret); 175 176 ENV_ENTER(env, ip); 177 REPLICATION_WRAP(env, (__logc_version(logc, versionp)), 0, ret); 178 ENV_LEAVE(env, ip); 179 return (ret); 180} 181 182/* 183 * __logc_version -- 184 * DB_LOGC->version. 185 * 186 * PUBLIC: int __logc_version __P((DB_LOGC *, u_int32_t *)); 187 */ 188int 189__logc_version(logc, versionp) 190 DB_LOGC *logc; 191 u_int32_t *versionp; 192{ 193 DBT hdrdbt; 194 DB_LOGC *plogc; 195 DB_LSN plsn; 196 ENV *env; 197 LOGP *persist; 198 int ret, t_ret; 199 200 env = logc->env; 201 if (IS_ZERO_LSN(logc->lsn)) { 202 __db_errx(env, "DB_LOGC->get: unset cursor"); 203 return (EINVAL); 204 } 205 ret = 0; 206 /* 207 * Check if the persist info we have is for the same file 208 * as the current cursor position. If we already have the 209 * information, then we're done. If not, we open a new 210 * log cursor and get the header. 211 * 212 * Since most users walk forward through the log when 213 * using this feature (i.e. printlog) we're likely to 214 * have the information we need. 215 */ 216 if (logc->lsn.file != logc->p_lsn.file) { 217 if ((ret = __log_cursor(env, &plogc)) != 0) 218 return (ret); 219 plsn.file = logc->lsn.file; 220 plsn.offset = 0; 221 plogc->lsn = plsn; 222 memset(&hdrdbt, 0, sizeof(DBT)); 223 if ((ret = __logc_get_int(plogc, 224 &plsn, &hdrdbt, DB_SET)) == 0) { 225 persist = (LOGP *)hdrdbt.data; 226 if (LOG_SWAPPED(env)) 227 __log_persistswap(persist); 228 logc->p_lsn = logc->lsn; 229 logc->p_version = persist->version; 230 } 231 if ((t_ret = __logc_close(plogc)) != 0 && ret == 0) 232 ret = t_ret; 233 } 234 /* Return the version. */ 235 if (ret == 0) 236 *versionp = logc->p_version; 237 return (ret); 238} 239 240/* 241 * __logc_get_pp -- 242 * DB_LOGC->get pre/post processing. 243 */ 244static int 245__logc_get_pp(logc, alsn, dbt, flags) 246 DB_LOGC *logc; 247 DB_LSN *alsn; 248 DBT *dbt; 249 u_int32_t flags; 250{ 251 DB_THREAD_INFO *ip; 252 ENV *env; 253 int ret; 254 255 env = logc->env; 256 257 /* Validate arguments. */ 258 switch (flags) { 259 case DB_CURRENT: 260 case DB_FIRST: 261 case DB_LAST: 262 case DB_NEXT: 263 case DB_PREV: 264 break; 265 case DB_SET: 266 if (IS_ZERO_LSN(*alsn)) { 267 __db_errx(env, "DB_LOGC->get: invalid LSN: %lu/%lu", 268 (u_long)alsn->file, (u_long)alsn->offset); 269 return (EINVAL); 270 } 271 break; 272 default: 273 return (__db_ferr(env, "DB_LOGC->get", 1)); 274 } 275 276 ENV_ENTER(env, ip); 277 REPLICATION_WRAP(env, (__logc_get(logc, alsn, dbt, flags)), 0, ret); 278 ENV_LEAVE(env, ip); 279 return (ret); 280} 281 282/* 283 * __logc_get -- 284 * DB_LOGC->get. 285 * 286 * PUBLIC: int __logc_get __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t)); 287 */ 288int 289__logc_get(logc, alsn, dbt, flags) 290 DB_LOGC *logc; 291 DB_LSN *alsn; 292 DBT *dbt; 293 u_int32_t flags; 294{ 295 DB_LSN saved_lsn; 296 ENV *env; 297 LOGP *persist; 298 int ret; 299 300 env = logc->env; 301 302 /* 303 * On error, we take care not to overwrite the caller's LSN. This 304 * is because callers looking for the end of the log loop using the 305 * DB_NEXT flag, and expect to take the last successful lsn out of 306 * the passed-in structure after DB_LOGC->get fails with DB_NOTFOUND. 307 * 308 * !!! 309 * This line is often flagged an uninitialized memory read during a 310 * Purify or similar tool run, as the application didn't initialize 311 * *alsn. If the application isn't setting the DB_SET flag, there is 312 * no reason it should have initialized *alsn, but we can't know that 313 * and we want to make sure we never overwrite whatever the application 314 * put in there. 315 */ 316 saved_lsn = *alsn; 317 /* 318 * If we get one of the log's header records as a result of doing a 319 * DB_FIRST, DB_NEXT, DB_LAST or DB_PREV, repeat the operation, log 320 * file header records aren't useful to applications. 321 */ 322 if ((ret = __logc_get_int(logc, alsn, dbt, flags)) != 0) { 323 *alsn = saved_lsn; 324 return (ret); 325 } 326 if (alsn->offset == 0 && (flags == DB_FIRST || 327 flags == DB_NEXT || flags == DB_LAST || flags == DB_PREV)) { 328 switch (flags) { 329 case DB_FIRST: 330 flags = DB_NEXT; 331 break; 332 case DB_LAST: 333 flags = DB_PREV; 334 break; 335 case DB_NEXT: 336 case DB_PREV: 337 default: 338 break; 339 } 340 /* 341 * If we're walking the log and we find a persist header 342 * then store so that we may use it later if needed. 343 */ 344 persist = (LOGP *)dbt->data; 345 if (LOG_SWAPPED(env)) 346 __log_persistswap(persist); 347 logc->p_lsn = *alsn; 348 logc->p_version = persist->version; 349 if (F_ISSET(dbt, DB_DBT_MALLOC)) { 350 __os_free(env, dbt->data); 351 dbt->data = NULL; 352 } 353 if ((ret = __logc_get_int(logc, alsn, dbt, flags)) != 0) { 354 *alsn = saved_lsn; 355 return (ret); 356 } 357 } 358 359 return (0); 360} 361 362/* 363 * __logc_get_int -- 364 * Get a log record; internal version. 365 */ 366static int 367__logc_get_int(logc, alsn, dbt, flags) 368 DB_LOGC *logc; 369 DB_LSN *alsn; 370 DBT *dbt; 371 u_int32_t flags; 372{ 373 DB_CIPHER *db_cipher; 374 DB_LOG *dblp; 375 DB_LSN last_lsn, nlsn; 376 ENV *env; 377 HDR hdr; 378 LOG *lp; 379 RLOCK rlock; 380 logfile_validity status; 381 u_int32_t cnt; 382 u_int8_t *rp; 383 int eof, is_hmac, need_cksum, ret; 384 385 env = logc->env; 386 db_cipher = env->crypto_handle; 387 dblp = env->lg_handle; 388 lp = dblp->reginfo.primary; 389 is_hmac = 0; 390 391 /* 392 * We don't acquire the log region lock until we need it, and we 393 * release it as soon as we're done. 394 */ 395 rlock = F_ISSET(logc, DB_LOG_LOCKED) ? L_ALREADY : L_NONE; 396 397 nlsn = logc->lsn; 398 switch (flags) { 399 case DB_NEXT: /* Next log record. */ 400 if (!IS_ZERO_LSN(nlsn)) { 401 /* Increment the cursor by the cursor record size. */ 402 nlsn.offset += logc->len; 403 break; 404 } 405 flags = DB_FIRST; 406 /* FALLTHROUGH */ 407 case DB_FIRST: /* First log record. */ 408 /* Find the first log file. */ 409 if ((ret = __log_find(dblp, 1, &cnt, &status)) != 0) 410 goto err; 411 412 /* 413 * DB_LV_INCOMPLETE: 414 * Theoretically, the log file we want could be created 415 * but not yet written, the "first" log record must be 416 * in the log buffer. 417 * DB_LV_NORMAL: 418 * DB_LV_OLD_READABLE: 419 * We found a log file we can read. 420 * DB_LV_NONEXISTENT: 421 * No log files exist, the "first" log record must be in 422 * the log buffer. 423 * DB_LV_OLD_UNREADABLE: 424 * No readable log files exist, we're at the cross-over 425 * point between two versions. The "first" log record 426 * must be in the log buffer. 427 */ 428 switch (status) { 429 case DB_LV_INCOMPLETE: 430 DB_ASSERT(env, lp->lsn.file == cnt); 431 /* FALLTHROUGH */ 432 case DB_LV_NORMAL: 433 case DB_LV_OLD_READABLE: 434 nlsn.file = cnt; 435 break; 436 case DB_LV_NONEXISTENT: 437 nlsn.file = 1; 438 DB_ASSERT(env, lp->lsn.file == nlsn.file); 439 break; 440 case DB_LV_OLD_UNREADABLE: 441 nlsn.file = cnt + 1; 442 DB_ASSERT(env, lp->lsn.file == nlsn.file); 443 break; 444 } 445 nlsn.offset = 0; 446 break; 447 case DB_CURRENT: /* Current log record. */ 448 break; 449 case DB_PREV: /* Previous log record. */ 450 if (!IS_ZERO_LSN(nlsn)) { 451 /* If at start-of-file, move to the previous file. */ 452 if (nlsn.offset == 0) { 453 if (nlsn.file == 1) { 454 ret = DB_NOTFOUND; 455 goto err; 456 } 457 if ((!lp->db_log_inmemory && 458 (__log_valid(dblp, nlsn.file - 1, 0, NULL, 459 0, &status, NULL) != 0 || 460 (status != DB_LV_NORMAL && 461 status != DB_LV_OLD_READABLE)))) { 462 ret = DB_NOTFOUND; 463 goto err; 464 } 465 466 --nlsn.file; 467 } 468 nlsn.offset = logc->prev; 469 break; 470 } 471 /* FALLTHROUGH */ 472 case DB_LAST: /* Last log record. */ 473 if (rlock == L_NONE) { 474 rlock = L_ACQUIRED; 475 LOG_SYSTEM_LOCK(env); 476 } 477 nlsn.file = lp->lsn.file; 478 nlsn.offset = lp->lsn.offset - lp->len; 479 break; 480 case DB_SET: /* Set log record. */ 481 nlsn = *alsn; 482 break; 483 default: 484 ret = __db_unknown_path(env, "__logc_get_int"); 485 goto err; 486 } 487 488 if (0) { /* Move to the next file. */ 489next_file: ++nlsn.file; 490 nlsn.offset = 0; 491 } 492 493 /* 494 * The above switch statement should have set nlsn to the lsn of 495 * the requested record. 496 */ 497 498 if (CRYPTO_ON(env)) { 499 hdr.size = HDR_CRYPTO_SZ; 500 is_hmac = 1; 501 } else { 502 hdr.size = HDR_NORMAL_SZ; 503 is_hmac = 0; 504 } 505 506 /* 507 * Check to see if the record is in the cursor's buffer -- if so, 508 * we'll need to checksum it. 509 */ 510 if ((ret = __logc_incursor(logc, &nlsn, &hdr, &rp)) != 0) 511 goto err; 512 if (rp != NULL) 513 goto cksum; 514 515 /* 516 * Look to see if we're moving backward in the log with the last record 517 * coming from the disk -- it means the record can't be in the region's 518 * buffer. Else, check the region's buffer. 519 * 520 * If the record isn't in the region's buffer, then either logs are 521 * in-memory, and we're done, or we're going to have to read the 522 * record from disk. We want to make a point of not reading past the 523 * end of the logical log (after recovery, there may be data after the 524 * end of the logical log, not to mention the log file may have been 525 * pre-allocated). So, zero out last_lsn, and initialize it inside 526 * __logc_inregion -- if it's still zero when we check it in 527 * __logc_ondisk, that's OK, it just means the logical end of the log 528 * isn't an issue for this request. 529 */ 530 ZERO_LSN(last_lsn); 531 if (!F_ISSET(logc, DB_LOG_DISK) || 532 LOG_COMPARE(&nlsn, &logc->lsn) > 0) { 533 F_CLR(logc, DB_LOG_DISK); 534 535 if ((ret = __logc_inregion(logc, 536 &nlsn, &rlock, &last_lsn, &hdr, &rp, &need_cksum)) != 0) 537 goto err; 538 if (rp != NULL) { 539 /* 540 * If we read the entire record from the in-memory log 541 * buffer, we don't need to checksum it, nor do we need 542 * to worry about vtruncate issues. 543 */ 544 if (need_cksum) 545 goto cksum; 546 goto from_memory; 547 } 548 if (lp->db_log_inmemory) 549 goto nohdr; 550 } 551 552 /* 553 * We have to read from an on-disk file to retrieve the record. 554 * If we ever can't retrieve the record at offset 0, we're done, 555 * return EOF/DB_NOTFOUND. 556 * 557 * Discard the region lock if we're still holding it, the on-disk 558 * reading routines don't need it. 559 */ 560 if (rlock == L_ACQUIRED) { 561 rlock = L_NONE; 562 LOG_SYSTEM_UNLOCK(env); 563 } 564 if ((ret = __logc_ondisk( 565 logc, &nlsn, &last_lsn, flags, &hdr, &rp, &eof)) != 0) 566 goto err; 567 568 /* 569 * If we got a 0-length record, that means we're in the midst of some 570 * bytes that got 0'd as the result of a vtruncate. In that case or at 571 * the end of a file, with DB_NEXT we're going to have to retry. 572 */ 573 if (eof || hdr.len == 0) { 574nohdr: switch (flags) { 575 case DB_LAST: 576 case DB_PREV: 577 /* 578 * We should never get here. If we recover a log 579 * file with 0's at the end, we'll treat the 0'd 580 * headers as the end of log and ignore them. If 581 * we're reading backwards from another file, then 582 * the first record in that new file should have its 583 * prev field set correctly. 584 */ 585 __db_errx(env, 586 "Encountered zero length records while traversing backwards"); 587 ret = __env_panic(env, DB_RUNRECOVERY); 588 goto err; 589 case DB_FIRST: 590 case DB_NEXT: 591 /* 592 * Zero'd records always indicate the end of a file, 593 * but only go to the next file once. 594 */ 595 if (nlsn.offset != 0) 596 goto next_file; 597 /* FALLTHROUGH */ 598 case DB_SET: 599 default: 600 ret = DB_NOTFOUND; 601 goto err; 602 } 603 } 604 605 F_SET(logc, DB_LOG_DISK); 606 607cksum: /* 608 * Discard the region lock if we're still holding it. (The path to 609 * get here is we acquired the region lock because of the caller's 610 * flag argument, but we found the record in the in-memory or cursor 611 * buffers. Improbable, but it's easy to avoid.) 612 */ 613 if (rlock == L_ACQUIRED) { 614 rlock = L_NONE; 615 LOG_SYSTEM_UNLOCK(env); 616 } 617 618 /* 619 * Checksum: there are two types of errors -- a configuration error 620 * or a checksum mismatch. The former is always bad. The latter is 621 * OK if we're searching for the end of the log, and very, very bad 622 * if we're reading random log records. 623 */ 624 if ((ret = __db_check_chksum(env, &hdr, db_cipher, 625 hdr.chksum, rp + hdr.size, hdr.len - hdr.size, is_hmac)) != 0) { 626 if (F_ISSET(logc, DB_LOG_SILENT_ERR)) { 627 if (ret == 0 || ret == -1) 628 ret = EIO; 629 } else if (ret == -1) { 630 __db_errx(env, 631 "DB_LOGC->get: log record LSN %lu/%lu: checksum mismatch", 632 (u_long)nlsn.file, (u_long)nlsn.offset); 633 __db_errx(env, 634 "DB_LOGC->get: catastrophic recovery may be required"); 635 ret = __env_panic(env, DB_RUNRECOVERY); 636 } 637 goto err; 638 } 639 640from_memory: 641 /* 642 * Discard the region lock if we're still holding it. (The path to 643 * get here is we acquired the region lock because of the caller's 644 * flag argument, but we found the record in the in-memory or cursor 645 * buffers. Improbable, but it's easy to avoid.) 646 */ 647 if (rlock == L_ACQUIRED) { 648 rlock = L_NONE; 649 LOG_SYSTEM_UNLOCK(env); 650 } 651 652 /* Copy the record into the user's DBT. */ 653 if ((ret = __db_retcopy(env, dbt, rp + hdr.size, 654 (u_int32_t)(hdr.len - hdr.size), 655 &logc->dbt.data, &logc->dbt.ulen)) != 0) 656 goto err; 657 658 if (CRYPTO_ON(env)) { 659 if ((ret = db_cipher->decrypt(env, db_cipher->data, 660 hdr.iv, dbt->data, hdr.len - hdr.size)) != 0) { 661 ret = EAGAIN; 662 goto err; 663 } 664 /* 665 * Return the original log record size to the user, 666 * even though we've allocated more than that, possibly. 667 * The log record is decrypted in the user dbt, not in 668 * the buffer, so we must do this here after decryption, 669 * not adjust the len passed to the __db_retcopy call. 670 */ 671 dbt->size = hdr.orig_size; 672 } 673 674 /* Update the cursor and the returned LSN. */ 675 *alsn = nlsn; 676 logc->lsn = nlsn; 677 logc->len = hdr.len; 678 logc->prev = hdr.prev; 679 680err: if (rlock == L_ACQUIRED) 681 LOG_SYSTEM_UNLOCK(env); 682 683 return (ret); 684} 685 686/* 687 * __logc_incursor -- 688 * Check to see if the requested record is in the cursor's buffer. 689 */ 690static int 691__logc_incursor(logc, lsn, hdr, pp) 692 DB_LOGC *logc; 693 DB_LSN *lsn; 694 HDR *hdr; 695 u_int8_t **pp; 696{ 697 ENV *env; 698 u_int8_t *p; 699 int eof; 700 701 env = logc->env; 702 *pp = NULL; 703 704 /* 705 * Test to see if the requested LSN could be part of the cursor's 706 * buffer. 707 * 708 * The record must be part of the same file as the cursor's buffer. 709 * The record must start at a byte offset equal to or greater than 710 * the cursor buffer. 711 * The record must not start at a byte offset after the cursor 712 * buffer's end. 713 */ 714 if (logc->bp_lsn.file != lsn->file) 715 return (0); 716 if (logc->bp_lsn.offset > lsn->offset) 717 return (0); 718 if (logc->bp_lsn.offset + logc->bp_rlen <= lsn->offset + hdr->size) 719 return (0); 720 721 /* 722 * Read the record's header and check if the record is entirely held 723 * in the buffer. If the record is not entirely held, get it again. 724 * (The only advantage in having part of the record locally is that 725 * we might avoid a system call because we already have the HDR in 726 * memory.) 727 * 728 * If the header check fails for any reason, it must be because the 729 * LSN is bogus. Fail hard. 730 */ 731 p = logc->bp + (lsn->offset - logc->bp_lsn.offset); 732 memcpy(hdr, p, hdr->size); 733 if (LOG_SWAPPED(env)) 734 __log_hdrswap(hdr, CRYPTO_ON(env)); 735 if (__logc_hdrchk(logc, lsn, hdr, &eof)) 736 return (DB_NOTFOUND); 737 if (eof || logc->bp_lsn.offset + logc->bp_rlen < lsn->offset + hdr->len) 738 return (0); 739 740 *pp = p; /* Success. */ 741 742 return (0); 743} 744 745/* 746 * __logc_inregion -- 747 * Check to see if the requested record is in the region's buffer. 748 */ 749static int 750__logc_inregion(logc, lsn, rlockp, last_lsn, hdr, pp, need_cksump) 751 DB_LOGC *logc; 752 DB_LSN *lsn, *last_lsn; 753 RLOCK *rlockp; 754 HDR *hdr; 755 u_int8_t **pp; 756 int *need_cksump; 757{ 758 DB_LOG *dblp; 759 ENV *env; 760 LOG *lp; 761 size_t b_region, len, nr; 762 u_int32_t b_disk; 763 int eof, ret; 764 u_int8_t *p; 765 766 env = logc->env; 767 dblp = env->lg_handle; 768 lp = env->lg_handle->reginfo.primary; 769 770 ret = 0; 771 b_region = 0; 772 *pp = NULL; 773 *need_cksump = 0; 774 775 /* If we haven't yet acquired the log region lock, do so. */ 776 if (*rlockp == L_NONE) { 777 *rlockp = L_ACQUIRED; 778 LOG_SYSTEM_LOCK(env); 779 } 780 781 /* 782 * The routines to read from disk must avoid reading past the logical 783 * end of the log, so pass that information back to it. 784 * 785 * Since they're reading directly from the disk, they must also avoid 786 * reading past the offset we've written out. If the log was 787 * truncated, it's possible that there are zeroes or garbage on 788 * disk after this offset, and the logical end of the log can 789 * come later than this point if the log buffer isn't empty. 790 */ 791 *last_lsn = lp->lsn; 792 if (!lp->db_log_inmemory && last_lsn->offset > lp->w_off) 793 last_lsn->offset = lp->w_off; 794 795 /* 796 * Test to see if the requested LSN could be part of the region's 797 * buffer. 798 * 799 * During recovery, we read the log files getting the information to 800 * initialize the region. In that case, the region's lsn field will 801 * not yet have been filled in, use only the disk. 802 * 803 * The record must not start at a byte offset after the region buffer's 804 * end, since that means the request is for a record after the end of 805 * the log. Do this test even if the region's buffer is empty -- after 806 * recovery, the log files may continue past the declared end-of-log, 807 * and the disk reading routine will incorrectly attempt to read the 808 * remainder of the log. 809 * 810 * Otherwise, test to see if the region's buffer actually has what we 811 * want: 812 * 813 * The buffer must have some useful content. 814 * The record must be in the same file as the region's buffer and must 815 * start at a byte offset equal to or greater than the region's buffer. 816 */ 817 if (IS_ZERO_LSN(lp->lsn)) 818 return (0); 819 if (LOG_COMPARE(lsn, &lp->lsn) >= 0) 820 return (DB_NOTFOUND); 821 else if (lp->db_log_inmemory) { 822 if ((ret = __log_inmem_lsnoff(dblp, lsn, &b_region)) != 0) 823 return (ret); 824 } else if (lp->b_off == 0 || LOG_COMPARE(lsn, &lp->f_lsn) < 0) 825 return (0); 826 827 /* 828 * The current contents of the cursor's buffer will be useless for a 829 * future call, we're about to overwrite it -- trash it rather than 830 * try and make it look correct. 831 */ 832 logc->bp_rlen = 0; 833 834 /* 835 * If the requested LSN is greater than the region buffer's first 836 * byte, we know the entire record is in the buffer on a good LSN. 837 * 838 * If we're given a bad LSN, the "entire" record might not be in 839 * our buffer in order to fail at the chksum. __logc_hdrchk made 840 * sure our dest buffer fits, via bp_maxrec, but we also need to 841 * make sure we don't run off the end of this buffer, the src. 842 * 843 * There is one case where the header check can fail: on a scan through 844 * in-memory logs, when we reach the end of a file we can read an empty 845 * header. In that case, it's safe to return zero, here: it will be 846 * caught in our caller. Otherwise, the LSN is bogus. Fail hard. 847 */ 848 if (lp->db_log_inmemory || LOG_COMPARE(lsn, &lp->f_lsn) > 0) { 849 if (!lp->db_log_inmemory) 850 b_region = lsn->offset - lp->w_off; 851 __log_inmem_copyout(dblp, b_region, hdr, hdr->size); 852 if (LOG_SWAPPED(env)) 853 __log_hdrswap(hdr, CRYPTO_ON(env)); 854 if (__logc_hdrchk(logc, lsn, hdr, &eof) != 0) 855 return (DB_NOTFOUND); 856 if (eof) 857 return (0); 858 if (lp->db_log_inmemory) { 859 if (RINGBUF_LEN(lp, b_region, lp->b_off) < hdr->len) 860 return (DB_NOTFOUND); 861 } else if (lsn->offset + hdr->len > lp->w_off + lp->buffer_size) 862 return (DB_NOTFOUND); 863 if (logc->bp_size <= hdr->len) { 864 len = (size_t)DB_ALIGN((uintmax_t)hdr->len * 2, 128); 865 if ((ret = 866 __os_realloc(logc->env, len, &logc->bp)) != 0) 867 return (ret); 868 logc->bp_size = (u_int32_t)len; 869 } 870 __log_inmem_copyout(dblp, b_region, logc->bp, hdr->len); 871 *pp = logc->bp; 872 return (0); 873 } 874 875 DB_ASSERT(env, !lp->db_log_inmemory); 876 877 /* 878 * There's a partial record, that is, the requested record starts 879 * in a log file and finishes in the region buffer. We have to 880 * find out how many bytes of the record are in the region buffer 881 * so we can copy them out into the cursor buffer. First, check 882 * to see if the requested record is the only record in the region 883 * buffer, in which case we should copy the entire region buffer. 884 * 885 * Else, walk back through the region's buffer to find the first LSN 886 * after the record that crosses the buffer boundary -- we can detect 887 * that LSN, because its "prev" field will reference the record we 888 * want. The bytes we need to copy from the region buffer are the 889 * bytes up to the record we find. The bytes we'll need to allocate 890 * to hold the log record are the bytes between the two offsets. 891 */ 892 b_disk = lp->w_off - lsn->offset; 893 if (lp->b_off <= lp->len) 894 b_region = (u_int32_t)lp->b_off; 895 else 896 for (p = dblp->bufp + (lp->b_off - lp->len);;) { 897 memcpy(hdr, p, hdr->size); 898 if (LOG_SWAPPED(env)) 899 __log_hdrswap(hdr, CRYPTO_ON(env)); 900 if (hdr->prev == lsn->offset) { 901 b_region = (u_int32_t)(p - dblp->bufp); 902 break; 903 } 904 p = dblp->bufp + (hdr->prev - lp->w_off); 905 } 906 907 /* 908 * If we don't have enough room for the record, we have to allocate 909 * space. We have to do it while holding the region lock, which is 910 * truly annoying, but there's no way around it. This call is why 911 * we allocate cursor buffer space when allocating the cursor instead 912 * of waiting. 913 */ 914 if (logc->bp_size <= b_region + b_disk) { 915 len = (size_t)DB_ALIGN((uintmax_t)(b_region + b_disk) * 2, 128); 916 if ((ret = __os_realloc(logc->env, len, &logc->bp)) != 0) 917 return (ret); 918 logc->bp_size = (u_int32_t)len; 919 } 920 921 /* Copy the region's bytes to the end of the cursor's buffer. */ 922 p = (logc->bp + logc->bp_size) - b_region; 923 memcpy(p, dblp->bufp, b_region); 924 925 /* Release the region lock. */ 926 if (*rlockp == L_ACQUIRED) { 927 *rlockp = L_NONE; 928 LOG_SYSTEM_UNLOCK(env); 929 } 930 931 /* 932 * Read the rest of the information from disk. Neither short reads 933 * or EOF are acceptable, the bytes we want had better be there. 934 */ 935 if (b_disk != 0) { 936 p -= b_disk; 937 nr = b_disk; 938 if ((ret = __logc_io( 939 logc, lsn->file, lsn->offset, p, &nr, NULL)) != 0) 940 return (ret); 941 if (nr < b_disk) 942 return (__logc_shortread(logc, lsn, 0)); 943 944 /* We read bytes from the disk, we'll need to checksum them. */ 945 *need_cksump = 1; 946 } 947 948 /* Copy the header information into the caller's structure. */ 949 memcpy(hdr, p, hdr->size); 950 if (LOG_SWAPPED(env)) 951 __log_hdrswap(hdr, CRYPTO_ON(env)); 952 953 *pp = p; 954 return (0); 955} 956 957/* 958 * __log_hdrswap -- 959 * Swap the bytes in a log header from machines with different endianness. 960 * 961 * PUBLIC: void __log_hdrswap __P((HDR *, int)); 962 */ 963void 964__log_hdrswap(hdr, is_hmac) 965 HDR *hdr; 966 int is_hmac; 967{ 968 M_32_SWAP(hdr->prev); 969 M_32_SWAP(hdr->len); 970 if (!is_hmac) 971 P_32_SWAP(hdr->chksum); 972} 973 974/* 975 * __log_persistswap -- 976 * Swap the bytes in a log file persistent header from machines with 977 * different endianness. 978 * 979 * PUBLIC: void __log_persistswap __P((LOGP *)); 980 */ 981void 982__log_persistswap(persist) 983 LOGP *persist; 984{ 985 M_32_SWAP(persist->magic); 986 M_32_SWAP(persist->version); 987 M_32_SWAP(persist->log_size); 988 M_32_SWAP(persist->notused); 989} 990 991/* 992 * __logc_ondisk -- 993 * Read a record off disk. 994 */ 995static int 996__logc_ondisk(logc, lsn, last_lsn, flags, hdr, pp, eofp) 997 DB_LOGC *logc; 998 DB_LSN *lsn, *last_lsn; 999 u_int32_t flags; 1000 int *eofp; 1001 HDR *hdr; 1002 u_int8_t **pp; 1003{ 1004 ENV *env; 1005 size_t len, nr; 1006 u_int32_t offset; 1007 int ret; 1008 1009 env = logc->env; 1010 *eofp = 0; 1011 1012 nr = hdr->size; 1013 if ((ret = 1014 __logc_io(logc, lsn->file, lsn->offset, hdr, &nr, eofp)) != 0) 1015 return (ret); 1016 if (*eofp) 1017 return (0); 1018 1019 if (LOG_SWAPPED(env)) 1020 __log_hdrswap(hdr, CRYPTO_ON(env)); 1021 1022 /* 1023 * If the read was successful, but we can't read a full header, assume 1024 * we've hit EOF. We can't check that the header has been partially 1025 * zeroed out, but it's unlikely that this is caused by a write failure 1026 * since the header is written as a single write call and it's less 1027 * than sector. 1028 */ 1029 if (nr < hdr->size) { 1030 *eofp = 1; 1031 return (0); 1032 } 1033 1034 /* Check the HDR. */ 1035 if ((ret = __logc_hdrchk(logc, lsn, hdr, eofp)) != 0) 1036 return (ret); 1037 if (*eofp) 1038 return (0); 1039 1040 /* 1041 * Regardless of how we return, the previous contents of the cursor's 1042 * buffer are useless -- trash it. 1043 */ 1044 logc->bp_rlen = 0; 1045 1046 /* 1047 * Otherwise, we now (finally!) know how big the record is. (Maybe 1048 * we should have just stuck the length of the record into the LSN!?) 1049 * Make sure we have enough space. 1050 */ 1051 if (logc->bp_size <= hdr->len) { 1052 len = (size_t)DB_ALIGN((uintmax_t)hdr->len * 2, 128); 1053 if ((ret = __os_realloc(env, len, &logc->bp)) != 0) 1054 return (ret); 1055 logc->bp_size = (u_int32_t)len; 1056 } 1057 1058 /* 1059 * If we're moving forward in the log file, read this record in at the 1060 * beginning of the buffer. Otherwise, read this record in at the end 1061 * of the buffer, making sure we don't try and read before the start 1062 * of the file. (We prefer positioning at the end because transaction 1063 * aborts use DB_SET to move backward through the log and we might get 1064 * lucky.) 1065 * 1066 * Read a buffer's worth, without reading past the logical EOF. The 1067 * last_lsn may be a zero LSN, but that's OK, the test works anyway. 1068 */ 1069 if (flags == DB_FIRST || flags == DB_NEXT) 1070 offset = lsn->offset; 1071 else if (lsn->offset + hdr->len < logc->bp_size) 1072 offset = 0; 1073 else 1074 offset = (lsn->offset + hdr->len) - logc->bp_size; 1075 1076 nr = logc->bp_size; 1077 if (lsn->file == last_lsn->file && offset + nr >= last_lsn->offset) 1078 nr = last_lsn->offset - offset; 1079 1080 if ((ret = 1081 __logc_io(logc, lsn->file, offset, logc->bp, &nr, eofp)) != 0) 1082 return (ret); 1083 1084 /* 1085 * We should have at least gotten the bytes up-to-and-including the 1086 * record we're reading. 1087 */ 1088 if (nr < (lsn->offset + hdr->len) - offset) 1089 return (__logc_shortread(logc, lsn, 1)); 1090 1091 /* 1092 * Set up the return information. 1093 * 1094 * !!! 1095 * No need to set the bp_lsn.file field, __logc_io set it for us. 1096 */ 1097 logc->bp_rlen = (u_int32_t)nr; 1098 logc->bp_lsn.offset = offset; 1099 1100 *pp = logc->bp + (lsn->offset - offset); 1101 1102 return (0); 1103} 1104 1105/* 1106 * __logc_hdrchk -- 1107 * 1108 * Check for corrupted HDRs before we use them to allocate memory or find 1109 * records. 1110 * 1111 * If the log files were pre-allocated, a zero-filled HDR structure is the 1112 * logical file end. However, we can see buffers filled with 0's during 1113 * recovery, too (because multiple log buffers were written asynchronously, 1114 * and one made it to disk before a different one that logically precedes 1115 * it in the log file. 1116 * 1117 * Check for impossibly large records. The malloc should fail later, but we 1118 * have customers that run mallocs that treat all allocation failures as fatal 1119 * errors. 1120 * 1121 * Note that none of this is necessarily something awful happening. We let 1122 * the application hand us any LSN they want, and it could be a pointer into 1123 * the middle of a log record, there's no way to tell. 1124 */ 1125static int 1126__logc_hdrchk(logc, lsn, hdr, eofp) 1127 DB_LOGC *logc; 1128 DB_LSN *lsn; 1129 HDR *hdr; 1130 int *eofp; 1131{ 1132 ENV *env; 1133 int ret; 1134 1135 env = logc->env; 1136 1137 /* 1138 * Check EOF before we do any other processing. 1139 */ 1140 if (eofp != NULL) { 1141 if (hdr->prev == 0 && hdr->chksum[0] == 0 && hdr->len == 0) { 1142 *eofp = 1; 1143 return (0); 1144 } 1145 *eofp = 0; 1146 } 1147 1148 /* 1149 * Sanity check the log record's size. 1150 * We must check it after "virtual" EOF above. 1151 */ 1152 if (hdr->len <= hdr->size) 1153 goto err; 1154 1155 /* 1156 * If the cursor's max-record value isn't yet set, it means we aren't 1157 * reading these records from a log file and no check is necessary. 1158 */ 1159 if (logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec) { 1160 /* 1161 * If we fail the check, there's the pathological case that 1162 * we're reading the last file, it's growing, and our initial 1163 * check information was wrong. Get it again, to be sure. 1164 */ 1165 if ((ret = __logc_set_maxrec(logc, NULL)) != 0) { 1166 __db_err(env, ret, "DB_LOGC->get"); 1167 return (ret); 1168 } 1169 if (logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec) 1170 goto err; 1171 } 1172 return (0); 1173 1174err: if (!F_ISSET(logc, DB_LOG_SILENT_ERR)) 1175 __db_errx(env, 1176 "DB_LOGC->get: LSN %lu/%lu: invalid log record header", 1177 (u_long)lsn->file, (u_long)lsn->offset); 1178 return (EIO); 1179} 1180 1181/* 1182 * __logc_io -- 1183 * Read records from a log file. 1184 */ 1185static int 1186__logc_io(logc, fnum, offset, p, nrp, eofp) 1187 DB_LOGC *logc; 1188 u_int32_t fnum, offset; 1189 void *p; 1190 size_t *nrp; 1191 int *eofp; 1192{ 1193 DB_LOG *dblp; 1194 ENV *env; 1195 LOG *lp; 1196 int ret; 1197 char *np; 1198 1199 env = logc->env; 1200 dblp = env->lg_handle; 1201 lp = dblp->reginfo.primary; 1202 1203 /* 1204 * If we've switched files, discard the current file handle and acquire 1205 * a new one. 1206 */ 1207 if (logc->fhp != NULL && logc->bp_lsn.file != fnum) { 1208 ret = __os_closehandle(env, logc->fhp); 1209 logc->fhp = NULL; 1210 logc->bp_lsn.file = 0; 1211 1212 if (ret != 0) 1213 return (ret); 1214 } 1215 if (logc->fhp == NULL) { 1216 if ((ret = __log_name(dblp, fnum, 1217 &np, &logc->fhp, DB_OSO_RDONLY | DB_OSO_SEQ)) != 0) { 1218 /* 1219 * If we're allowed to return EOF, assume that's the 1220 * problem, set the EOF status flag and return 0. 1221 */ 1222 if (eofp != NULL) { 1223 *eofp = 1; 1224 ret = 0; 1225 } else if (!F_ISSET(logc, DB_LOG_SILENT_ERR)) 1226 __db_err(env, ret, "DB_LOGC->get: %s", 1227 np == NULL ? "__log_name failed" : np); 1228 __os_free(env, np); 1229 return (ret); 1230 } 1231 1232 if ((ret = __logc_set_maxrec(logc, np)) != 0) { 1233 __db_err(env, ret, "DB_LOGC->get: %s", np); 1234 __os_free(env, np); 1235 return (ret); 1236 } 1237 __os_free(env, np); 1238 1239 logc->bp_lsn.file = fnum; 1240 } 1241 1242 STAT(++lp->stat.st_rcount); 1243 /* Seek to the record's offset and read the data. */ 1244 if ((ret = __os_io(env, DB_IO_READ, 1245 logc->fhp, 0, 0, offset, (u_int32_t)*nrp, p, nrp)) != 0) { 1246 if (!F_ISSET(logc, DB_LOG_SILENT_ERR)) 1247 __db_err(env, ret, 1248 "DB_LOGC->get: LSN: %lu/%lu: read", 1249 (u_long)fnum, (u_long)offset); 1250 return (ret); 1251 } 1252 1253 return (0); 1254} 1255 1256/* 1257 * __logc_shortread -- 1258 * Read was short -- return a consistent error message and error. 1259 */ 1260static int 1261__logc_shortread(logc, lsn, check_silent) 1262 DB_LOGC *logc; 1263 DB_LSN *lsn; 1264 int check_silent; 1265{ 1266 if (!check_silent || !F_ISSET(logc, DB_LOG_SILENT_ERR)) 1267 __db_errx(logc->env, "DB_LOGC->get: LSN: %lu/%lu: short read", 1268 (u_long)lsn->file, (u_long)lsn->offset); 1269 return (EIO); 1270} 1271 1272/* 1273 * __logc_set_maxrec -- 1274 * Bound the maximum log record size in a log file. 1275 */ 1276static int 1277__logc_set_maxrec(logc, np) 1278 DB_LOGC *logc; 1279 char *np; 1280{ 1281 DB_LOG *dblp; 1282 ENV *env; 1283 LOG *lp; 1284 u_int32_t mbytes, bytes; 1285 int ret; 1286 1287 env = logc->env; 1288 dblp = env->lg_handle; 1289 1290 /* 1291 * We don't want to try and allocate huge chunks of memory because 1292 * applications with error-checking malloc's often consider that a 1293 * hard failure. If we're about to look at a corrupted record with 1294 * a bizarre size, we need to know before trying to allocate space 1295 * to hold it. We could read the persistent data at the beginning 1296 * of the file but that's hard -- we may have to decrypt it, checksum 1297 * it and so on. Stat the file instead. 1298 */ 1299 if (logc->fhp != NULL) { 1300 if ((ret = __os_ioinfo(env, np, logc->fhp, 1301 &mbytes, &bytes, NULL)) != 0) 1302 return (ret); 1303 if (logc->bp_maxrec < (mbytes * MEGABYTE + bytes)) 1304 logc->bp_maxrec = mbytes * MEGABYTE + bytes; 1305 } 1306 1307 /* 1308 * If reading from the log file currently being written, we could get 1309 * an incorrect size, that is, if the cursor was opened on the file 1310 * when it had only a few hundred bytes, and then the cursor used to 1311 * move forward in the file, after more log records were written, the 1312 * original stat value would be wrong. Use the maximum of the current 1313 * log file size and the size of the buffer -- that should represent 1314 * the max of any log record currently in the file. 1315 * 1316 * The log buffer size is set when the environment is opened and never 1317 * changed, we don't need a lock on it. 1318 */ 1319 lp = dblp->reginfo.primary; 1320 if (logc->bp_maxrec < lp->buffer_size) 1321 logc->bp_maxrec = lp->buffer_size; 1322 1323 return (0); 1324} 1325 1326#ifdef HAVE_REPLICATION 1327/* 1328 * __log_rep_split -- 1329 * - Split a log buffer into individual records. 1330 * 1331 * This is used by a replication client to process a bulk log message from the 1332 * master and convert it into individual __rep_apply requests. 1333 * 1334 * PUBLIC: int __log_rep_split __P((ENV *, DB_THREAD_INFO *, 1335 * PUBLIC: __rep_control_args *, DBT *, DB_LSN *, DB_LSN *)); 1336 */ 1337int 1338__log_rep_split(env, ip, rp, rec, ret_lsnp, last_lsnp) 1339 ENV *env; 1340 DB_THREAD_INFO *ip; 1341 __rep_control_args *rp; 1342 DBT *rec; 1343 DB_LSN *ret_lsnp; 1344 DB_LSN *last_lsnp; 1345{ 1346 DBT logrec; 1347 DB_LSN save_lsn, tmp_lsn; 1348 __rep_control_args tmprp; 1349 __rep_bulk_args b_args; 1350 int ret, save_ret; 1351 u_int32_t save_flags; 1352 u_int8_t *p, *ep; 1353 1354 memset(&logrec, 0, sizeof(logrec)); 1355 memset(&save_lsn, 0, sizeof(save_lsn)); 1356 memset(&tmp_lsn, 0, sizeof(tmp_lsn)); 1357 /* 1358 * We're going to be modifying the rp LSN contents so make 1359 * our own private copy to play with. 1360 */ 1361 memcpy(&tmprp, rp, sizeof(tmprp)); 1362 /* 1363 * We send the bulk buffer on a PERM record, so often we will have 1364 * DB_LOG_PERM set. However, we only want to mark the last LSN 1365 * we have as a PERM record. So clear it here, and when we're on 1366 * the last record below, set it. The same applies if the sender 1367 * set REPCTL_LOG_END on this message. We want the end of the 1368 * bulk buffer to be marked as the end. 1369 */ 1370 save_flags = F_ISSET(rp, REPCTL_LOG_END | REPCTL_PERM); 1371 F_CLR(&tmprp, REPCTL_LOG_END | REPCTL_PERM); 1372 ret = save_ret = 0; 1373 for (ep = (u_int8_t *)rec->data + rec->size, p = (u_int8_t *)rec->data; 1374 p < ep; ) { 1375 /* 1376 * First thing in the buffer is the length. Then the LSN 1377 * of this record, then the record itself. 1378 */ 1379 if (rp->rep_version < DB_REPVERSION_47) { 1380 memcpy(&b_args.len, p, sizeof(b_args.len)); 1381 p += sizeof(b_args.len); 1382 memcpy(&tmprp.lsn, p, sizeof(DB_LSN)); 1383 p += sizeof(DB_LSN); 1384 logrec.data = p; 1385 logrec.size = b_args.len; 1386 p += b_args.len; 1387 } else { 1388 if ((ret = __rep_bulk_unmarshal(env, 1389 &b_args, p, rec->size, &p)) != 0) 1390 return (ret); 1391 tmprp.lsn = b_args.lsn; 1392 logrec.data = b_args.bulkdata.data; 1393 logrec.size = b_args.len; 1394 } 1395 RPRINT(env, DB_VERB_REP_MISC, (env, 1396 "log_rep_split: Processing LSN [%lu][%lu]", 1397 (u_long)tmprp.lsn.file, (u_long)tmprp.lsn.offset)); 1398 RPRINT(env, DB_VERB_REP_MISC, (env, 1399 "log_rep_split: p %#lx ep %#lx logrec data %#lx, size %lu (%#lx)", 1400 P_TO_ULONG(p), P_TO_ULONG(ep), P_TO_ULONG(logrec.data), 1401 (u_long)logrec.size, (u_long)logrec.size)); 1402 if (p >= ep && save_flags) 1403 F_SET(&tmprp, save_flags); 1404 ret = __rep_apply(env, ip, 1405 &tmprp, &logrec, &tmp_lsn, NULL, last_lsnp); 1406 RPRINT(env, DB_VERB_REP_MISC, (env, 1407 "log_split: rep_apply ret %d, tmp_lsn [%lu][%lu]", 1408 ret, (u_long)tmp_lsn.file, (u_long)tmp_lsn.offset)); 1409 switch (ret) { 1410 /* 1411 * If we received the pieces we need for running recovery, 1412 * short-circuit because recovery will truncate the log to 1413 * the LSN we want anyway. 1414 */ 1415 case DB_REP_LOGREADY: 1416 goto out; 1417 /* 1418 * If we just handled a special record, retain that information. 1419 */ 1420 case DB_REP_ISPERM: 1421 case DB_REP_NOTPERM: 1422 save_ret = ret; 1423 save_lsn = tmp_lsn; 1424 ret = 0; 1425 break; 1426 /* 1427 * Normal processing, do nothing, just continue. 1428 */ 1429 case 0: 1430 break; 1431 /* 1432 * If we get an error, then stop immediately. 1433 */ 1434 default: 1435 goto out; 1436 } 1437 } 1438out: 1439 /* 1440 * If we finish processing successfully, set our return values 1441 * based on what we saw. 1442 */ 1443 if (ret == 0) { 1444 ret = save_ret; 1445 *ret_lsnp = save_lsn; 1446 } 1447 return (ret); 1448} 1449#endif 1450