/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996-2009 Oracle. All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/crypto.h"
#include "dbinc/db_page.h"
#include "dbinc/hmac.h"
#include "dbinc/log.h"
#include "dbinc/hash.h"

/*
 * RLOCK --
 *	State of the log region lock as seen by one DB_LOGC->get call.
 *
 *	L_ALREADY:  the cursor has DB_LOG_LOCKED set, the lock is never
 *		    taken or released by the code in this file.
 *	L_ACQUIRED: this file called LOG_SYSTEM_LOCK and is responsible
 *		    for the matching LOG_SYSTEM_UNLOCK.
 *	L_NONE:	    the lock is not held.
 */
typedef enum { L_ALREADY, L_ACQUIRED, L_NONE } RLOCK;

static int __logc_close_pp __P((DB_LOGC *, u_int32_t));
static int __logc_get_pp __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t));
static int __logc_get_int __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t));
static int __logc_hdrchk __P((DB_LOGC *, DB_LSN *, HDR *, int *));
static int __logc_incursor __P((DB_LOGC *, DB_LSN *, HDR *, u_int8_t **));
static int __logc_inregion __P((DB_LOGC *,
	       DB_LSN *, RLOCK *, DB_LSN *, HDR *, u_int8_t **, int *));
static int __logc_io __P((DB_LOGC *,
	       u_int32_t, u_int32_t, void *, size_t *, int *));
static int __logc_ondisk __P((DB_LOGC *,
	       DB_LSN *, DB_LSN *, u_int32_t, HDR *, u_int8_t **, int *));
static int __logc_set_maxrec __P((DB_LOGC *, char *));
static int __logc_shortread __P((DB_LOGC *, DB_LSN *, int));
static int __logc_version_pp __P((DB_LOGC *, u_int32_t *, u_int32_t));

/*
 * __log_cursor_pp --
 *	ENV->log_cursor
 *
 *	Public entry point: check that logging is configured and that the
 *	flags are acceptable, then create the cursor with __log_cursor.
 *
 *	dbenv:	the environment handle.
 *	logcp:	where the new cursor is returned.
 *	flags:	checked by __db_fchk against an allowed mask of 0.
 *
 *	Returns 0 on success, otherwise the error from the flag check or
 *	from __log_cursor.
 *
 * PUBLIC: int __log_cursor_pp __P((DB_ENV *, DB_LOGC **, u_int32_t));
 */
int
__log_cursor_pp(dbenv, logcp, flags)
	DB_ENV *dbenv;
	DB_LOGC **logcp;
	u_int32_t flags;
{
	DB_THREAD_INFO *ip;
	ENV *env;
	int ret;

	env = dbenv->env;

	/* The log subsystem handle must exist before a cursor can be made. */
	ENV_REQUIRES_CONFIG(env,
	    env->lg_handle, "DB_ENV->log_cursor", DB_INIT_LOG);

	/* Validate arguments. */
	if ((ret = __db_fchk(env, "DB_ENV->log_cursor", flags, 0)) != 0)
		return (ret);

	/* The real work is bracketed by the environment enter/leave macros. */
	ENV_ENTER(env, ip);
	REPLICATION_WRAP(env, (__log_cursor(env, logcp)), 0, ret);
	ENV_LEAVE(env, ip);
	return (ret);
}

/*
 * __log_cursor --
 *	Create a log cursor.
69 * 70 * PUBLIC: int __log_cursor __P((ENV *, DB_LOGC **)); 71 */ 72int 73__log_cursor(env, logcp) 74 ENV *env; 75 DB_LOGC **logcp; 76{ 77 DB_LOGC *logc; 78 int ret; 79 80 *logcp = NULL; 81 82 /* Allocate memory for the cursor. */ 83 if ((ret = __os_calloc(env, 1, sizeof(DB_LOGC), &logc)) != 0) 84 return (ret); 85 86 logc->bp_size = LG_CURSOR_BUF_SIZE; 87 /* 88 * Set this to something positive. 89 */ 90 logc->bp_maxrec = MEGABYTE; 91 if ((ret = __os_malloc(env, logc->bp_size, &logc->bp)) != 0) { 92 __os_free(env, logc); 93 return (ret); 94 } 95 96 logc->env = env; 97 logc->close = __logc_close_pp; 98 logc->get = __logc_get_pp; 99 logc->version = __logc_version_pp; 100 101 *logcp = logc; 102 return (0); 103} 104 105/* 106 * __logc_close_pp -- 107 * DB_LOGC->close pre/post processing. 108 */ 109static int 110__logc_close_pp(logc, flags) 111 DB_LOGC *logc; 112 u_int32_t flags; 113{ 114 DB_THREAD_INFO *ip; 115 ENV *env; 116 int ret; 117 118 env = logc->env; 119 120 if ((ret = __db_fchk(env, "DB_LOGC->close", flags, 0)) != 0) 121 return (ret); 122 123 ENV_ENTER(env, ip); 124 REPLICATION_WRAP(env, (__logc_close(logc)), 0, ret); 125 ENV_LEAVE(env, ip); 126 return (ret); 127} 128 129/* 130 * __logc_close -- 131 * DB_LOGC->close. 132 * 133 * PUBLIC: int __logc_close __P((DB_LOGC *)); 134 */ 135int 136__logc_close(logc) 137 DB_LOGC *logc; 138{ 139 ENV *env; 140 141 env = logc->env; 142 143 if (logc->fhp != NULL) { 144 (void)__os_closehandle(env, logc->fhp); 145 logc->fhp = NULL; 146 } 147 148 if (logc->dbt.data != NULL) 149 __os_free(env, logc->dbt.data); 150 151 __os_free(env, logc->bp); 152 __os_free(env, logc); 153 154 return (0); 155} 156 157/* 158 * __logc_version_pp -- 159 * DB_LOGC->version. 
160 */ 161static int 162__logc_version_pp(logc, versionp, flags) 163 DB_LOGC *logc; 164 u_int32_t *versionp; 165 u_int32_t flags; 166{ 167 DB_THREAD_INFO *ip; 168 ENV *env; 169 int ret; 170 171 env = logc->env; 172 173 if ((ret = __db_fchk(env, "DB_LOGC->version", flags, 0)) != 0) 174 return (ret); 175 176 ENV_ENTER(env, ip); 177 REPLICATION_WRAP(env, (__logc_version(logc, versionp)), 0, ret); 178 ENV_LEAVE(env, ip); 179 return (ret); 180} 181 182/* 183 * __logc_version -- 184 * DB_LOGC->version. 185 * 186 * PUBLIC: int __logc_version __P((DB_LOGC *, u_int32_t *)); 187 */ 188int 189__logc_version(logc, versionp) 190 DB_LOGC *logc; 191 u_int32_t *versionp; 192{ 193 DBT hdrdbt; 194 DB_LOGC *plogc; 195 DB_LSN plsn; 196 ENV *env; 197 LOGP *persist; 198 int ret, t_ret; 199 200 env = logc->env; 201 if (IS_ZERO_LSN(logc->lsn)) { 202 __db_errx(env, "DB_LOGC->get: unset cursor"); 203 return (EINVAL); 204 } 205 ret = 0; 206 /* 207 * Check if the persist info we have is for the same file 208 * as the current cursor position. If we already have the 209 * information, then we're done. If not, we open a new 210 * log cursor and get the header. 211 * 212 * Since most users walk forward through the log when 213 * using this feature (i.e. printlog) we're likely to 214 * have the information we need. 215 */ 216 if (logc->lsn.file != logc->p_lsn.file) { 217 if ((ret = __log_cursor(env, &plogc)) != 0) 218 return (ret); 219 plsn.file = logc->lsn.file; 220 plsn.offset = 0; 221 plogc->lsn = plsn; 222 memset(&hdrdbt, 0, sizeof(DBT)); 223 if ((ret = __logc_get_int(plogc, 224 &plsn, &hdrdbt, DB_SET)) == 0) { 225 persist = (LOGP *)hdrdbt.data; 226 if (LOG_SWAPPED(env)) 227 __log_persistswap(persist); 228 logc->p_lsn = logc->lsn; 229 logc->p_version = persist->version; 230 } 231 if ((t_ret = __logc_close(plogc)) != 0 && ret == 0) 232 ret = t_ret; 233 } 234 /* Return the version. 
*/ 235 if (ret == 0) 236 *versionp = logc->p_version; 237 return (ret); 238} 239 240/* 241 * __logc_get_pp -- 242 * DB_LOGC->get pre/post processing. 243 */ 244static int 245__logc_get_pp(logc, alsn, dbt, flags) 246 DB_LOGC *logc; 247 DB_LSN *alsn; 248 DBT *dbt; 249 u_int32_t flags; 250{ 251 DB_THREAD_INFO *ip; 252 ENV *env; 253 int ret; 254 255 env = logc->env; 256 257 /* Validate arguments. */ 258 switch (flags) { 259 case DB_CURRENT: 260 case DB_FIRST: 261 case DB_LAST: 262 case DB_NEXT: 263 case DB_PREV: 264 break; 265 case DB_SET: 266 if (IS_ZERO_LSN(*alsn)) { 267 __db_errx(env, "DB_LOGC->get: invalid LSN: %lu/%lu", 268 (u_long)alsn->file, (u_long)alsn->offset); 269 return (EINVAL); 270 } 271 break; 272 default: 273 return (__db_ferr(env, "DB_LOGC->get", 1)); 274 } 275 276 ENV_ENTER(env, ip); 277 REPLICATION_WRAP(env, (__logc_get(logc, alsn, dbt, flags)), 0, ret); 278 ENV_LEAVE(env, ip); 279 return (ret); 280} 281 282/* 283 * __logc_get -- 284 * DB_LOGC->get. 285 * 286 * PUBLIC: int __logc_get __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t)); 287 */ 288int 289__logc_get(logc, alsn, dbt, flags) 290 DB_LOGC *logc; 291 DB_LSN *alsn; 292 DBT *dbt; 293 u_int32_t flags; 294{ 295 DB_LSN saved_lsn; 296 ENV *env; 297 LOGP *persist; 298 int ret; 299 300 env = logc->env; 301 302 /* 303 * On error, we take care not to overwrite the caller's LSN. This 304 * is because callers looking for the end of the log loop using the 305 * DB_NEXT flag, and expect to take the last successful lsn out of 306 * the passed-in structure after DB_LOGC->get fails with DB_NOTFOUND. 307 * 308 * !!! 309 * This line is often flagged an uninitialized memory read during a 310 * Purify or similar tool run, as the application didn't initialize 311 * *alsn. If the application isn't setting the DB_SET flag, there is 312 * no reason it should have initialized *alsn, but we can't know that 313 * and we want to make sure we never overwrite whatever the application 314 * put in there. 
315 */ 316 saved_lsn = *alsn; 317 /* 318 * If we get one of the log's header records as a result of doing a 319 * DB_FIRST, DB_NEXT, DB_LAST or DB_PREV, repeat the operation, log 320 * file header records aren't useful to applications. 321 */ 322 if ((ret = __logc_get_int(logc, alsn, dbt, flags)) != 0) { 323 *alsn = saved_lsn; 324 return (ret); 325 } 326 /* 327 * The DBT was populated by the call to __logc_get_int, copy the data 328 * out of DB_DBT_USERMEM space if it is there. 329 */ 330 if ((ret = __dbt_usercopy(env, dbt)) != 0) 331 return (ret); 332 333 if (alsn->offset == 0 && (flags == DB_FIRST || 334 flags == DB_NEXT || flags == DB_LAST || flags == DB_PREV)) { 335 switch (flags) { 336 case DB_FIRST: 337 flags = DB_NEXT; 338 break; 339 case DB_LAST: 340 flags = DB_PREV; 341 break; 342 case DB_NEXT: 343 case DB_PREV: 344 default: 345 break; 346 } 347 /* 348 * If we're walking the log and we find a persist header 349 * then store so that we may use it later if needed. 350 */ 351 persist = (LOGP *)dbt->data; 352 if (LOG_SWAPPED(env)) 353 __log_persistswap(persist); 354 logc->p_lsn = *alsn; 355 logc->p_version = persist->version; 356 if (F_ISSET(dbt, DB_DBT_MALLOC)) { 357 __os_free(env, dbt->data); 358 dbt->data = NULL; 359 } 360 if ((ret = __logc_get_int(logc, alsn, dbt, flags)) != 0) { 361 *alsn = saved_lsn; 362 goto err; 363 } 364 } 365 366err: __dbt_userfree(env, dbt, NULL, NULL); 367 return (ret); 368} 369 370/* 371 * __logc_get_int -- 372 * Get a log record; internal version. 
 *
 *	Turn flags (DB_FIRST, DB_NEXT, DB_CURRENT, DB_PREV, DB_LAST or
 *	DB_SET) into a target LSN, then look for that record in the
 *	cursor's buffer, in the log region's buffer and finally in the
 *	on-disk log file.  The record is checksummed unless it came
 *	entirely from the region's buffer.
 *
 *	On success the record is copied into dbt, and *alsn and the
 *	cursor's lsn/len/prev fields are set to describe it.  On failure
 *	*alsn is not written and the cursor's lsn is left as it was.
 */
static int
__logc_get_int(logc, alsn, dbt, flags)
	DB_LOGC *logc;
	DB_LSN *alsn;
	DBT *dbt;
	u_int32_t flags;
{
	DB_CIPHER *db_cipher;
	DB_LOG *dblp;
	DB_LSN last_lsn, nlsn;
	ENV *env;
	HDR hdr;
	LOG *lp;
	RLOCK rlock;
	logfile_validity status;
	u_int32_t cnt, version;
	u_int8_t *rp;
	int eof, is_hmac, need_cksum, ret;

	env = logc->env;
	db_cipher = env->crypto_handle;
	dblp = env->lg_handle;
	lp = dblp->reginfo.primary;
	is_hmac = 0;

	/*
	 * We don't acquire the log region lock until we need it, and we
	 * release it as soon as we're done.
	 */
	rlock = F_ISSET(logc, DB_LOG_LOCKED) ? L_ALREADY : L_NONE;

	/* Start from the cursor's position and adjust it per the flag. */
	nlsn = logc->lsn;
	switch (flags) {
	case DB_NEXT:				/* Next log record. */
		if (!IS_ZERO_LSN(nlsn)) {
			/* Increment the cursor by the cursor record size. */
			nlsn.offset += logc->len;
			break;
		}
		/* An unset cursor treats DB_NEXT as DB_FIRST. */
		flags = DB_FIRST;
		/* FALLTHROUGH */
	case DB_FIRST:				/* First log record. */
		/* Find the first log file. */
		if ((ret = __log_find(dblp, 1, &cnt, &status)) != 0)
			goto err;

		/*
		 * DB_LV_INCOMPLETE:
		 *	Theoretically, the log file we want could be created
		 *	but not yet written, the "first" log record must be
		 *	in the log buffer.
		 * DB_LV_NORMAL:
		 * DB_LV_OLD_READABLE:
		 *	We found a log file we can read.
		 * DB_LV_NONEXISTENT:
		 *	No log files exist, the "first" log record must be in
		 *	the log buffer.
		 * DB_LV_OLD_UNREADABLE:
		 *	No readable log files exist, we're at the cross-over
		 *	point between two versions.  The "first" log record
		 *	must be in the log buffer.
		 */
		switch (status) {
		case DB_LV_INCOMPLETE:
			DB_ASSERT(env, lp->lsn.file == cnt);
			/* FALLTHROUGH */
		case DB_LV_NORMAL:
		case DB_LV_OLD_READABLE:
			nlsn.file = cnt;
			break;
		case DB_LV_NONEXISTENT:
			nlsn.file = 1;
			DB_ASSERT(env, lp->lsn.file == nlsn.file);
			break;
		case DB_LV_OLD_UNREADABLE:
			nlsn.file = cnt + 1;
			DB_ASSERT(env, lp->lsn.file == nlsn.file);
			break;
		}
		nlsn.offset = 0;
		break;
	case DB_CURRENT:			/* Current log record. */
		break;
	case DB_PREV:				/* Previous log record. */
		if (!IS_ZERO_LSN(nlsn)) {
			/* If at start-of-file, move to the previous file. */
			if (nlsn.offset == 0) {
				if (nlsn.file == 1) {
					ret = DB_NOTFOUND;
					goto err;
				}
				/*
				 * For on-disk logs, the previous file has to
				 * exist and be readable.
				 */
				if ((!lp->db_log_inmemory &&
				    (__log_valid(dblp, nlsn.file - 1, 0, NULL,
				    0, &status, NULL) != 0 ||
				    (status != DB_LV_NORMAL &&
				    status != DB_LV_OLD_READABLE)))) {
					ret = DB_NOTFOUND;
					goto err;
				}

				--nlsn.file;
			}
			nlsn.offset = logc->prev;
			break;
		}
		/* An unset cursor treats DB_PREV as DB_LAST. */
		/* FALLTHROUGH */
	case DB_LAST:				/* Last log record. */
		/* The region's end-of-log fields are read under the lock. */
		if (rlock == L_NONE) {
			rlock = L_ACQUIRED;
			LOG_SYSTEM_LOCK(env);
		}
		nlsn.file = lp->lsn.file;
		nlsn.offset = lp->lsn.offset - lp->len;
		break;
	case DB_SET:				/* Set log record. */
		nlsn = *alsn;
		break;
	default:
		ret = __db_unknown_path(env, "__logc_get_int");
		goto err;
	}

	/*
	 * This block is only ever entered through the next_file label,
	 * from the end-of-file handling further down.
	 */
	if (0) {				/* Move to the next file. */
next_file:	++nlsn.file;
		nlsn.offset = 0;
	}

	/*
	 * The above switch statement should have set nlsn to the lsn of
	 * the requested record.
	 */

	/* The header size depends on whether encryption is configured. */
	if (CRYPTO_ON(env)) {
		hdr.size = HDR_CRYPTO_SZ;
		is_hmac = 1;
	} else {
		hdr.size = HDR_NORMAL_SZ;
		is_hmac = 0;
	}

	/*
	 * Check to see if the record is in the cursor's buffer -- if so,
	 * we'll need to checksum it.
	 */
	if ((ret = __logc_incursor(logc, &nlsn, &hdr, &rp)) != 0)
		goto err;
	if (rp != NULL)
		goto cksum;

	/*
	 * Look to see if we're moving backward in the log with the last record
	 * coming from the disk -- it means the record can't be in the region's
	 * buffer.  Else, check the region's buffer.
	 *
	 * If the record isn't in the region's buffer, then either logs are
	 * in-memory, and we're done, or we're going to have to read the
	 * record from disk.  We want to make a point of not reading past the
	 * end of the logical log (after recovery, there may be data after the
	 * end of the logical log, not to mention the log file may have been
	 * pre-allocated).  So, zero out last_lsn, and initialize it inside
	 * __logc_inregion -- if it's still zero when we check it in
	 * __logc_ondisk, that's OK, it just means the logical end of the log
	 * isn't an issue for this request.
	 */
	ZERO_LSN(last_lsn);
	if (!F_ISSET(logc, DB_LOG_DISK) ||
	    LOG_COMPARE(&nlsn, &logc->lsn) > 0) {
		F_CLR(logc, DB_LOG_DISK);

		if ((ret = __logc_inregion(logc,
		    &nlsn, &rlock, &last_lsn, &hdr, &rp, &need_cksum)) != 0)
			goto err;
		if (rp != NULL) {
			/*
			 * If we read the entire record from the in-memory log
			 * buffer, we don't need to checksum it, nor do we need
			 * to worry about vtruncate issues.
			 */
			if (need_cksum)
				goto cksum;
			goto from_memory;
		}
		/* In-memory logs have no disk file to fall back on. */
		if (lp->db_log_inmemory)
			goto nohdr;
	}

	/*
	 * We have to read from an on-disk file to retrieve the record.
	 * If we ever can't retrieve the record at offset 0, we're done,
	 * return EOF/DB_NOTFOUND.
	 *
	 * Discard the region lock if we're still holding it, the on-disk
	 * reading routines don't need it.
	 */
	if (rlock == L_ACQUIRED) {
		rlock = L_NONE;
		LOG_SYSTEM_UNLOCK(env);
	}
	if ((ret = __logc_ondisk(
	    logc, &nlsn, &last_lsn, flags, &hdr, &rp, &eof)) != 0)
		goto err;

	/*
	 * If we got a 0-length record, that means we're in the midst of some
	 * bytes that got 0'd as the result of a vtruncate.  In that case or at
	 * the end of a file, with DB_NEXT we're going to have to retry.
	 */
	if (eof || hdr.len == 0) {
nohdr:		switch (flags) {
		case DB_LAST:
		case DB_PREV:
			/*
			 * We should never get here.  If we recover a log
			 * file with 0's at the end, we'll treat the 0'd
			 * headers as the end of log and ignore them.  If
			 * we're reading backwards from another file, then
			 * the first record in that new file should have its
			 * prev field set correctly.
			 */
			__db_errx(env,
		"Encountered zero length records while traversing backwards");
			ret = __env_panic(env, DB_RUNRECOVERY);
			goto err;
		case DB_FIRST:
		case DB_NEXT:
			/*
			 * Zero'd records always indicate the end of a file,
			 * but only go to the next file once.
			 */
			if (nlsn.offset != 0)
				goto next_file;
			/* FALLTHROUGH */
		case DB_SET:
		default:
			ret = DB_NOTFOUND;
			goto err;
		}
	}

	/* Remember that the most recent record came from disk. */
	F_SET(logc, DB_LOG_DISK);

cksum:	/*
	 * Discard the region lock if we're still holding it.  (The path to
	 * get here is we acquired the region lock because of the caller's
	 * flag argument, but we found the record in the in-memory or cursor
	 * buffers.  Improbable, but it's easy to avoid.)
	 */
	if (rlock == L_ACQUIRED) {
		rlock = L_NONE;
		LOG_SYSTEM_UNLOCK(env);
	}

	/*
	 * Checksum: there are two types of errors -- a configuration error
	 * or a checksum mismatch.  The former is always bad.  The latter is
	 * OK if we're searching for the end of the log, and very, very bad
	 * if we're reading random log records.
	 */
	if ((ret = __db_check_chksum(env, &hdr, db_cipher,
	    hdr.chksum, rp + hdr.size, hdr.len - hdr.size, is_hmac)) != 0) {
		/*
		 * We may be dealing with a version that does not
		 * checksum the header.  Try again without the header.
		 * Set the cursor to the LSN we are trying to look at.
		 *
		 * last_lsn is reused here to hold the cursor's real
		 * position, which is restored on every path below.
		 */
		last_lsn = logc->lsn;
		logc->lsn = nlsn;
		if (__logc_version(logc, &version) == 0 &&
		    version < DB_LOGCHKSUM &&
		    __db_check_chksum(env, NULL, db_cipher, hdr.chksum,
		    rp + hdr.size, hdr.len - hdr.size, is_hmac) == 0) {
			logc->lsn = last_lsn;
			goto from_memory;
		}

		/*
		 * A return of -1 is the checksum mismatch: quietly mapped
		 * to EIO for silent cursors, otherwise reported and turned
		 * into a panic.
		 */
		if (F_ISSET(logc, DB_LOG_SILENT_ERR)) {
			if (ret == -1)
				ret = EIO;
		} else if (ret == -1) {
			__db_errx(env,
		    "DB_LOGC->get: log record LSN %lu/%lu: checksum mismatch",
			    (u_long)nlsn.file, (u_long)nlsn.offset);
			__db_errx(env,
		    "DB_LOGC->get: catastrophic recovery may be required");
			ret = __env_panic(env, DB_RUNRECOVERY);
		}
		logc->lsn = last_lsn;
		goto err;
	}

from_memory:
	/*
	 * Discard the region lock if we're still holding it.  (The path to
	 * get here is we acquired the region lock because of the caller's
	 * flag argument, but we found the record in the in-memory or cursor
	 * buffers.  Improbable, but it's easy to avoid.)
	 */
	if (rlock == L_ACQUIRED) {
		rlock = L_NONE;
		LOG_SYSTEM_UNLOCK(env);
	}

	/* Copy the record into the user's DBT. */
	if ((ret = __db_retcopy(env, dbt, rp + hdr.size,
	    (u_int32_t)(hdr.len - hdr.size),
	    &logc->dbt.data, &logc->dbt.ulen)) != 0)
		goto err;

	if (CRYPTO_ON(env)) {
		/* Any decryption failure is reported as EAGAIN. */
		if ((ret = db_cipher->decrypt(env, db_cipher->data,
		    hdr.iv, dbt->data, hdr.len - hdr.size)) != 0) {
			ret = EAGAIN;
			goto err;
		}
		/*
		 * Return the original log record size to the user,
		 * even though we've allocated more than that, possibly.
		 * The log record is decrypted in the user dbt, not in
		 * the buffer, so we must do this here after decryption,
		 * not adjust the len passed to the __db_retcopy call.
		 */
		dbt->size = hdr.orig_size;
	}

	/* Update the cursor and the returned LSN. */
	*alsn = nlsn;
	logc->lsn = nlsn;
	logc->len = hdr.len;
	logc->prev = hdr.prev;

	/* Only a lock taken by this call is released here. */
err:	if (rlock == L_ACQUIRED)
		LOG_SYSTEM_UNLOCK(env);

	return (ret);
}

/*
 * __logc_incursor --
 *	Check to see if the requested record is in the cursor's buffer.
 *
 *	hdr->size must be set by the caller; the rest of *hdr is filled in
 *	when the header is available in the buffer.
 *
 *	Returns 0 with *pp pointing at the record inside the cursor's
 *	buffer when the whole record is there, 0 with *pp set to NULL when
 *	it is not, and DB_NOTFOUND when the header fails __logc_hdrchk.
 */
static int
__logc_incursor(logc, lsn, hdr, pp)
	DB_LOGC *logc;
	DB_LSN *lsn;
	HDR *hdr;
	u_int8_t **pp;
{
	ENV *env;
	u_int8_t *p;
	int eof;

	env = logc->env;
	*pp = NULL;

	/*
	 * Test to see if the requested LSN could be part of the cursor's
	 * buffer.
	 *
	 * The record must be part of the same file as the cursor's buffer.
	 * The record must start at a byte offset equal to or greater than
	 * the cursor buffer.
	 * The record must not start at a byte offset after the cursor
	 * buffer's end.
	 */
	if (logc->bp_lsn.file != lsn->file)
		return (0);
	if (logc->bp_lsn.offset > lsn->offset)
		return (0);
	if (logc->bp_lsn.offset + logc->bp_rlen <= lsn->offset + hdr->size)
		return (0);

	/*
	 * Read the record's header and check if the record is entirely held
	 * in the buffer.  If the record is not entirely held, get it again.
	 * (The only advantage in having part of the record locally is that
	 * we might avoid a system call because we already have the HDR in
	 * memory.)
	 *
	 * If the header check fails for any reason, it must be because the
	 * LSN is bogus.  Fail hard.
	 */
	p = logc->bp + (lsn->offset - logc->bp_lsn.offset);
	memcpy(hdr, p, hdr->size);
	if (LOG_SWAPPED(env))
		__log_hdrswap(hdr, CRYPTO_ON(env));
	if (__logc_hdrchk(logc, lsn, hdr, &eof))
		return (DB_NOTFOUND);
	if (eof || logc->bp_lsn.offset + logc->bp_rlen < lsn->offset + hdr->len)
		return (0);

	*pp = p;				/* Success. */

	return (0);
}

/*
 * __logc_inregion --
 *	Check to see if the requested record is in the region's buffer.
 *
 *	rlockp:	     in/out region lock state; the lock is taken here if
 *		     it is L_NONE on entry, and may have been released
 *		     again by the time the function returns.
 *	last_lsn:    set to the position past which the on-disk routines
 *		     must not read.
 *	need_cksump: set to 1 when part of the record had to be read from
 *		     disk, otherwise 0.
 *
 *	Returns 0 with *pp pointing at the record (copied into the
 *	cursor's buffer) when it was found, 0 with *pp set to NULL when the
 *	caller has to look elsewhere, DB_NOTFOUND when the LSN is at or
 *	past the end of the log or its header is invalid, or another error
 *	from allocation or I/O.
 */
static int
__logc_inregion(logc, lsn, rlockp, last_lsn, hdr, pp, need_cksump)
	DB_LOGC *logc;
	DB_LSN *lsn, *last_lsn;
	RLOCK *rlockp;
	HDR *hdr;
	u_int8_t **pp;
	int *need_cksump;
{
	DB_LOG *dblp;
	ENV *env;
	LOG *lp;
	size_t b_region, len, nr;
	u_int32_t b_disk;
	int eof, ret;
	u_int8_t *p;

	env = logc->env;
	dblp = env->lg_handle;
	lp = env->lg_handle->reginfo.primary;

	ret = 0;
	b_region = 0;
	*pp = NULL;
	*need_cksump = 0;

	/* If we haven't yet acquired the log region lock, do so. */
	if (*rlockp == L_NONE) {
		*rlockp = L_ACQUIRED;
		LOG_SYSTEM_LOCK(env);
	}

	/*
	 * The routines to read from disk must avoid reading past the logical
	 * end of the log, so pass that information back to it.
	 *
	 * Since they're reading directly from the disk, they must also avoid
	 * reading past the offset we've written out.  If the log was
	 * truncated, it's possible that there are zeroes or garbage on
	 * disk after this offset, and the logical end of the log can
	 * come later than this point if the log buffer isn't empty.
	 */
	*last_lsn = lp->lsn;
	if (!lp->db_log_inmemory && last_lsn->offset > lp->w_off)
		last_lsn->offset = lp->w_off;

	/*
	 * Test to see if the requested LSN could be part of the region's
	 * buffer.
	 *
	 * During recovery, we read the log files getting the information to
	 * initialize the region.  In that case, the region's lsn field will
	 * not yet have been filled in, use only the disk.
	 *
	 * The record must not start at a byte offset after the region buffer's
	 * end, since that means the request is for a record after the end of
	 * the log.  Do this test even if the region's buffer is empty -- after
	 * recovery, the log files may continue past the declared end-of-log,
	 * and the disk reading routine will incorrectly attempt to read the
	 * remainder of the log.
	 *
	 * Otherwise, test to see if the region's buffer actually has what we
	 * want:
	 *
	 * The buffer must have some useful content.
	 * The record must be in the same file as the region's buffer and must
	 * start at a byte offset equal to or greater than the region's buffer.
	 */
	if (IS_ZERO_LSN(lp->lsn))
		return (0);
	if (LOG_COMPARE(lsn, &lp->lsn) >= 0)
		return (DB_NOTFOUND);
	else if (lp->db_log_inmemory) {
		if ((ret = __log_inmem_lsnoff(dblp, lsn, &b_region)) != 0)
			return (ret);
	} else if (lp->b_off == 0 || LOG_COMPARE(lsn, &lp->f_lsn) < 0)
		return (0);

	/*
	 * The current contents of the cursor's buffer will be useless for a
	 * future call, we're about to overwrite it -- trash it rather than
	 * try and make it look correct.
	 */
	logc->bp_rlen = 0;

	/*
	 * If the requested LSN is greater than the region buffer's first
	 * byte, we know the entire record is in the buffer on a good LSN.
	 *
	 * If we're given a bad LSN, the "entire" record might not be in
	 * our buffer in order to fail at the chksum.  __logc_hdrchk made
	 * sure our dest buffer fits, via bp_maxrec, but we also need to
	 * make sure we don't run off the end of this buffer, the src.
	 *
	 * There is one case where the header check can fail: on a scan through
	 * in-memory logs, when we reach the end of a file we can read an empty
	 * header.  In that case, it's safe to return zero, here: it will be
	 * caught in our caller.  Otherwise, the LSN is bogus.  Fail hard.
	 */
	if (lp->db_log_inmemory || LOG_COMPARE(lsn, &lp->f_lsn) > 0) {
		if (!lp->db_log_inmemory)
			b_region = lsn->offset - lp->w_off;
		__log_inmem_copyout(dblp, b_region, hdr, hdr->size);
		if (LOG_SWAPPED(env))
			__log_hdrswap(hdr, CRYPTO_ON(env));
		if (__logc_hdrchk(logc, lsn, hdr, &eof) != 0)
			return (DB_NOTFOUND);
		if (eof)
			return (0);
		/* The whole record has to lie inside the source buffer. */
		if (lp->db_log_inmemory) {
			if (RINGBUF_LEN(lp, b_region, lp->b_off) < hdr->len)
				return (DB_NOTFOUND);
		} else if (lsn->offset + hdr->len > lp->w_off + lp->buffer_size)
			return (DB_NOTFOUND);
		/* Grow the cursor's buffer to twice the record size. */
		if (logc->bp_size <= hdr->len) {
			len = (size_t)DB_ALIGN((uintmax_t)hdr->len * 2, 128);
			if ((ret =
			    __os_realloc(logc->env, len, &logc->bp)) != 0)
				return (ret);
			logc->bp_size = (u_int32_t)len;
		}
		__log_inmem_copyout(dblp, b_region, logc->bp, hdr->len);
		*pp = logc->bp;
		return (0);
	}

	DB_ASSERT(env, !lp->db_log_inmemory);

	/*
	 * There's a partial record, that is, the requested record starts
	 * in a log file and finishes in the region buffer.  We have to
	 * find out how many bytes of the record are in the region buffer
	 * so we can copy them out into the cursor buffer.  First, check
	 * to see if the requested record is the only record in the region
	 * buffer, in which case we should copy the entire region buffer.
	 *
	 * Else, walk back through the region's buffer to find the first LSN
	 * after the record that crosses the buffer boundary -- we can detect
	 * that LSN, because its "prev" field will reference the record we
	 * want.  The bytes we need to copy from the region buffer are the
	 * bytes up to the record we find.  The bytes we'll need to allocate
	 * to hold the log record are the bytes between the two offsets.
	 */
	b_disk = lp->w_off - lsn->offset;
	if (lp->b_off <= lp->len)
		b_region = (u_int32_t)lp->b_off;
	else
		for (p = dblp->bufp + (lp->b_off - lp->len);;) {
			memcpy(hdr, p, hdr->size);
			if (LOG_SWAPPED(env))
				__log_hdrswap(hdr, CRYPTO_ON(env));
			if (hdr->prev == lsn->offset) {
				b_region = (u_int32_t)(p - dblp->bufp);
				break;
			}
			p = dblp->bufp + (hdr->prev - lp->w_off);
		}

	/*
	 * If we don't have enough room for the record, we have to allocate
	 * space.  We have to do it while holding the region lock, which is
	 * truly annoying, but there's no way around it.  This call is why
	 * we allocate cursor buffer space when allocating the cursor instead
	 * of waiting.
	 */
	if (logc->bp_size <= b_region + b_disk) {
		len = (size_t)DB_ALIGN((uintmax_t)(b_region + b_disk) * 2, 128);
		if ((ret = __os_realloc(logc->env, len, &logc->bp)) != 0)
			return (ret);
		logc->bp_size = (u_int32_t)len;
	}

	/* Copy the region's bytes to the end of the cursor's buffer. */
	p = (logc->bp + logc->bp_size) - b_region;
	memcpy(p, dblp->bufp, b_region);

	/* Release the region lock. */
	if (*rlockp == L_ACQUIRED) {
		*rlockp = L_NONE;
		LOG_SYSTEM_UNLOCK(env);
	}

	/*
	 * Read the rest of the information from disk.  Neither short reads
	 * nor EOF are acceptable, the bytes we want had better be there.
	 */
	if (b_disk != 0) {
		/* The disk bytes go immediately before the region bytes. */
		p -= b_disk;
		nr = b_disk;
		if ((ret = __logc_io(
		    logc, lsn->file, lsn->offset, p, &nr, NULL)) != 0)
			return (ret);
		if (nr < b_disk)
			return (__logc_shortread(logc, lsn, 0));

		/* We read bytes from the disk, we'll need to checksum them. */
		*need_cksump = 1;
	}

	/* Copy the header information into the caller's structure. */
	memcpy(hdr, p, hdr->size);
	if (LOG_SWAPPED(env))
		__log_hdrswap(hdr, CRYPTO_ON(env));

	*pp = p;
	return (0);
}

/*
 * __log_hdrswap --
 *	Swap the bytes in a log header from machines with different endianness.
 *
 *	The prev and len fields are always swapped; the checksum is swapped
 *	only when is_hmac is zero.
 *
 * PUBLIC: void __log_hdrswap __P((HDR *, int));
 */
void
__log_hdrswap(hdr, is_hmac)
	HDR *hdr;
	int is_hmac;
{
	M_32_SWAP(hdr->prev);
	M_32_SWAP(hdr->len);
	if (!is_hmac)
		P_32_SWAP(hdr->chksum);
}

/*
 * __log_persistswap --
 *	Swap the bytes in a log file persistent header from machines with
 *	different endianness.
 *
 *	The structure is modified in place.
 *
 * PUBLIC: void __log_persistswap __P((LOGP *));
 */
void
__log_persistswap(persist)
	LOGP *persist;
{
	M_32_SWAP(persist->magic);
	M_32_SWAP(persist->version);
	M_32_SWAP(persist->log_size);
	M_32_SWAP(persist->notused);
}

/*
 * __logc_ondisk --
 *	Read a record off disk.
 *
 *	The header is read first to learn the record's length, then a
 *	buffer's worth of the file containing the record is read into the
 *	cursor's buffer.
 *
 *	last_lsn: limit past which the file is not read; may be a zero LSN.
 *	flags:	  the DB_LOGC->get flag, used to decide where in the buffer
 *		  the record is placed.
 *	eofp:	  set to 1 when the header could not be read in full or
 *		  is all zeroes; the function then returns 0 and *pp is
 *		  not set.
 *
 *	Returns 0 with *pp pointing at the record inside the cursor's
 *	buffer, or an error from the header check, allocation or I/O.
 */
static int
__logc_ondisk(logc, lsn, last_lsn, flags, hdr, pp, eofp)
	DB_LOGC *logc;
	DB_LSN *lsn, *last_lsn;
	u_int32_t flags;
	int *eofp;
	HDR *hdr;
	u_int8_t **pp;
{
	ENV *env;
	size_t len, nr;
	u_int32_t offset;
	int ret;

	env = logc->env;
	*eofp = 0;

	/* Read just the header, directly into the caller's structure. */
	nr = hdr->size;
	if ((ret =
	    __logc_io(logc, lsn->file, lsn->offset, hdr, &nr, eofp)) != 0)
		return (ret);
	if (*eofp)
		return (0);

	if (LOG_SWAPPED(env))
		__log_hdrswap(hdr, CRYPTO_ON(env));

	/*
	 * If the read was successful, but we can't read a full header, assume
	 * we've hit EOF.  We can't check that the header has been partially
	 * zeroed out, but it's unlikely that this is caused by a write failure
	 * since the header is written as a single write call and it's less
	 * than a sector.
	 */
	if (nr < hdr->size) {
		*eofp = 1;
		return (0);
	}

	/* Check the HDR. */
	if ((ret = __logc_hdrchk(logc, lsn, hdr, eofp)) != 0)
		return (ret);
	if (*eofp)
		return (0);

	/*
	 * Regardless of how we return, the previous contents of the cursor's
	 * buffer are useless -- trash it.
	 */
	logc->bp_rlen = 0;

	/*
	 * Otherwise, we now (finally!) know how big the record is.  (Maybe
	 * we should have just stuck the length of the record into the LSN!?)
	 * Make sure we have enough space.
	 */
	if (logc->bp_size <= hdr->len) {
		len = (size_t)DB_ALIGN((uintmax_t)hdr->len * 2, 128);
		if ((ret = __os_realloc(env, len, &logc->bp)) != 0)
			return (ret);
		logc->bp_size = (u_int32_t)len;
	}

	/*
	 * If we're moving forward in the log file, read this record in at the
	 * beginning of the buffer.  Otherwise, read this record in at the end
	 * of the buffer, making sure we don't try and read before the start
	 * of the file.  (We prefer positioning at the end because transaction
	 * aborts use DB_SET to move backward through the log and we might get
	 * lucky.)
	 *
	 * Read a buffer's worth, without reading past the logical EOF.  The
	 * last_lsn may be a zero LSN, but that's OK, the test works anyway.
	 */
	if (flags == DB_FIRST || flags == DB_NEXT)
		offset = lsn->offset;
	else if (lsn->offset + hdr->len < logc->bp_size)
		offset = 0;
	else
		offset = (lsn->offset + hdr->len) - logc->bp_size;

	nr = logc->bp_size;
	if (lsn->file == last_lsn->file && offset + nr >= last_lsn->offset)
		nr = last_lsn->offset - offset;

	if ((ret =
	    __logc_io(logc, lsn->file, offset, logc->bp, &nr, eofp)) != 0)
		return (ret);

	/*
	 * We should have at least gotten the bytes up-to-and-including the
	 * record we're reading.
	 */
	if (nr < (lsn->offset + hdr->len) - offset)
		return (__logc_shortread(logc, lsn, 1));

	/*
	 * Set up the return information.
	 *
	 * !!!
	 * No need to set the bp_lsn.file field, __logc_io set it for us.
	 */
	logc->bp_rlen = (u_int32_t)nr;
	logc->bp_lsn.offset = offset;

	*pp = logc->bp + (lsn->offset - offset);

	return (0);
}

/*
 * __logc_hdrchk --
 *
 * Check for corrupted HDRs before we use them to allocate memory or find
 * records.
 *
 * If the log files were pre-allocated, a zero-filled HDR structure is the
 * logical file end.  However, we can see buffers filled with 0's during
 * recovery, too (because multiple log buffers were written asynchronously,
 * and one made it to disk before a different one that logically precedes
 * it in the log file).
 *
 * Check for impossibly large records.  The malloc should fail later, but we
 * have customers that run mallocs that treat all allocation failures as fatal
 * errors.
 *
 * Note that none of this is necessarily something awful happening.  We let
 * the application hand us any LSN they want, and it could be a pointer into
 * the middle of a log record, there's no way to tell.
 *
 * Returns 0 for an acceptable header, and also for a zeroed header when eofp
 * is not NULL (in which case *eofp is set to 1); EIO for an invalid header;
 * or the error from __logc_set_maxrec.
 */
static int
__logc_hdrchk(logc, lsn, hdr, eofp)
	DB_LOGC *logc;
	DB_LSN *lsn;
	HDR *hdr;
	int *eofp;
{
	ENV *env;
	int ret;

	env = logc->env;

	/*
	 * Check EOF before we do any other processing.
	 */
	if (eofp != NULL) {
		if (hdr->prev == 0 && hdr->chksum[0] == 0 && hdr->len == 0) {
			*eofp = 1;
			return (0);
		}
		*eofp = 0;
	}

	/*
	 * Sanity check the log record's size.
	 * We must check it after "virtual" EOF above.
	 */
	if (hdr->len <= hdr->size)
		goto err;

	/*
	 * If the cursor's max-record value isn't yet set, it means we aren't
	 * reading these records from a log file and no check is necessary.
	 */
	if (logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec) {
		/*
		 * If we fail the check, there's the pathological case that
		 * we're reading the last file, it's growing, and our initial
		 * check information was wrong.  Get it again, to be sure.
		 */
		if ((ret = __logc_set_maxrec(logc, NULL)) != 0) {
			__db_err(env, ret, "DB_LOGC->get");
			return (ret);
		}
		if (logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec)
			goto err;
	}
	return (0);

	/* The message is suppressed for silent cursors, the error is not. */
err:	if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
		__db_errx(env,
		    "DB_LOGC->get: LSN %lu/%lu: invalid log record header",
		    (u_long)lsn->file, (u_long)lsn->offset);
	return (EIO);
}

/*
 * __logc_io --
 *	Read records from a log file.
 *
 *	fnum:	log file number; the cursor's cached file handle is
 *		replaced when it refers to a different file.
 *	offset:	byte offset in the file to read from.
 *	p:	destination buffer.
 *	nrp:	in: number of bytes wanted; out: number of bytes read.
 *	eofp:	if not NULL, a failure to open the file is reported by
 *		setting *eofp to 1 and returning 0 instead of an error.
 */
static int
__logc_io(logc, fnum, offset, p, nrp, eofp)
	DB_LOGC *logc;
	u_int32_t fnum, offset;
	void *p;
	size_t *nrp;
	int *eofp;
{
	DB_LOG *dblp;
	ENV *env;
	LOG *lp;
	int ret;
	char *np;

	env = logc->env;
	dblp = env->lg_handle;
	lp = dblp->reginfo.primary;

	/*
	 * If we've switched files, discard the current file handle and acquire
	 * a new one.
	 */
	if (logc->fhp != NULL && logc->bp_lsn.file != fnum) {
		ret = __os_closehandle(env, logc->fhp);
		logc->fhp = NULL;
		logc->bp_lsn.file = 0;

		if (ret != 0)
			return (ret);
	}
	if (logc->fhp == NULL) {
		if ((ret = __log_name(dblp, fnum,
		    &np, &logc->fhp, DB_OSO_RDONLY | DB_OSO_SEQ)) != 0) {
			/*
			 * If we're allowed to return EOF, assume that's the
			 * problem, set the EOF status flag and return 0.
			 */
			if (eofp != NULL) {
				*eofp = 1;
				ret = 0;
			} else if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
				__db_err(env, ret, "DB_LOGC->get: %s",
				    np == NULL ? "__log_name failed" : np);
			__os_free(env, np);
			return (ret);
		}

		/* Refresh the record size bound for the newly opened file. */
		if ((ret = __logc_set_maxrec(logc, np)) != 0) {
			__db_err(env, ret, "DB_LOGC->get: %s", np);
			__os_free(env, np);
			return (ret);
		}
		__os_free(env, np);

		logc->bp_lsn.file = fnum;
	}

	STAT(++lp->stat.st_rcount);
	/* Seek to the record's offset and read the data. */
	if ((ret = __os_io(env, DB_IO_READ,
	    logc->fhp, 0, 0, offset, (u_int32_t)*nrp, p, nrp)) != 0) {
		if (!F_ISSET(logc, DB_LOG_SILENT_ERR))
			__db_err(env, ret,
			    "DB_LOGC->get: LSN: %lu/%lu: read",
			    (u_long)fnum, (u_long)offset);
		return (ret);
	}

	return (0);
}

/*
 * __logc_shortread --
 *	Read was short -- return a consistent error message and error.
 *
 *	The message is suppressed only when check_silent is non-zero and
 *	the cursor has DB_LOG_SILENT_ERR set.  Always returns EIO.
 */
static int
__logc_shortread(logc, lsn, check_silent)
	DB_LOGC *logc;
	DB_LSN *lsn;
	int check_silent;
{
	if (!check_silent || !F_ISSET(logc, DB_LOG_SILENT_ERR))
		__db_errx(logc->env, "DB_LOGC->get: LSN: %lu/%lu: short read",
		    (u_long)lsn->file, (u_long)lsn->offset);
	return (EIO);
}

/*
 * __logc_set_maxrec --
 *	Bound the maximum log record size in a log file.
 */
static int
__logc_set_maxrec(logc, np)
	DB_LOGC *logc;
	char *np;
{
	DB_LOG *dblp;
	ENV *env;
	LOG *lp;
	u_int32_t mbytes, bytes;
	int ret;

	env = logc->env;
	dblp = env->lg_handle;

	/*
	 * We don't want to try and allocate huge chunks of memory because
	 * applications with error-checking malloc's often consider that a
	 * hard failure.  If we're about to look at a corrupted record with
	 * a bizarre size, we need to know before trying to allocate space
	 * to hold it.  We could read the persistent data at the beginning
	 * of the file but that's hard -- we may have to decrypt it, checksum
	 * it and so on.  Stat the file instead.
1322 */ 1323 if (logc->fhp != NULL) { 1324 if ((ret = __os_ioinfo(env, np, logc->fhp, 1325 &mbytes, &bytes, NULL)) != 0) 1326 return (ret); 1327 if (logc->bp_maxrec < (mbytes * MEGABYTE + bytes)) 1328 logc->bp_maxrec = mbytes * MEGABYTE + bytes; 1329 } 1330 1331 /* 1332 * If reading from the log file currently being written, we could get 1333 * an incorrect size, that is, if the cursor was opened on the file 1334 * when it had only a few hundred bytes, and then the cursor used to 1335 * move forward in the file, after more log records were written, the 1336 * original stat value would be wrong. Use the maximum of the current 1337 * log file size and the size of the buffer -- that should represent 1338 * the max of any log record currently in the file. 1339 * 1340 * The log buffer size is set when the environment is opened and never 1341 * changed, we don't need a lock on it. 1342 */ 1343 lp = dblp->reginfo.primary; 1344 if (logc->bp_maxrec < lp->buffer_size) 1345 logc->bp_maxrec = lp->buffer_size; 1346 1347 return (0); 1348} 1349 1350#ifdef HAVE_REPLICATION 1351/* 1352 * __log_rep_split -- 1353 * - Split a log buffer into individual records. 1354 * 1355 * This is used by a replication client to process a bulk log message from the 1356 * master and convert it into individual __rep_apply requests. 
1357 * 1358 * PUBLIC: int __log_rep_split __P((ENV *, DB_THREAD_INFO *, 1359 * PUBLIC: __rep_control_args *, DBT *, DB_LSN *, DB_LSN *)); 1360 */ 1361int 1362__log_rep_split(env, ip, rp, rec, ret_lsnp, last_lsnp) 1363 ENV *env; 1364 DB_THREAD_INFO *ip; 1365 __rep_control_args *rp; 1366 DBT *rec; 1367 DB_LSN *ret_lsnp; 1368 DB_LSN *last_lsnp; 1369{ 1370 DBT logrec; 1371 DB_LSN save_lsn, tmp_lsn; 1372 __rep_control_args tmprp; 1373 __rep_bulk_args b_args; 1374 int ret, save_ret; 1375 u_int32_t save_flags; 1376 u_int8_t *p, *ep; 1377 1378 memset(&logrec, 0, sizeof(logrec)); 1379 memset(&save_lsn, 0, sizeof(save_lsn)); 1380 memset(&tmp_lsn, 0, sizeof(tmp_lsn)); 1381 /* 1382 * We're going to be modifying the rp LSN contents so make 1383 * our own private copy to play with. 1384 */ 1385 memcpy(&tmprp, rp, sizeof(tmprp)); 1386 /* 1387 * We send the bulk buffer on a PERM record, so often we will have 1388 * DB_LOG_PERM set. However, we only want to mark the last LSN 1389 * we have as a PERM record. So clear it here, and when we're on 1390 * the last record below, set it. The same applies if the sender 1391 * set REPCTL_LOG_END on this message. We want the end of the 1392 * bulk buffer to be marked as the end. 1393 */ 1394 save_flags = F_ISSET(rp, REPCTL_LOG_END | REPCTL_PERM); 1395 F_CLR(&tmprp, REPCTL_LOG_END | REPCTL_PERM); 1396 ret = save_ret = 0; 1397 for (ep = (u_int8_t *)rec->data + rec->size, p = (u_int8_t *)rec->data; 1398 p < ep; ) { 1399 /* 1400 * First thing in the buffer is the length. Then the LSN 1401 * of this record, then the record itself. 
1402 */ 1403 if (rp->rep_version < DB_REPVERSION_47) { 1404 memcpy(&b_args.len, p, sizeof(b_args.len)); 1405 p += sizeof(b_args.len); 1406 memcpy(&tmprp.lsn, p, sizeof(DB_LSN)); 1407 p += sizeof(DB_LSN); 1408 logrec.data = p; 1409 logrec.size = b_args.len; 1410 p += b_args.len; 1411 } else { 1412 if ((ret = __rep_bulk_unmarshal(env, 1413 &b_args, p, rec->size, &p)) != 0) 1414 return (ret); 1415 tmprp.lsn = b_args.lsn; 1416 logrec.data = b_args.bulkdata.data; 1417 logrec.size = b_args.len; 1418 } 1419 RPRINT(env, DB_VERB_REP_MISC, (env, 1420 "log_rep_split: Processing LSN [%lu][%lu]", 1421 (u_long)tmprp.lsn.file, (u_long)tmprp.lsn.offset)); 1422 RPRINT(env, DB_VERB_REP_MISC, (env, 1423 "log_rep_split: p %#lx ep %#lx logrec data %#lx, size %lu (%#lx)", 1424 P_TO_ULONG(p), P_TO_ULONG(ep), P_TO_ULONG(logrec.data), 1425 (u_long)logrec.size, (u_long)logrec.size)); 1426 if (p >= ep && save_flags) 1427 F_SET(&tmprp, save_flags); 1428 ret = __rep_apply(env, ip, 1429 &tmprp, &logrec, &tmp_lsn, NULL, last_lsnp); 1430 RPRINT(env, DB_VERB_REP_MISC, (env, 1431 "log_split: rep_apply ret %d, tmp_lsn [%lu][%lu]", 1432 ret, (u_long)tmp_lsn.file, (u_long)tmp_lsn.offset)); 1433 switch (ret) { 1434 /* 1435 * If we received the pieces we need for running recovery, 1436 * short-circuit because recovery will truncate the log to 1437 * the LSN we want anyway. 1438 */ 1439 case DB_REP_LOGREADY: 1440 goto out; 1441 /* 1442 * If we just handled a special record, retain that information. 1443 */ 1444 case DB_REP_ISPERM: 1445 case DB_REP_NOTPERM: 1446 save_ret = ret; 1447 save_lsn = tmp_lsn; 1448 ret = 0; 1449 break; 1450 /* 1451 * Normal processing, do nothing, just continue. 1452 */ 1453 case 0: 1454 break; 1455 /* 1456 * If we get an error, then stop immediately. 1457 */ 1458 default: 1459 goto out; 1460 } 1461 } 1462out: 1463 /* 1464 * If we finish processing successfully, set our return values 1465 * based on what we saw. 
1466 */ 1467 if (ret == 0) { 1468 ret = save_ret; 1469 *ret_lsnp = save_lsn; 1470 } 1471 return (ret); 1472} 1473#endif 1474