/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996,2008 Oracle.  All rights reserved.
 *
 * $Id: log.c,v 12.68 2008/05/05 01:59:52 mjc Exp $
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/crypto.h"
#include "dbinc/hmac.h"
#include "dbinc/log.h"
#include "dbinc/txn.h"

static int	__log_init __P((ENV *, DB_LOG *));
static int	__log_recover __P((DB_LOG *));
static size_t	__log_region_size __P((ENV *));

/*
 * __log_open --
 *	Internal version of log_open: only called from ENV->open.
 *
 *	Allocates the per-process DB_LOG handle, attaches to the log region
 *	(creating it only if create_ok is non-zero), and, when this call
 *	created the region, initializes it and runs __log_recover.  When
 *	joining an existing region, only the next-log-file size may be
 *	updated from the DB_ENV configuration.
 *
 *	Returns 0 with env->lg_handle set on success.  On failure the region
 *	is detached (if it was attached), env->lg_handle is reset to NULL,
 *	the handle is freed and the error is returned.
 *
 * PUBLIC: int __log_open __P((ENV *, int));
 */
int
__log_open(env, create_ok)
	ENV *env;
	int create_ok;
{
	DB_ENV *dbenv;
	DB_LOG *dblp;
	LOG *lp;
	u_int8_t *bulk;
	int region_locked, ret;

	dbenv = env->dbenv;
	/* Tracks whether the error path must drop the log system lock. */
	region_locked = 0;

	/* Create/initialize the DB_LOG structure. */
	if ((ret = __os_calloc(env, 1, sizeof(DB_LOG), &dblp)) != 0)
		return (ret);
	dblp->env = env;

	/* Set the default buffer size, if not otherwise configured. */
	if (dbenv->lg_bsize == 0)
		dbenv->lg_bsize = FLD_ISSET(dbenv->lg_flags, DB_LOG_IN_MEMORY) ?
		    LG_BSIZE_INMEM : LG_BSIZE_DEFAULT;

	/* Join/create the log region. */
	dblp->reginfo.env = env;
	dblp->reginfo.type = REGION_TYPE_LOG;
	dblp->reginfo.id = INVALID_REGION_ID;
	dblp->reginfo.flags = REGION_JOIN_OK;

	if (create_ok)
		F_SET(&dblp->reginfo, REGION_CREATE_OK);
	if ((ret = __env_region_attach(
	    env, &dblp->reginfo, __log_region_size(env))) != 0)
		goto err;

	/* If we created the region, initialize it. */
	if (F_ISSET(&dblp->reginfo, REGION_CREATE))
		if ((ret = __log_init(env, dblp)) != 0)
			goto err;

	/* Set the local addresses. */
	lp = dblp->reginfo.primary =
	    R_ADDR(&dblp->reginfo, dblp->reginfo.rp->primary);
	dblp->bufp = R_ADDR(&dblp->reginfo, lp->buffer_off);

	/*
	 * If the region is threaded, we have to lock the DBREG list, and we
	 * need to allocate a mutex for that purpose.
	 */
	if ((ret = __mutex_alloc(env,
	    MTX_LOG_REGION, DB_MUTEX_PROCESS_ONLY, &dblp->mtx_dbreg)) != 0)
		goto err;

	/*
	 * Set the handle -- we may be about to run recovery, which allocates
	 * log cursors.  Log cursors require logging be already configured,
	 * and the handle being set is what demonstrates that.
	 *
	 * If we created the region, run recovery.  If that fails, make sure
	 * we reset the log handle before cleaning up, otherwise we will try
	 * and clean up again in the mainline ENV initialization code.
	 */
	env->lg_handle = dblp;

	if (F_ISSET(&dblp->reginfo, REGION_CREATE)) {
		/*
		 * We first take the log file size from the environment, if
		 * specified.  If that wasn't set, default it.  Regardless,
		 * recovery may set it from the persistent information in a
		 * log file header.
		 */
		if (lp->log_size == 0)
			lp->log_size =
			    FLD_ISSET(dbenv->lg_flags, DB_LOG_IN_MEMORY) ?
			    LG_MAX_INMEM : LG_MAX_DEFAULT;

		if ((ret = __log_recover(dblp)) != 0)
			goto err;

		/*
		 * If the next log file size hasn't been set yet, default it
		 * to the current log file size.
		 */
		if (lp->log_nsize == 0)
			lp->log_nsize = lp->log_size;

		/*
		 * If we haven't written any log files, write the first one
		 * so that checkpoint gets a valid ckp_lsn value.
		 */
		if (IS_INIT_LSN(lp->lsn) &&
		    (ret = __log_newfile(dblp, NULL, 0, 0)) != 0)
			goto err;

		/*
		 * Initialize replication's next-expected LSN value
		 * and replication's bulk buffer.  In __env_open, we
		 * always create/open the replication region before
		 * the log region so we're assured that our rep_handle
		 * is valid at this point, if replication is being used.
		 */
		lp->ready_lsn = lp->lsn;
		if (IS_ENV_REPLICATED(env)) {
			/* The bulk buffer is carved out of the log region. */
			if ((ret =
			    __env_alloc(&dblp->reginfo, MEGABYTE, &bulk)) != 0)
				goto err;
			lp->bulk_buf = R_OFFSET(&dblp->reginfo, bulk);
			lp->bulk_len = MEGABYTE;
			lp->bulk_off = 0;
			lp->wait_ts = env->rep_handle->request_gap;
			__os_gettime(env, &lp->rcvd_ts, 1);
		} else {
			lp->bulk_buf = INVALID_ROFF;
			lp->bulk_len = 0;
			lp->bulk_off = 0;
		}
	} else {
		/*
		 * A process joining the region may have reset the log file
		 * size, too.  If so, it only affects the next log file we
		 * create.  We need to check that the size is reasonable given
		 * the buffer size in the region.
		 */
		LOG_SYSTEM_LOCK(env);
		region_locked = 1;

		if (dbenv->lg_size != 0) {
			if ((ret =
			    __log_check_sizes(env, dbenv->lg_size, 0)) != 0)
				goto err;

			lp->log_nsize = dbenv->lg_size;
		}

		LOG_SYSTEM_UNLOCK(env);
		region_locked = 0;
	}

	return (0);

	/*
	 * Error path: release the log system lock if the join branch still
	 * holds it, detach from the region if we attached, and discard the
	 * handle so the caller does not see a half-built log subsystem.
	 */
err:	if (dblp->reginfo.addr != NULL) {
		if (region_locked)
			LOG_SYSTEM_UNLOCK(env);
		(void)__env_region_detach(env, &dblp->reginfo, 0);
	}
	env->lg_handle = NULL;

	(void)__mutex_free(env, &dblp->mtx_dbreg);
	__os_free(env, dblp);

	return (ret);
}

/*
 * __log_init --
 *	Initialize a log region in shared memory.
 *
 *	Validates the configured file/buffer sizes, allocates the LOG
 *	structure and the log buffer from the region, allocates the region,
 *	file-list and flush mutexes, and fills in the initial LSNs, queues
 *	and persistent header.  Also sets env->lg_handle to dblp before
 *	migrating the configured log flags into the region.
 *
 *	Returns 0 on success or the error from the failing call.
 */
static int
__log_init(env, dblp)
	ENV *env;
	DB_LOG *dblp;
{
	DB_ENV *dbenv;
	LOG *lp;
	int ret;
	void *p;

	dbenv = env->dbenv;

	/*
	 * This is the first point where we can validate the buffer size,
	 * because we know all three settings have been configured (file size,
	 * buffer size and the in-memory flag).
	 */
	if ((ret =
	    __log_check_sizes(env, dbenv->lg_size, dbenv->lg_bsize)) != 0)
		return (ret);

	/*
	 * Allocate the LOG structure itself.  A failure here shares the
	 * error message with the buffer allocation further down (mem_err).
	 */
	if ((ret = __env_alloc(&dblp->reginfo,
	    sizeof(*lp), &dblp->reginfo.primary)) != 0)
		goto mem_err;
	dblp->reginfo.rp->primary =
	    R_OFFSET(&dblp->reginfo, dblp->reginfo.primary);
	lp = dblp->reginfo.primary;
	memset(lp, 0, sizeof(*lp));

	if ((ret =
	    __mutex_alloc(env, MTX_LOG_REGION, 0, &lp->mtx_region)) != 0)
		return (ret);

	lp->fid_max = 0;
	SH_TAILQ_INIT(&lp->fq);
	lp->free_fid_stack = INVALID_ROFF;
	lp->free_fids = lp->free_fids_alloced = 0;

	/* Initialize LOG LSNs. */
	INIT_LSN(lp->lsn);
	INIT_LSN(lp->t_lsn);

	/*
	 * It's possible to be waiting for an LSN of [1][0], if a replication
	 * client gets the first log record out of order.  An LSN of [0][0]
	 * signifies that we're not waiting.
	 */
	ZERO_LSN(lp->waiting_lsn);

	/*
	 * Log makes note of the fact that it ran into a checkpoint on
	 * startup if it did so, as a recovery optimization.  A zero
	 * LSN signifies that it hasn't found one [yet].
	 */
	ZERO_LSN(lp->cached_ckp_lsn);

	if ((ret =
	    __mutex_alloc(env, MTX_LOG_FILENAME, 0, &lp->mtx_filelist)) != 0)
		return (ret);
	if ((ret = __mutex_alloc(env, MTX_LOG_FLUSH, 0, &lp->mtx_flush)) != 0)
		return (ret);

	/* Initialize the buffer. */
	if ((ret = __env_alloc(&dblp->reginfo, dbenv->lg_bsize, &p)) != 0) {
		/* Also reached by goto when the LOG allocation above fails. */
mem_err:	__db_errx( env, "unable to allocate log region memory");
		return (ret);
	}
	lp->regionmax = dbenv->lg_regionmax;
	lp->buffer_off = R_OFFSET(&dblp->reginfo, p);
	lp->buffer_size = dbenv->lg_bsize;
	lp->filemode = dbenv->lg_filemode;
	lp->log_size = lp->log_nsize = dbenv->lg_size;

	/* Initialize the commit Queue. */
	SH_TAILQ_INIT(&lp->free_commits);
	SH_TAILQ_INIT(&lp->commits);
	lp->ncommit = 0;

	/* Initialize the logfiles list for in-memory logs. */
	SH_TAILQ_INIT(&lp->logfiles);
	SH_TAILQ_INIT(&lp->free_logfiles);

	/*
	 * Fill in the log's persistent header.  Don't fill in the log file
	 * sizes, as they may change at any time and so have to be filled in
	 * as each log file is created.
	 */
	lp->persist.magic = DB_LOGMAGIC;
	/*
	 * Don't use __log_set_version because env->dblp isn't set up yet.
	 */
	lp->persist.version = DB_LOGVERSION;
	lp->persist.notused = 0;
	env->lg_handle = dblp;

	/* Migrate persistent flags from the ENV into the region. */
	if (dbenv->lg_flags != 0 &&
	    (ret = __log_set_config_int(dbenv, dbenv->lg_flags, 1, 1)) != 0)
		return (ret);

	(void)time(&lp->timestamp);
	return (0);
}

/*
 * __log_recover --
 *	Recover a log.
 *
 *	Locates the last log file via __log_find and positions the region's
 *	end-of-log (lp->lsn) and synced (lp->s_lsn) LSNs.  If the last file
 *	is a readable version, it is scanned record by record to find the
 *	true end of the log; the LSN of the last checkpoint record seen on
 *	the way is cached in lp->cached_ckp_lsn.  If no log file exists the
 *	region is left as initialized.
 *
 *	Returns 0 on success, or the error from __log_find, __log_cursor or
 *	the initial __logc_get.
 */
static int
__log_recover(dblp)
	DB_LOG *dblp;
{
	DBT dbt;
	DB_ENV *dbenv;
	DB_LOGC *logc;
	DB_LSN lsn;
	ENV *env;
	LOG *lp;
	u_int32_t cnt, rectype;
	int ret;
	logfile_validity status;

	env = dblp->env;
	dbenv = env->dbenv;
	logc = NULL;
	lp = dblp->reginfo.primary;

	/*
	 * Find a log file.  If none exist, we simply return, leaving
	 * everything initialized to a new log.
	 */
	if ((ret = __log_find(dblp, 0, &cnt, &status)) != 0)
		return (ret);
	if (cnt == 0)
		return (0);

	/*
	 * If the last file is an old, unreadable version, start a new
	 * file.  Don't bother finding the end of the last log file;
	 * we assume that it's valid in its entirety, since the user
	 * should have shut down cleanly or run recovery before upgrading.
	 */
	if (status == DB_LV_OLD_UNREADABLE) {
		lp->lsn.file = lp->s_lsn.file = cnt + 1;
		lp->lsn.offset = lp->s_lsn.offset = 0;
		goto skipsearch;
	}
	DB_ASSERT(env,
	    (status == DB_LV_NORMAL || status == DB_LV_OLD_READABLE));

	/*
	 * We have the last useful log file and we've loaded any persistent
	 * information.  Set the end point of the log past the end of the last
	 * file.  Read the last file, looking for the last checkpoint and
	 * the log's end.
	 */
	lp->lsn.file = cnt + 1;
	lp->lsn.offset = 0;
	lsn.file = cnt;
	lsn.offset = 0;

	/*
	 * Allocate a cursor and set it to the first record.  This shouldn't
	 * fail, leave error messages on.
	 */
	if ((ret = __log_cursor(env, &logc)) != 0)
		return (ret);
	F_SET(logc, DB_LOG_LOCKED);
	memset(&dbt, 0, sizeof(dbt));
	if ((ret = __logc_get(logc, &lsn, &dbt, DB_SET)) != 0)
		goto err;

	/*
	 * Read to the end of the file.  This may fail at some point, so
	 * turn off error messages.
	 */
	F_SET(logc, DB_LOG_SILENT_ERR);
	while (__logc_get(logc, &lsn, &dbt, DB_NEXT) == 0) {
		/* Too short to carry a record type; nothing to inspect. */
		if (dbt.size < sizeof(u_int32_t))
			continue;
		LOGCOPY_32(env, &rectype, dbt.data);
		if (rectype == DB___txn_ckp)
			/*
			 * If we happen to run into a checkpoint, cache its
			 * LSN so that the transaction system doesn't have
			 * to walk this log file again looking for it.
			 */
			lp->cached_ckp_lsn = lsn;
	}
	F_CLR(logc, DB_LOG_SILENT_ERR);

	/*
	 * We now know where the end of the log is.  Set the first LSN that
	 * we want to return to an application and the LSN of the last known
	 * record on disk.
	 */
	lp->lsn = lsn;
	lp->s_lsn = lsn;
	lp->lsn.offset += logc->len;
	lp->s_lsn.offset += logc->len;

	/* Set up the current buffer information, too. */
	lp->len = logc->len;
	lp->a_off = 0;
	lp->b_off = 0;
	lp->w_off = lp->lsn.offset;

skipsearch:
	if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY))
		__db_msg(env,
		    "Finding last valid log LSN: file: %lu offset %lu",
		    (u_long)lp->lsn.file, (u_long)lp->lsn.offset);

	/* Reached on success as well; ret is 0 unless the DB_SET get failed. */
err:	if (logc != NULL)
		(void)__logc_close(logc);

	return (ret);
}

/*
 * __log_find --
 *	Try to find a log file.
If find_first is set, valp will contain 407 * the number of the first readable log file, else it will contain the number 408 * of the last log file (which may be too old to read). 409 * 410 * PUBLIC: int __log_find __P((DB_LOG *, int, u_int32_t *, logfile_validity *)); 411 */ 412int 413__log_find(dblp, find_first, valp, statusp) 414 DB_LOG *dblp; 415 int find_first; 416 u_int32_t *valp; 417 logfile_validity *statusp; 418{ 419 ENV *env; 420 LOG *lp; 421 logfile_validity logval_status, status; 422 struct __db_filestart *filestart; 423 u_int32_t clv, logval; 424 int cnt, fcnt, ret; 425 const char *dir; 426 char *c, **names, *p, *q; 427 428 env = dblp->env; 429 lp = dblp->reginfo.primary; 430 logval_status = status = DB_LV_NONEXISTENT; 431 432 /* Return a value of 0 as the log file number on failure. */ 433 *valp = 0; 434 435 if (lp->db_log_inmemory) { 436 filestart = find_first ? 437 SH_TAILQ_FIRST(&lp->logfiles, __db_filestart) : 438 SH_TAILQ_LAST(&lp->logfiles, links, __db_filestart); 439 if (filestart != NULL) { 440 *valp = filestart->file; 441 logval_status = DB_LV_NORMAL; 442 } 443 *statusp = logval_status; 444 return (0); 445 } 446 447 /* Find the directory name. */ 448 if ((ret = __log_name(dblp, 1, &p, NULL, 0)) != 0) { 449 __os_free(env, p); 450 return (ret); 451 } 452 if ((q = __db_rpath(p)) == NULL) 453 dir = PATH_DOT; 454 else { 455 *q = '\0'; 456 dir = p; 457 } 458 459 /* Get the list of file names. */ 460 if ((ret = __os_dirlist(env, dir, 0, &names, &fcnt)) != 0) { 461 __db_err(env, ret, "%s", dir); 462 __os_free(env, p); 463 return (ret); 464 } 465 466 /* Search for a valid log file name. */ 467 for (cnt = fcnt, clv = logval = 0; --cnt >= 0;) { 468 if (strncmp(names[cnt], LFPREFIX, sizeof(LFPREFIX) - 1) != 0) 469 continue; 470 471 /* 472 * Names of the form log\.[0-9]* are reserved for DB. Other 473 * names sharing LFPREFIX, such as "log.db", are legal. 
474 */ 475 for (c = names[cnt] + sizeof(LFPREFIX) - 1; *c != '\0'; c++) 476 if (!isdigit((int)*c)) 477 break; 478 if (*c != '\0') 479 continue; 480 481 /* 482 * Use atol, not atoi; if an "int" is 16-bits, the largest 483 * log file name won't fit. 484 */ 485 clv = (u_int32_t)atol(names[cnt] + (sizeof(LFPREFIX) - 1)); 486 487 /* 488 * If searching for the first log file, we want to return the 489 * oldest log file we can read, or, if no readable log files 490 * exist, the newest log file we can't read (the crossover 491 * point between the old and new versions of the log file). 492 * 493 * If we're searching for the last log file, we want to return 494 * the newest log file, period. 495 * 496 * Readable log files should never precede unreadable log 497 * files, that would mean the admin seriously screwed up. 498 */ 499 if (find_first) { 500 if (logval != 0 && 501 status != DB_LV_OLD_UNREADABLE && clv > logval) 502 continue; 503 } else 504 if (logval != 0 && clv < logval) 505 continue; 506 507 if ((ret = __log_valid(dblp, clv, 1, NULL, 0, 508 &status, NULL)) != 0) { 509 __db_err( 510 env, ret, "Invalid log file: %s", names[cnt]); 511 goto err; 512 } 513 switch (status) { 514 case DB_LV_NONEXISTENT: 515 /* __log_valid never returns DB_LV_NONEXISTENT. */ 516 DB_ASSERT(env, 0); 517 break; 518 case DB_LV_INCOMPLETE: 519 /* 520 * The last log file may not have been initialized -- 521 * it's possible to create a log file but not write 522 * anything to it. If performing recovery (that is, 523 * if find_first isn't set), ignore the file, it's 524 * not interesting. If we're searching for the first 525 * log record, return the file (assuming we don't find 526 * something better), as the "real" first log record 527 * is likely to be in the log buffer, and we want to 528 * set the file LSN for our return. 
529 */ 530 if (find_first) 531 goto found; 532 break; 533 case DB_LV_OLD_UNREADABLE: 534 /* 535 * If we're searching for the first log file, then we 536 * only want this file if we don't yet have a file or 537 * already have an unreadable file and this one is 538 * newer than that one. If we're searching for the 539 * last log file, we always want this file because we 540 * wouldn't be here if it wasn't newer than our current 541 * choice. 542 */ 543 if (!find_first || logval == 0 || 544 (status == DB_LV_OLD_UNREADABLE && clv > logval)) 545 goto found; 546 break; 547 case DB_LV_NORMAL: 548 case DB_LV_OLD_READABLE: 549found: logval = clv; 550 logval_status = status; 551 break; 552 } 553 } 554 555 *valp = logval; 556 557err: __os_dirfree(env, names, fcnt); 558 __os_free(env, p); 559 *statusp = logval_status; 560 561 return (ret); 562} 563 564/* 565 * log_valid -- 566 * Validate a log file. Returns an error code in the event of 567 * a fatal flaw in a the specified log file; returns success with 568 * a code indicating the currentness and completeness of the specified 569 * log file if it is not unexpectedly flawed (that is, if it's perfectly 570 * normal, if it's zero-length, or if it's an old version). 
571 * 572 * PUBLIC: int __log_valid __P((DB_LOG *, u_int32_t, int, 573 * PUBLIC: DB_FH **, u_int32_t, logfile_validity *, u_int32_t *)); 574 */ 575int 576__log_valid(dblp, number, set_persist, fhpp, flags, statusp, versionp) 577 DB_LOG *dblp; 578 u_int32_t number; 579 int set_persist; 580 DB_FH **fhpp; 581 u_int32_t flags; 582 logfile_validity *statusp; 583 u_int32_t *versionp; 584{ 585 DB_CIPHER *db_cipher; 586 DB_FH *fhp; 587 ENV *env; 588 HDR *hdr; 589 LOG *lp; 590 LOGP *persist; 591 logfile_validity status; 592 size_t hdrsize, nr, recsize; 593 int is_hmac, ret; 594 u_int8_t *tmp; 595 char *fname; 596 597 env = dblp->env; 598 db_cipher = env->crypto_handle; 599 fhp = NULL; 600 persist = NULL; 601 status = DB_LV_NORMAL; 602 tmp = NULL; 603 604 /* Return the file handle to our caller, on request */ 605 if (fhpp != NULL) 606 *fhpp = NULL; 607 608 if (flags == 0) 609 flags = DB_OSO_RDONLY | DB_OSO_SEQ; 610 /* Try to open the log file. */ 611 if ((ret = __log_name(dblp, number, &fname, &fhp, flags)) != 0) { 612 __os_free(env, fname); 613 return (ret); 614 } 615 616 hdrsize = HDR_NORMAL_SZ; 617 is_hmac = 0; 618 recsize = sizeof(LOGP); 619 if (CRYPTO_ON(env)) { 620 hdrsize = HDR_CRYPTO_SZ; 621 recsize = sizeof(LOGP); 622 recsize += db_cipher->adj_size(recsize); 623 is_hmac = 1; 624 } 625 if ((ret = __os_calloc(env, 1, recsize + hdrsize, &tmp)) != 0) 626 goto err; 627 628 hdr = (HDR *)tmp; 629 persist = (LOGP *)(tmp + hdrsize); 630 631 /* 632 * Try to read the header. This can fail if the log is truncated, or 633 * if we find a preallocated log file where the header has not yet been 634 * written, so we need to check whether the header is zero-filled. 
635 */ 636 if ((ret = __os_read(env, fhp, tmp, recsize + hdrsize, &nr)) != 0 || 637 nr != recsize + hdrsize || 638 (hdr->len == 0 && persist->magic == 0 && persist->log_size == 0)) { 639 if (ret == 0) 640 status = DB_LV_INCOMPLETE; 641 else 642 /* 643 * The error was a fatal read error, not just an 644 * incompletely initialized log file. 645 */ 646 __db_err(env, ret, "ignoring log file: %s", fname); 647 goto err; 648 } 649 650 if (LOG_SWAPPED(env)) 651 __log_hdrswap(hdr, CRYPTO_ON(env)); 652 653 /* 654 * Now we have to validate the persistent record. We have 655 * several scenarios we have to deal with: 656 * 657 * 1. User has crypto turned on: 658 * - They're reading an old, unencrypted log file 659 * . We will fail the record size match check below. 660 * - They're reading a current, unencrypted log file 661 * . We will fail the record size match check below. 662 * - They're reading an old, encrypted log file [NOT YET] 663 * . After decryption we'll fail the version check. [NOT YET] 664 * - They're reading a current, encrypted log file 665 * . We should proceed as usual. 666 * 2. User has crypto turned off: 667 * - They're reading an old, unencrypted log file 668 * . We will fail the version check. 669 * - They're reading a current, unencrypted log file 670 * . We should proceed as usual. 671 * - They're reading an old, encrypted log file [NOT YET] 672 * . We'll fail the magic number check (it is encrypted). 673 * - They're reading a current, encrypted log file 674 * . We'll fail the magic number check (it is encrypted). 675 */ 676 if (CRYPTO_ON(env)) { 677 /* 678 * If we are trying to decrypt an unencrypted log 679 * we can only detect that by having an unreasonable 680 * data length for our persistent data. 681 */ 682 if ((hdr->len - hdrsize) != sizeof(LOGP)) { 683 __db_errx(env, "log record size mismatch"); 684 goto err; 685 } 686 /* Check the checksum and decrypt. 
*/ 687 if ((ret = __db_check_chksum(env, hdr, db_cipher, 688 &hdr->chksum[0], (u_int8_t *)persist, 689 hdr->len - hdrsize, is_hmac)) != 0) { 690 __db_errx(env, "log record checksum mismatch"); 691 goto err; 692 } 693 694 if ((ret = db_cipher->decrypt(env, db_cipher->data, 695 &hdr->iv[0], (u_int8_t *)persist, hdr->len - hdrsize)) != 0) 696 goto err; 697 } 698 699 if (LOG_SWAPPED(env)) 700 __log_persistswap(persist); 701 702 /* Validate the header. */ 703 if (persist->magic != DB_LOGMAGIC) { 704 __db_errx(env, 705 "Ignoring log file: %s: magic number %lx, not %lx", 706 fname, (u_long)persist->magic, (u_long)DB_LOGMAGIC); 707 ret = EINVAL; 708 goto err; 709 } 710 711 /* 712 * Set our status code to indicate whether the log file belongs to an 713 * unreadable or readable old version; leave it alone if and only if 714 * the log file version is the current one. 715 */ 716 if (persist->version > DB_LOGVERSION) { 717 /* This is a fatal error--the log file is newer than DB. */ 718 __db_errx(env, 719 "Unacceptable log file %s: unsupported log version %lu", 720 fname, (u_long)persist->version); 721 ret = EINVAL; 722 goto err; 723 } else if (persist->version < DB_LOGOLDVER) { 724 status = DB_LV_OLD_UNREADABLE; 725 /* This is a non-fatal error, but give some feedback. */ 726 __db_errx(env, 727 "Skipping log file %s: historic log version %lu", 728 fname, (u_long)persist->version); 729 /* 730 * We don't want to set persistent info based on an unreadable 731 * region, so jump to "err". 732 */ 733 goto err; 734 } else if (persist->version < DB_LOGVERSION) 735 status = DB_LV_OLD_READABLE; 736 737 /* 738 * Only if we have a current log do we verify the checksum. We could 739 * not check the checksum before checking the magic and version because 740 * old log headers put the length and checksum in a different location. 741 * The checksum was calculated with the swapped byte order, so we need 742 * to check it with the same bytes. 
743 */ 744 if (!CRYPTO_ON(env)) { 745 if (LOG_SWAPPED(env)) 746 __log_persistswap(persist); 747 748 if ((ret = __db_check_chksum(env, 749 hdr, db_cipher, &hdr->chksum[0], (u_int8_t *)persist, 750 hdr->len - hdrsize, is_hmac)) != 0) { 751 __db_errx(env, "log record checksum mismatch"); 752 goto err; 753 } 754 755 if (LOG_SWAPPED(env)) 756 __log_persistswap(persist); 757 } 758 759 /* 760 * If the log is readable so far and we're doing system initialization, 761 * set the region's persistent information based on the headers. 762 * 763 * Override the current log file size. 764 */ 765 if (set_persist) { 766 lp = dblp->reginfo.primary; 767 lp->log_size = persist->log_size; 768 lp->persist.version = persist->version; 769 } 770 if (versionp != NULL) 771 *versionp = persist->version; 772 773err: if (fname != NULL) 774 __os_free(env, fname); 775 if (ret == 0 && fhpp != NULL) 776 *fhpp = fhp; 777 else 778 /* Must close on error or if we only used it locally. */ 779 (void)__os_closehandle(env, fhp); 780 if (tmp != NULL) 781 __os_free(env, tmp); 782 783 if (statusp != NULL) 784 *statusp = status; 785 786 return (ret); 787} 788 789/* 790 * __log_env_refresh -- 791 * Clean up after the log system on a close or failed open. 792 * 793 * PUBLIC: int __log_env_refresh __P((ENV *)); 794 */ 795int 796__log_env_refresh(env) 797 ENV *env; 798{ 799 DB_LOG *dblp; 800 LOG *lp; 801 REGINFO *reginfo; 802 struct __fname *fnp; 803 struct __db_commit *commit; 804 struct __db_filestart *filestart; 805 int ret, t_ret; 806 807 dblp = env->lg_handle; 808 reginfo = &dblp->reginfo; 809 lp = reginfo->primary; 810 ret = 0; 811 812 /* 813 * Flush the log if it's private -- there's no Berkeley DB guarantee 814 * that this gets done, but in case the application has forgotten to 815 * flush for durability, it's the polite thing to do. 
816 */ 817 if (F_ISSET(env, ENV_PRIVATE) && 818 (t_ret = __log_flush(env, NULL)) != 0 && ret == 0) 819 ret = t_ret; 820 821 /* We may have opened files as part of XA; if so, close them. */ 822 if ((t_ret = __dbreg_close_files(env, 0)) != 0 && ret == 0) 823 ret = t_ret; 824 825 /* 826 * After we close the files, check for any unlogged closes left in 827 * the shared memory queue. If we find any, try to log it, otherwise 828 * return the error. We cannot say the environment was closed 829 * cleanly. 830 */ 831 MUTEX_LOCK(env, lp->mtx_filelist); 832 SH_TAILQ_FOREACH(fnp, &lp->fq, q, __fname) 833 if (F_ISSET(fnp, DB_FNAME_NOTLOGGED) && 834 (t_ret = __dbreg_close_id_int( 835 env, fnp, DBREG_CLOSE, 1)) != 0) 836 ret = t_ret; 837 MUTEX_UNLOCK(env, lp->mtx_filelist); 838 839 /* 840 * If a private region, return the memory to the heap. Not needed for 841 * filesystem-backed or system shared memory regions, that memory isn't 842 * owned by any particular process. 843 */ 844 if (F_ISSET(env, ENV_PRIVATE)) { 845 /* Discard the flush mutex. */ 846 if ((t_ret = 847 __mutex_free(env, &lp->mtx_flush)) != 0 && ret == 0) 848 ret = t_ret; 849 850 /* Discard the buffer. */ 851 __env_alloc_free(reginfo, R_ADDR(reginfo, lp->buffer_off)); 852 853 /* Discard stack of free file IDs. */ 854 if (lp->free_fid_stack != INVALID_ROFF) 855 __env_alloc_free(reginfo, 856 R_ADDR(reginfo, lp->free_fid_stack)); 857 858 /* Discard the list of in-memory log file markers. */ 859 while ((filestart = SH_TAILQ_FIRST(&lp->logfiles, 860 __db_filestart)) != NULL) { 861 SH_TAILQ_REMOVE(&lp->logfiles, filestart, links, 862 __db_filestart); 863 __env_alloc_free(reginfo, filestart); 864 } 865 866 while ((filestart = SH_TAILQ_FIRST(&lp->free_logfiles, 867 __db_filestart)) != NULL) { 868 SH_TAILQ_REMOVE(&lp->free_logfiles, filestart, links, 869 __db_filestart); 870 __env_alloc_free(reginfo, filestart); 871 } 872 873 /* Discord commit queue elements. 
*/ 874 while ((commit = SH_TAILQ_FIRST(&lp->free_commits, 875 __db_commit)) != NULL) { 876 SH_TAILQ_REMOVE(&lp->free_commits, commit, links, 877 __db_commit); 878 __env_alloc_free(reginfo, commit); 879 } 880 881 /* Discard replication bulk buffer. */ 882 if (lp->bulk_buf != INVALID_ROFF) { 883 __env_alloc_free(reginfo, 884 R_ADDR(reginfo, lp->bulk_buf)); 885 lp->bulk_buf = INVALID_ROFF; 886 } 887 } 888 889 /* Discard the per-thread DBREG mutex. */ 890 if ((t_ret = __mutex_free(env, &dblp->mtx_dbreg)) != 0 && ret == 0) 891 ret = t_ret; 892 893 /* Detach from the region. */ 894 if ((t_ret = __env_region_detach(env, reginfo, 0)) != 0 && ret == 0) 895 ret = t_ret; 896 897 /* Close open files, release allocated memory. */ 898 if (dblp->lfhp != NULL) { 899 if ((t_ret = 900 __os_closehandle(env, dblp->lfhp)) != 0 && ret == 0) 901 ret = t_ret; 902 dblp->lfhp = NULL; 903 } 904 if (dblp->dbentry != NULL) 905 __os_free(env, dblp->dbentry); 906 907 __os_free(env, dblp); 908 909 env->lg_handle = NULL; 910 return (ret); 911} 912 913/* 914 * __log_get_cached_ckp_lsn -- 915 * Retrieve any last checkpoint LSN that we may have found on startup. 916 * 917 * PUBLIC: int __log_get_cached_ckp_lsn __P((ENV *, DB_LSN *)); 918 */ 919int 920__log_get_cached_ckp_lsn(env, ckp_lsnp) 921 ENV *env; 922 DB_LSN *ckp_lsnp; 923{ 924 DB_LOG *dblp; 925 LOG *lp; 926 927 dblp = env->lg_handle; 928 lp = (LOG *)dblp->reginfo.primary; 929 930 LOG_SYSTEM_LOCK(env); 931 *ckp_lsnp = lp->cached_ckp_lsn; 932 LOG_SYSTEM_UNLOCK(env); 933 934 return (0); 935} 936 937/* 938 * __log_region_mutex_count -- 939 * Return the number of mutexes the log region will need. 940 * 941 * PUBLIC: u_int32_t __log_region_mutex_count __P((ENV *)); 942 */ 943u_int32_t 944__log_region_mutex_count(env) 945 ENV *env; 946{ 947 /* 948 * We need a few assorted mutexes, and one per transaction waiting 949 * on the group commit list. We can't know how many that will be, 950 * but it should be bounded by the maximum active transactions. 
951 */ 952 return (env->dbenv->tx_max + 5); 953} 954 955/* 956 * __log_region_size -- 957 * Return the amount of space needed for the log region. 958 * Make the region large enough to hold txn_max transaction 959 * detail structures plus some space to hold thread handles 960 * and the beginning of the alloc region and anything we 961 * need for mutex system resource recording. 962 */ 963static size_t 964__log_region_size(env) 965 ENV *env; 966{ 967 DB_ENV *dbenv; 968 size_t s; 969 970 dbenv = env->dbenv; 971 972 s = dbenv->lg_regionmax + dbenv->lg_bsize; 973 974 /* 975 * If running with replication, add in space for bulk buffer. 976 * Allocate a megabyte and a little bit more space. 977 */ 978 if (IS_ENV_REPLICATED(env)) 979 s += MEGABYTE; 980 981 return (s); 982} 983 984/* 985 * __log_vtruncate 986 * This is a virtual truncate. We set up the log indicators to 987 * make everyone believe that the given record is the last one in the 988 * log. Returns with the next valid LSN (i.e., the LSN of the next 989 * record to be written). This is used in replication to discard records 990 * in the log file that do not agree with the master. 991 * 992 * PUBLIC: int __log_vtruncate __P((ENV *, DB_LSN *, DB_LSN *, DB_LSN *)); 993 */ 994int 995__log_vtruncate(env, lsn, ckplsn, trunclsn) 996 ENV *env; 997 DB_LSN *lsn, *ckplsn, *trunclsn; 998{ 999 DBT log_dbt; 1000 DB_LOG *dblp; 1001 DB_LOGC *logc; 1002 LOG *lp; 1003 u_int32_t bytes, len; 1004 int ret, t_ret; 1005 1006 /* Need to find out the length of this soon-to-be-last record. */ 1007 if ((ret = __log_cursor(env, &logc)) != 0) 1008 return (ret); 1009 memset(&log_dbt, 0, sizeof(log_dbt)); 1010 ret = __logc_get(logc, lsn, &log_dbt, DB_SET); 1011 len = logc->len; 1012 if ((t_ret = __logc_close(logc)) != 0 && ret == 0) 1013 ret = t_ret; 1014 if (ret != 0) 1015 return (ret); 1016 1017 /* Now do the truncate. 
*/ 1018 dblp = env->lg_handle; 1019 lp = (LOG *)dblp->reginfo.primary; 1020 1021 LOG_SYSTEM_LOCK(env); 1022 1023 /* 1024 * Flush the log so we can simply initialize the in-memory buffer 1025 * after the truncate. 1026 */ 1027 if ((ret = __log_flush_int(dblp, NULL, 0)) != 0) 1028 goto err; 1029 1030 lp->lsn = *lsn; 1031 lp->len = len; 1032 lp->lsn.offset += lp->len; 1033 1034 if (lp->db_log_inmemory && 1035 (ret = __log_inmem_lsnoff(dblp, &lp->lsn, &lp->b_off)) != 0) 1036 goto err; 1037 1038 /* 1039 * I am going to assume that the number of bytes written since 1040 * the last checkpoint doesn't exceed a 32-bit number. 1041 */ 1042 DB_ASSERT(env, lp->lsn.file >= ckplsn->file); 1043 bytes = 0; 1044 if (ckplsn->file != lp->lsn.file) { 1045 bytes = lp->log_size - ckplsn->offset; 1046 if (lp->lsn.file > ckplsn->file + 1) 1047 bytes += lp->log_size * 1048 ((lp->lsn.file - ckplsn->file) - 1); 1049 bytes += lp->lsn.offset; 1050 } else 1051 bytes = lp->lsn.offset - ckplsn->offset; 1052 1053 lp->stat.st_wc_mbytes += bytes / MEGABYTE; 1054 lp->stat.st_wc_bytes += bytes % MEGABYTE; 1055 1056 /* 1057 * If the synced lsn is greater than our new end of log, reset it 1058 * to our current end of log. 1059 */ 1060 MUTEX_LOCK(env, lp->mtx_flush); 1061 if (LOG_COMPARE(&lp->s_lsn, lsn) > 0) 1062 lp->s_lsn = lp->lsn; 1063 MUTEX_UNLOCK(env, lp->mtx_flush); 1064 1065 /* Initialize the in-region buffer to a pristine state. */ 1066 ZERO_LSN(lp->f_lsn); 1067 lp->w_off = lp->lsn.offset; 1068 1069 if (trunclsn != NULL) 1070 *trunclsn = lp->lsn; 1071 1072 /* Truncate the log to the new point. */ 1073 if ((ret = __log_zero(env, &lp->lsn)) != 0) 1074 goto err; 1075 1076err: LOG_SYSTEM_UNLOCK(env); 1077 return (ret); 1078} 1079 1080/* 1081 * __log_is_outdated -- 1082 * Used by the replication system to identify if a client's logs are too 1083 * old. 
1084 * 1085 * PUBLIC: int __log_is_outdated __P((ENV *, u_int32_t, int *)); 1086 */ 1087int 1088__log_is_outdated(env, fnum, outdatedp) 1089 ENV *env; 1090 u_int32_t fnum; 1091 int *outdatedp; 1092{ 1093 DB_LOG *dblp; 1094 LOG *lp; 1095 char *name; 1096 int ret; 1097 u_int32_t cfile; 1098 struct __db_filestart *filestart; 1099 1100 dblp = env->lg_handle; 1101 1102 /* 1103 * The log represented by env is compared to the file number passed 1104 * in fnum. If the log file fnum does not exist and is lower-numbered 1105 * than the current logs, return *outdatedp non-zero, else we return 0. 1106 */ 1107 if (FLD_ISSET(env->dbenv->lg_flags, DB_LOG_IN_MEMORY)) { 1108 LOG_SYSTEM_LOCK(env); 1109 lp = (LOG *)dblp->reginfo.primary; 1110 filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart); 1111 *outdatedp = filestart == NULL ? 0 : (fnum < filestart->file); 1112 LOG_SYSTEM_UNLOCK(env); 1113 return (0); 1114 } 1115 1116 *outdatedp = 0; 1117 if ((ret = __log_name(dblp, fnum, &name, NULL, 0)) != 0) { 1118 __os_free(env, name); 1119 return (ret); 1120 } 1121 1122 /* If the file exists, we're just fine. */ 1123 if (__os_exists(env, name, NULL) == 0) 1124 goto out; 1125 1126 /* 1127 * It didn't exist, decide if the file number is too big or 1128 * too little. If it's too little, then we need to indicate 1129 * that the LSN is outdated. 1130 */ 1131 LOG_SYSTEM_LOCK(env); 1132 lp = (LOG *)dblp->reginfo.primary; 1133 cfile = lp->lsn.file; 1134 LOG_SYSTEM_UNLOCK(env); 1135 1136 if (cfile > fnum) 1137 *outdatedp = 1; 1138out: __os_free(env, name); 1139 return (ret); 1140} 1141 1142/* 1143 * __log_zero -- 1144 * Zero out the tail of a log after a truncate. 
1145 * 1146 * PUBLIC: int __log_zero __P((ENV *, DB_LSN *)); 1147 */ 1148int 1149__log_zero(env, from_lsn) 1150 ENV *env; 1151 DB_LSN *from_lsn; 1152{ 1153 DB_FH *fhp; 1154 DB_LOG *dblp; 1155 LOG *lp; 1156 struct __db_filestart *filestart, *nextstart; 1157 size_t nbytes, len, nw; 1158 u_int32_t fn, mbytes, bytes; 1159 u_int8_t buf[4096]; 1160 int ret; 1161 char *fname; 1162 1163 dblp = env->lg_handle; 1164 lp = (LOG *)dblp->reginfo.primary; 1165 DB_ASSERT(env, LOG_COMPARE(from_lsn, &lp->lsn) <= 0); 1166 if (LOG_COMPARE(from_lsn, &lp->lsn) > 0) { 1167 __db_errx(env, 1168 "Warning: truncating to point beyond end of log"); 1169 return (0); 1170 } 1171 1172 if (lp->db_log_inmemory) { 1173 /* 1174 * Remove the files that are invalidated by this truncate. 1175 */ 1176 for (filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart); 1177 filestart != NULL; filestart = nextstart) { 1178 nextstart = SH_TAILQ_NEXT(filestart, 1179 links, __db_filestart); 1180 if (filestart->file > from_lsn->file) { 1181 SH_TAILQ_REMOVE(&lp->logfiles, 1182 filestart, links, __db_filestart); 1183 SH_TAILQ_INSERT_HEAD(&lp->free_logfiles, 1184 filestart, links, __db_filestart); 1185 } 1186 } 1187 1188 return (0); 1189 } 1190 1191 /* Close any open file handles so unlinks don't fail. */ 1192 if (dblp->lfhp != NULL) { 1193 (void)__os_closehandle(env, dblp->lfhp); 1194 dblp->lfhp = NULL; 1195 } 1196 1197 /* Throw away any extra log files that we have around. */ 1198 for (fn = from_lsn->file + 1;; fn++) { 1199 if (__log_name(dblp, fn, &fname, &fhp, DB_OSO_RDONLY) != 0) { 1200 __os_free(env, fname); 1201 break; 1202 } 1203 (void)__os_closehandle(env, fhp); 1204 (void)time(&lp->timestamp); 1205 ret = __os_unlink(env, fname, 0); 1206 __os_free(env, fname); 1207 if (ret != 0) 1208 return (ret); 1209 } 1210 1211 /* We removed some log files; have to 0 to end of file. 
*/ 1212 if ((ret = 1213 __log_name(dblp, from_lsn->file, &fname, &dblp->lfhp, 0)) != 0) { 1214 __os_free(env, fname); 1215 return (ret); 1216 } 1217 __os_free(env, fname); 1218 if ((ret = __os_ioinfo(env, 1219 NULL, dblp->lfhp, &mbytes, &bytes, NULL)) != 0) 1220 goto err; 1221 DB_ASSERT(env, (mbytes * MEGABYTE + bytes) >= from_lsn->offset); 1222 len = (mbytes * MEGABYTE + bytes) - from_lsn->offset; 1223 1224 memset(buf, 0, sizeof(buf)); 1225 1226 /* Initialize the write position. */ 1227 if ((ret = __os_seek(env, dblp->lfhp, 0, 0, from_lsn->offset)) != 0) 1228 goto err; 1229 1230 while (len > 0) { 1231 nbytes = len > sizeof(buf) ? sizeof(buf) : len; 1232 if ((ret = 1233 __os_write(env, dblp->lfhp, buf, nbytes, &nw)) != 0) 1234 goto err; 1235 len -= nbytes; 1236 } 1237 1238err: (void)__os_closehandle(env, dblp->lfhp); 1239 dblp->lfhp = NULL; 1240 1241 return (ret); 1242} 1243 1244/* 1245 * __log_inmem_lsnoff -- 1246 * Find the offset in the buffer of a given LSN. 1247 * 1248 * PUBLIC: int __log_inmem_lsnoff __P((DB_LOG *, DB_LSN *, size_t *)); 1249 */ 1250int 1251__log_inmem_lsnoff(dblp, lsnp, offsetp) 1252 DB_LOG *dblp; 1253 DB_LSN *lsnp; 1254 size_t *offsetp; 1255{ 1256 LOG *lp; 1257 struct __db_filestart *filestart; 1258 1259 lp = (LOG *)dblp->reginfo.primary; 1260 1261 SH_TAILQ_FOREACH(filestart, &lp->logfiles, links, __db_filestart) 1262 if (filestart->file == lsnp->file) { 1263 *offsetp = 1264 (filestart->b_off + lsnp->offset) % lp->buffer_size; 1265 return (0); 1266 } 1267 1268 return (DB_NOTFOUND); 1269} 1270 1271/* 1272 * __log_inmem_newfile -- 1273 * Records the offset of the beginning of a new file in the in-memory 1274 * buffer. 
1275 * 1276 * PUBLIC: int __log_inmem_newfile __P((DB_LOG *, u_int32_t)); 1277 */ 1278int 1279__log_inmem_newfile(dblp, file) 1280 DB_LOG *dblp; 1281 u_int32_t file; 1282{ 1283 HDR hdr; 1284 LOG *lp; 1285 struct __db_filestart *filestart; 1286 int ret; 1287#ifdef DIAGNOSTIC 1288 struct __db_filestart *first, *last; 1289#endif 1290 1291 lp = (LOG *)dblp->reginfo.primary; 1292 1293 /* 1294 * If the log buffer is empty, reuse the filestart entry. 1295 */ 1296 filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart); 1297 if (filestart != NULL && 1298 RINGBUF_LEN(lp, filestart->b_off, lp->b_off) <= 1299 sizeof(HDR) + sizeof(LOGP)) { 1300 filestart->file = file; 1301 filestart->b_off = lp->b_off; 1302 return (0); 1303 } 1304 1305 /* 1306 * We write an empty header at the end of every in-memory log file. 1307 * This is used during cursor traversal to indicate when to switch the 1308 * LSN to the next file. 1309 */ 1310 if (file > 1) { 1311 memset(&hdr, 0, sizeof(HDR)); 1312 __log_inmem_copyin(dblp, lp->b_off, &hdr, sizeof(HDR)); 1313 lp->b_off = (lp->b_off + sizeof(HDR)) % lp->buffer_size; 1314 } 1315 1316 filestart = SH_TAILQ_FIRST(&lp->free_logfiles, __db_filestart); 1317 if (filestart == NULL) { 1318 if ((ret = __env_alloc(&dblp->reginfo, 1319 sizeof(struct __db_filestart), &filestart)) != 0) 1320 return (ret); 1321 memset(filestart, 0, sizeof(*filestart)); 1322 } else 1323 SH_TAILQ_REMOVE(&lp->free_logfiles, filestart, 1324 links, __db_filestart); 1325 1326 filestart->file = file; 1327 filestart->b_off = lp->b_off; 1328 1329#ifdef DIAGNOSTIC 1330 first = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart); 1331 last = SH_TAILQ_LAST(&(lp)->logfiles, links, __db_filestart); 1332 1333 /* Check that we don't wrap. 
*/ 1334 DB_ASSERT(dblp->env, !first || first == last || 1335 RINGBUF_LEN(lp, first->b_off, lp->b_off) == 1336 RINGBUF_LEN(lp, first->b_off, last->b_off) + 1337 RINGBUF_LEN(lp, last->b_off, lp->b_off)); 1338#endif 1339 1340 SH_TAILQ_INSERT_TAIL(&lp->logfiles, filestart, links); 1341 return (0); 1342} 1343 1344/* 1345 * __log_inmem_chkspace -- 1346 * Ensure that the requested amount of space is available in the buffer, 1347 * and invalidate the region. 1348 * Note: assumes that the region lock is held on entry. 1349 * 1350 * PUBLIC: int __log_inmem_chkspace __P((DB_LOG *, size_t)); 1351 */ 1352int 1353__log_inmem_chkspace(dblp, len) 1354 DB_LOG *dblp; 1355 size_t len; 1356{ 1357 DB_LSN active_lsn, old_active_lsn; 1358 ENV *env; 1359 LOG *lp; 1360 struct __db_filestart *filestart; 1361 int ret; 1362 1363 env = dblp->env; 1364 lp = dblp->reginfo.primary; 1365 1366 DB_ASSERT(env, lp->db_log_inmemory); 1367 1368 /* 1369 * Allow room for an extra header so that we don't need to check for 1370 * space when switching files. 1371 */ 1372 len += sizeof(HDR); 1373 1374 /* 1375 * If transactions are enabled and we're about to fill available space, 1376 * update the active LSN and recheck. If transactions aren't enabled, 1377 * don't even bother checking: in that case we can always overwrite old 1378 * log records, because we're never going to abort. 1379 */ 1380 while (TXN_ON(env) && 1381 RINGBUF_LEN(lp, lp->b_off, lp->a_off) <= len) { 1382 old_active_lsn = lp->active_lsn; 1383 active_lsn = lp->lsn; 1384 1385 /* 1386 * Drop the log region lock so we don't hold it while 1387 * taking the transaction region lock. 1388 */ 1389 LOG_SYSTEM_UNLOCK(env); 1390 ret = __txn_getactive(env, &active_lsn); 1391 LOG_SYSTEM_LOCK(env); 1392 if (ret != 0) 1393 return (ret); 1394 active_lsn.offset = 0; 1395 1396 /* If we didn't make any progress, give up. 
*/ 1397 if (LOG_COMPARE(&active_lsn, &old_active_lsn) == 0) { 1398 __db_errx(env, 1399 "In-memory log buffer is full (an active transaction spans the buffer)"); 1400 return (DB_LOG_BUFFER_FULL); 1401 } 1402 1403 /* Make sure we're moving the region LSN forwards. */ 1404 if (LOG_COMPARE(&active_lsn, &lp->active_lsn) > 0) { 1405 lp->active_lsn = active_lsn; 1406 (void)__log_inmem_lsnoff(dblp, &active_lsn, 1407 &lp->a_off); 1408 } 1409 } 1410 1411 /* 1412 * Remove the first file if it is invalidated by this write. 1413 * Log records can't be bigger than a file, so we only need to 1414 * check the first file. 1415 */ 1416 filestart = SH_TAILQ_FIRST(&lp->logfiles, __db_filestart); 1417 if (filestart != NULL && 1418 RINGBUF_LEN(lp, lp->b_off, filestart->b_off) <= len) { 1419 SH_TAILQ_REMOVE(&lp->logfiles, filestart, 1420 links, __db_filestart); 1421 SH_TAILQ_INSERT_HEAD(&lp->free_logfiles, filestart, 1422 links, __db_filestart); 1423 lp->f_lsn.file = filestart->file + 1; 1424 } 1425 1426 return (0); 1427} 1428 1429/* 1430 * __log_inmem_copyout -- 1431 * Copies the given number of bytes from the buffer -- no checking. 1432 * Note: assumes that the region lock is held on entry. 1433 * 1434 * PUBLIC: void __log_inmem_copyout __P((DB_LOG *, size_t, void *, size_t)); 1435 */ 1436void 1437__log_inmem_copyout(dblp, offset, buf, size) 1438 DB_LOG *dblp; 1439 size_t offset; 1440 void *buf; 1441 size_t size; 1442{ 1443 LOG *lp; 1444 size_t nbytes; 1445 1446 lp = (LOG *)dblp->reginfo.primary; 1447 nbytes = (offset + size < lp->buffer_size) ? 1448 size : lp->buffer_size - offset; 1449 memcpy(buf, dblp->bufp + offset, nbytes); 1450 if (nbytes < size) 1451 memcpy((u_int8_t *)buf + nbytes, dblp->bufp, size - nbytes); 1452} 1453 1454/* 1455 * __log_inmem_copyin -- 1456 * Copies the given number of bytes into the buffer -- no checking. 1457 * Note: assumes that the region lock is held on entry. 
1458 * 1459 * PUBLIC: void __log_inmem_copyin __P((DB_LOG *, size_t, void *, size_t)); 1460 */ 1461void 1462__log_inmem_copyin(dblp, offset, buf, size) 1463 DB_LOG *dblp; 1464 size_t offset; 1465 void *buf; 1466 size_t size; 1467{ 1468 LOG *lp; 1469 size_t nbytes; 1470 1471 lp = (LOG *)dblp->reginfo.primary; 1472 nbytes = (offset + size < lp->buffer_size) ? 1473 size : lp->buffer_size - offset; 1474 memcpy(dblp->bufp + offset, buf, nbytes); 1475 if (nbytes < size) 1476 memcpy(dblp->bufp, (u_int8_t *)buf + nbytes, size - nbytes); 1477} 1478 1479/* 1480 * __log_set_version -- 1481 * Sets the current version of the log subsystem to the given version. 1482 * Essentially this modifies the lp->persist.version field in the 1483 * shared memory region. Called when region is initially created 1484 * and when replication is starting up or finds a new master. 1485 * 1486 * PUBLIC: void __log_set_version __P((ENV *, u_int32_t)); 1487 */ 1488void 1489__log_set_version(env, newver) 1490 ENV *env; 1491 u_int32_t newver; 1492{ 1493 DB_LOG *dblp; 1494 LOG *lp; 1495 1496 dblp = env->lg_handle; 1497 lp = (LOG *)dblp->reginfo.primary; 1498 /* 1499 * We should be able to update this atomically without locking. 1500 */ 1501 lp->persist.version = newver; 1502} 1503 1504/* 1505 * __log_get_oldversion -- 1506 * Returns the last version of log that this environment was working 1507 * with. Since there could be several versions of log files, if 1508 * the user upgraded and didn't log archive, we check the version 1509 * of the first log file, compare it to the last log file. If those 1510 * are different, then there is an older log existing, and we then 1511 * walk backward in the log files looking for the version of the 1512 * most recent older log file. 
1513 * 1514 * PUBLIC: int __log_get_oldversion __P((ENV *, u_int32_t *)); 1515 */ 1516int 1517__log_get_oldversion(env, ver) 1518 ENV *env; 1519 u_int32_t *ver; 1520{ 1521 DBT rec; 1522 DB_LOG *dblp; 1523 DB_LOGC *logc; 1524 DB_LSN lsn; 1525 LOG *lp; 1526 u_int32_t firstfnum, fnum, lastver, oldver; 1527 int ret, t_ret; 1528 1529 dblp = env->lg_handle; 1530 lp = dblp->reginfo.primary; 1531 1532 logc = NULL; 1533 ret = 0; 1534 oldver = DB_LOGVERSION; 1535 /* 1536 * If we're in-memory logs we're always the current version. 1537 */ 1538 if (lp->db_log_inmemory) { 1539 *ver = oldver; 1540 return (0); 1541 } 1542 memset(&rec, 0, sizeof(rec)); 1543 if ((ret = __log_cursor(env, &logc)) != 0) 1544 goto err; 1545 /* 1546 * Get the version numbers of the first and last log files. 1547 */ 1548 if ((ret = __logc_get(logc, &lsn, &rec, DB_FIRST)) != 0) { 1549 /* 1550 * If there is no log file, we'll get DB_NOTFOUND. 1551 * If we get that, set the version to the current. 1552 */ 1553 if (ret == DB_NOTFOUND) 1554 ret = 0; 1555 goto err; 1556 } 1557 firstfnum = lsn.file; 1558 if ((ret = __logc_get(logc, &lsn, &rec, DB_LAST)) != 0) 1559 goto err; 1560 if ((ret = __log_valid(dblp, firstfnum, 0, NULL, 0, 1561 NULL, &oldver)) != 0) 1562 goto err; 1563 /* 1564 * If the first and last LSN are in the same file, then we 1565 * already have the version in oldver. Return it. 1566 */ 1567 if (firstfnum == lsn.file) 1568 goto err; 1569 1570 /* 1571 * Otherwise they're in different files and we call __log_valid 1572 * to get the version numbers in both files. 1573 */ 1574 if ((ret = __log_valid(dblp, lsn.file, 0, NULL, 0, 1575 NULL, &lastver)) != 0) 1576 goto err; 1577 /* 1578 * If the version numbers are different, walk backward getting 1579 * the version of each log file until we find one that is 1580 * different than the last. 
1581 */ 1582 if (oldver != lastver) { 1583 for (fnum = lsn.file - 1; fnum >= firstfnum; fnum--) { 1584 if ((ret = __log_valid(dblp, fnum, 0, NULL, 0, 1585 NULL, &oldver)) != 0) 1586 goto err; 1587 if (oldver != lastver) 1588 break; 1589 } 1590 } 1591err: if (logc != NULL && ((t_ret = __logc_close(logc)) != 0) && ret == 0) 1592 ret = t_ret; 1593 if (ret == 0 && ver != NULL) 1594 *ver = oldver; 1595 return (ret); 1596} 1597