1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 2001-2009 Oracle. All rights reserved. 5 * 6 * $Id$ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/db_page.h" 13#include "dbinc/fop.h" 14#include "dbinc/db_am.h" 15#include "dbinc/mp.h" 16#include "dbinc/txn.h" 17 18static int __fop_rename_recover_int 19 __P((ENV *, DBT *, DB_LSN *, db_recops, void *, int)); 20static int __fop_rename_42_recover_int 21 __P((ENV *, DBT *, DB_LSN *, db_recops, void *, int)); 22 23/* 24 * The transactional guarantees Berkeley DB provides for file 25 * system level operations (database physical file create, delete, 26 * rename) are based on our understanding of current file system 27 * semantics; a system that does not provide these semantics and 28 * guarantees could be in danger. 29 * 30 * First, as in standard database changes, fsync and fdatasync must 31 * work: when applied to the log file, the records written into the 32 * log must be transferred to stable storage. 33 * 34 * Second, it must not be possible for the log file to be removed 35 * without previous file system level operations being flushed to 36 * stable storage. Berkeley DB applications write log records 37 * describing file system operations into the log, then perform the 38 * file system operation, then commit the enclosing transaction 39 * (which flushes the log file to stable storage). Subsequently, 40 * a database environment checkpoint may make it possible for the 41 * application to remove the log file containing the record of the 42 * file system operation. DB's transactional guarantees for file 43 * system operations require the log file removal not succeed until 44 * all previous filesystem operations have been flushed to stable 45 * storage. In other words, the flush of the log file, or the 46 * removal of the log file, must block until all previous 47 * filesystem operations have been flushed to stable storage. This 48 * semantic is not, as far as we know, required by any existing 49 * standards document, but we have never seen a filesystem where 50 * it does not apply. 51 */ 52 53/* 54 * __fop_create_recover -- 55 * Recovery function for create. 56 * 57 * PUBLIC: int __fop_create_recover 58 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 59 */ 60int 61__fop_create_recover(env, dbtp, lsnp, op, info) 62 ENV *env; 63 DBT *dbtp; 64 DB_LSN *lsnp; 65 db_recops op; 66 void *info; 67{ 68 __fop_create_args *argp; 69 DB_FH *fhp; 70 DBMETA *meta; 71 u_int8_t mbuf[DBMETASIZE]; 72 int ret; 73 char *real_name; 74 const char *dirname; 75 76 COMPQUIET(info, NULL); 77 78 real_name = NULL; 79 REC_PRINT(__fop_create_print); 80 REC_NOOP_INTRO(__fop_create_read); 81 meta = (DBMETA *)mbuf; 82 83 if (argp->dirname.size == 0) 84 dirname = NULL; 85 else 86 dirname = (const char *)argp->dirname.data; 87 88 if ((ret = __db_appname(env, (APPNAME)argp->appname == DB_APP_DATA ? 89 DB_APP_RECOVER : (APPNAME)argp->appname, 90 (const char *)argp->name.data, &dirname, &real_name)) != 0) 91 goto out; 92 93 if (DB_UNDO(op)) { 94 /* 95 * If the file was opened in mpool, we must mark it as 96 * dead via nameop which will also unlink the file. 97 */ 98 if (__os_open(env, real_name, 0, 0, 0, &fhp) == 0) { 99 if (__fop_read_meta(env, 100 real_name, mbuf, DBMETASIZE, fhp, 1, NULL) == 0 && 101 __db_chk_meta(env, NULL, meta, 1) == 0) { 102 if ((ret = __memp_nameop(env, 103 meta->uid, NULL, real_name, NULL, 0)) != 0) 104 goto out; 105 } else { 106 (void)__os_closehandle(env, fhp); 107 goto do_unlink; 108 } 109 (void)__os_closehandle(env, fhp); 110 } else 111do_unlink: (void)__os_unlink(env, real_name, 0); 112 } else if (DB_REDO(op)) { 113 if ((ret = __os_open(env, real_name, 0, 114 DB_OSO_CREATE, (int)argp->mode, &fhp)) == 0) 115 (void)__os_closehandle(env, fhp); 116 else 117 goto out; 118 } 119 120 *lsnp = argp->prev_lsn; 121 122out: if (real_name != NULL) 123 __os_free(env, real_name); 124 125 REC_NOOP_CLOSE; 126} 127 128/* 129 * __fop_create_42_recover -- 130 * Recovery function for create. 131 * 132 * PUBLIC: int __fop_create_42_recover 133 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 134 */ 135int 136__fop_create_42_recover(env, dbtp, lsnp, op, info) 137 ENV *env; 138 DBT *dbtp; 139 DB_LSN *lsnp; 140 db_recops op; 141 void *info; 142{ 143 __fop_create_args *argp; 144 DB_FH *fhp; 145 DBMETA *meta; 146 u_int8_t mbuf[DBMETASIZE]; 147 int ret; 148 char *real_name; 149 150 COMPQUIET(info, NULL); 151 152 real_name = NULL; 153 REC_PRINT(__fop_create_print); 154 REC_NOOP_INTRO(__fop_create_read); 155 meta = (DBMETA *)mbuf; 156 157 if ((ret = __db_appname(env, (APPNAME)argp->appname, 158 (const char *)argp->name.data, NULL, &real_name)) != 0) 159 goto out; 160 161 if (DB_UNDO(op)) { 162 /* 163 * If the file was opened in mpool, we must mark it as 164 * dead via nameop which will also unlink the file. 165 */ 166 if (__os_open(env, real_name, 0, 0, 0, &fhp) == 0) { 167 if (__fop_read_meta(env, 168 real_name, mbuf, DBMETASIZE, fhp, 1, NULL) == 0 && 169 __db_chk_meta(env, NULL, meta, 1) == 0) { 170 if ((ret = __memp_nameop(env, 171 meta->uid, NULL, real_name, NULL, 0)) != 0) 172 goto out; 173 } else 174 goto do_unlink; 175 (void)__os_closehandle(env, fhp); 176 } else 177do_unlink: (void)__os_unlink(env, real_name, 0); 178 } else if (DB_REDO(op)) { 179 if ((ret = __os_open(env, real_name, 0, 180 DB_OSO_CREATE, (int)argp->mode, &fhp)) == 0) 181 (void)__os_closehandle(env, fhp); 182 else 183 goto out; 184 } 185 186 *lsnp = argp->prev_lsn; 187 188out: if (real_name != NULL) 189 __os_free(env, real_name); 190 191 REC_NOOP_CLOSE; 192} 193 194/* 195 * __fop_remove_recover -- 196 * Recovery function for remove. 197 * 198 * PUBLIC: int __fop_remove_recover 199 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 200 */ 201int 202__fop_remove_recover(env, dbtp, lsnp, op, info) 203 ENV *env; 204 DBT *dbtp; 205 DB_LSN *lsnp; 206 db_recops op; 207 void *info; 208{ 209 __fop_remove_args *argp; 210 int ret; 211 char *real_name; 212 213 COMPQUIET(info, NULL); 214 215 real_name = NULL; 216 REC_PRINT(__fop_remove_print); 217 REC_NOOP_INTRO(__fop_remove_read); 218 219 if ((ret = __db_appname(env, (APPNAME)argp->appname, 220 (const char *)argp->name.data, NULL, &real_name)) != 0) 221 goto out; 222 223 /* Its ok if the file is not there. */ 224 if (DB_REDO(op)) 225 (void)__memp_nameop(env, 226 (u_int8_t *)argp->fid.data, NULL, real_name, NULL, 0); 227 228 *lsnp = argp->prev_lsn; 229out: if (real_name != NULL) 230 __os_free(env, real_name); 231 REC_NOOP_CLOSE; 232} 233 234/* 235 * __fop_write_recover -- 236 * Recovery function for writechunk. 237 * 238 * PUBLIC: int __fop_write_recover 239 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 240 */ 241int 242__fop_write_recover(env, dbtp, lsnp, op, info) 243 ENV *env; 244 DBT *dbtp; 245 DB_LSN *lsnp; 246 db_recops op; 247 void *info; 248{ 249 __fop_write_args *argp; 250 int ret; 251 252 COMPQUIET(info, NULL); 253 254 REC_PRINT(__fop_write_print); 255 REC_NOOP_INTRO(__fop_write_read); 256 257 ret = 0; 258 if (DB_UNDO(op)) 259 DB_ASSERT(env, argp->flag != 0); 260 else if (DB_REDO(op)) 261 ret = __fop_write(env, 262 argp->txnp, argp->name.data, 263 argp->dirname.size == 0 ? NULL : argp->dirname.data, 264 (APPNAME)argp->appname == DB_APP_DATA ? DB_APP_RECOVER : 265 (APPNAME)argp->appname, 266 NULL, argp->pgsize, argp->pageno, argp->offset, 267 argp->page.data, argp->page.size, argp->flag, 0); 268 269 if (ret == 0) 270 *lsnp = argp->prev_lsn; 271 REC_NOOP_CLOSE; 272} 273 274/* 275 * __fop_write_42_recover -- 276 * Recovery function for writechunk. 277 * 278 * PUBLIC: int __fop_write_42_recover 279 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 280 */ 281int 282__fop_write_42_recover(env, dbtp, lsnp, op, info) 283 ENV *env; 284 DBT *dbtp; 285 DB_LSN *lsnp; 286 db_recops op; 287 void *info; 288{ 289 __fop_write_args *argp; 290 int ret; 291 292 COMPQUIET(info, NULL); 293 294 REC_PRINT(__fop_write_print); 295 REC_NOOP_INTRO(__fop_write_read); 296 297 ret = 0; 298 if (DB_UNDO(op)) 299 DB_ASSERT(env, argp->flag != 0); 300 else if (DB_REDO(op)) 301 ret = __fop_write(env, 302 argp->txnp, argp->name.data, NULL, (APPNAME)argp->appname, 303 NULL, argp->pgsize, argp->pageno, argp->offset, 304 argp->page.data, argp->page.size, argp->flag, 0); 305 306 if (ret == 0) 307 *lsnp = argp->prev_lsn; 308 REC_NOOP_CLOSE; 309} 310 311/* 312 * __fop_rename_recover -- 313 * Recovery functions for rename. There are two variants that 314 * both use the same utility function. Had we known about this on day 315 * one, we would have simply added a parameter. However, since we need 316 * to retain old records for backward compatibility (online-upgrade) 317 * wrapping the two seems like the right solution. 318 * 319 * PUBLIC: int __fop_rename_recover 320 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 321 * 322 * PUBLIC: int __fop_rename_noundo_recover 323 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 324 */ 325int 326__fop_rename_recover(env, dbtp, lsnp, op, info) 327 ENV *env; 328 DBT *dbtp; 329 DB_LSN *lsnp; 330 db_recops op; 331 void *info; 332{ 333 return (__fop_rename_recover_int(env, dbtp, lsnp, op, info, 1)); 334} 335 336int 337__fop_rename_noundo_recover(env, dbtp, lsnp, op, info) 338 ENV *env; 339 DBT *dbtp; 340 DB_LSN *lsnp; 341 db_recops op; 342 void *info; 343{ 344 return (__fop_rename_recover_int(env, dbtp, lsnp, op, info, 0)); 345} 346 347static int 348__fop_rename_recover_int(env, dbtp, lsnp, op, info, undo) 349 ENV *env; 350 DBT *dbtp; 351 DB_LSN *lsnp; 352 db_recops op; 353 void *info; 354 int undo; 355{ 356 __fop_rename_args *argp; 357 APPNAME appname; 358 DB_FH *fhp; 359 DBMETA *meta; 360 u_int8_t *fileid, mbuf[DBMETASIZE]; 361 int ret; 362 char *real_new, *real_old, *src; 363 const char *dirname; 364 365 COMPQUIET(info, NULL); 366 367 fhp = NULL; 368 meta = (DBMETA *)&mbuf[0]; 369 ret = 0; 370 real_new = real_old = NULL; 371 372 REC_PRINT(__fop_rename_print); 373 REC_NOOP_INTRO(__fop_rename_read); 374 fileid = argp->fileid.data; 375 376 if (argp->dirname.size == 0) 377 dirname = NULL; 378 else 379 dirname = (const char *)argp->dirname.data; 380 381 if ((APPNAME)argp->appname == DB_APP_DATA) 382 appname = DB_APP_RECOVER; 383 else 384 appname = (APPNAME)argp->appname; 385 386 if ((ret = __db_appname(env, appname, (const char *)argp->newname.data, 387 &dirname, &real_new)) != 0) 388 goto out; 389 if ((ret = __db_appname(env, appname, (const char *)argp->oldname.data, 390 &dirname, &real_old)) != 0) 391 goto out; 392 393 /* 394 * Verify that we are manipulating the correct file. We should always 395 * be OK on an ABORT or an APPLY, but during recovery, we have to 396 * check. 397 */ 398 if (op != DB_TXN_ABORT && op != DB_TXN_APPLY) { 399 src = DB_UNDO(op) ? real_new : real_old; 400 /* 401 * Interpret any error as meaning that the file either doesn't 402 * exist, doesn't have a meta-data page, or is in some other 403 * way, shape or form, incorrect, so that we should not restore 404 * it. 405 */ 406 if (__os_open(env, src, 0, 0, 0, &fhp) != 0) 407 goto done; 408 if (__fop_read_meta(env, 409 src, mbuf, DBMETASIZE, fhp, 1, NULL) != 0) 410 goto done; 411 if (__db_chk_meta(env, NULL, meta, 1) != 0) 412 goto done; 413 if (memcmp(argp->fileid.data, meta->uid, DB_FILE_ID_LEN) != 0) 414 goto done; 415 (void)__os_closehandle(env, fhp); 416 fhp = NULL; 417 if (DB_REDO(op)) { 418 /* 419 * Check to see if the target file exists. If it 420 * does and it does not have the proper id then 421 * it is a later version. We just remove the source 422 * file since the state of the world is beyond this 423 * point. 424 */ 425 if (__os_open(env, real_new, 0, 0, 0, &fhp) == 0 && 426 __fop_read_meta(env, src, mbuf, 427 DBMETASIZE, fhp, 1, NULL) == 0 && 428 __db_chk_meta(env, NULL, meta, 1) == 0 && 429 memcmp(argp->fileid.data, 430 meta->uid, DB_FILE_ID_LEN) != 0) { 431 (void)__memp_nameop(env, 432 fileid, NULL, real_old, NULL, 0); 433 goto done; 434 } 435 } 436 } 437 438 if (undo && DB_UNDO(op)) 439 (void)__memp_nameop(env, fileid, 440 (const char *)argp->oldname.data, real_new, real_old, 0); 441 if (DB_REDO(op)) 442 (void)__memp_nameop(env, fileid, 443 (const char *)argp->newname.data, real_old, real_new, 0); 444 445done: *lsnp = argp->prev_lsn; 446out: if (real_new != NULL) 447 __os_free(env, real_new); 448 if (real_old != NULL) 449 __os_free(env, real_old); 450 if (fhp != NULL) 451 (void)__os_closehandle(env, fhp); 452 453 REC_NOOP_CLOSE; 454} 455/* 456 * __fop_rename_42_recover -- 457 * Recovery functions for rename. There are two variants that 458 * both use the same utility function. Had we known about this on day 459 * one, we would have simply added a parameter. However, since we need 460 * to retain old records for backward compatibility (online-upgrade) 461 * wrapping the two seems like the right solution. 462 * 463 * PUBLIC: int __fop_rename_42_recover 464 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 465 * 466 * PUBLIC: int __fop_rename_noundo_46_recover 467 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 468 */ 469int 470__fop_rename_42_recover(env, dbtp, lsnp, op, info) 471 ENV *env; 472 DBT *dbtp; 473 DB_LSN *lsnp; 474 db_recops op; 475 void *info; 476{ 477 return (__fop_rename_42_recover_int(env, dbtp, lsnp, op, info, 1)); 478} 479 480int 481__fop_rename_noundo_46_recover(env, dbtp, lsnp, op, info) 482 ENV *env; 483 DBT *dbtp; 484 DB_LSN *lsnp; 485 db_recops op; 486 void *info; 487{ 488 return (__fop_rename_42_recover_int(env, dbtp, lsnp, op, info, 0)); 489} 490 491static int 492__fop_rename_42_recover_int(env, dbtp, lsnp, op, info, undo) 493 ENV *env; 494 DBT *dbtp; 495 DB_LSN *lsnp; 496 db_recops op; 497 void *info; 498 int undo; 499{ 500 __fop_rename_args *argp; 501 DB_FH *fhp; 502 DBMETA *meta; 503 u_int8_t *fileid, mbuf[DBMETASIZE]; 504 int ret; 505 char *real_new, *real_old, *src; 506 507 COMPQUIET(info, NULL); 508 509 fhp = NULL; 510 meta = (DBMETA *)&mbuf[0]; 511 ret = 0; 512 real_new = real_old = NULL; 513 514 REC_PRINT(__fop_rename_print); 515 REC_NOOP_INTRO(__fop_rename_read); 516 fileid = argp->fileid.data; 517 518 if ((ret = __db_appname(env, (APPNAME)argp->appname, 519 (const char *)argp->newname.data, NULL, &real_new)) != 0) 520 goto out; 521 if ((ret = __db_appname(env, (APPNAME)argp->appname, 522 (const char *)argp->oldname.data, NULL, &real_old)) != 0) 523 goto out; 524 525 /* 526 * Verify that we are manipulating the correct file. We should always 527 * be OK on an ABORT or an APPLY, but during recovery, we have to 528 * check. 529 */ 530 if (op != DB_TXN_ABORT && op != DB_TXN_APPLY) { 531 src = DB_UNDO(op) ? real_new : real_old; 532 /* 533 * Interpret any error as meaning that the file either doesn't 534 * exist, doesn't have a meta-data page, or is in some other 535 * way, shape or form, incorrect, so that we should not restore 536 * it. 537 */ 538 if (__os_open(env, src, 0, 0, 0, &fhp) != 0) 539 goto done; 540 if (__fop_read_meta(env, 541 src, mbuf, DBMETASIZE, fhp, 1, NULL) != 0) 542 goto done; 543 if (__db_chk_meta(env, NULL, meta, 1) != 0) 544 goto done; 545 if (memcmp(argp->fileid.data, meta->uid, DB_FILE_ID_LEN) != 0) 546 goto done; 547 (void)__os_closehandle(env, fhp); 548 fhp = NULL; 549 if (DB_REDO(op)) { 550 /* 551 * Check to see if the target file exists. If it 552 * does and it does not have the proper id then 553 * it is a later version. We just remove the source 554 * file since the state of the world is beyond this 555 * point. 556 */ 557 if (__os_open(env, real_new, 0, 0, 0, &fhp) == 0 && 558 __fop_read_meta(env, src, mbuf, 559 DBMETASIZE, fhp, 1, NULL) == 0 && 560 __db_chk_meta(env, NULL, meta, 1) == 0 && 561 memcmp(argp->fileid.data, 562 meta->uid, DB_FILE_ID_LEN) != 0) { 563 (void)__memp_nameop(env, 564 fileid, NULL, real_old, NULL, 0); 565 goto done; 566 } 567 } 568 } 569 570 if (undo && DB_UNDO(op)) 571 (void)__memp_nameop(env, fileid, 572 (const char *)argp->oldname.data, real_new, real_old, 0); 573 if (DB_REDO(op)) 574 (void)__memp_nameop(env, fileid, 575 (const char *)argp->newname.data, real_old, real_new, 0); 576 577done: *lsnp = argp->prev_lsn; 578out: if (real_new != NULL) 579 __os_free(env, real_new); 580 if (real_old != NULL) 581 __os_free(env, real_old); 582 if (fhp != NULL) 583 (void)__os_closehandle(env, fhp); 584 585 REC_NOOP_CLOSE; 586} 587 588/* 589 * __fop_file_remove_recover -- 590 * Recovery function for file_remove. On the REDO pass, we need to 591 * make sure no one recreated the file while we weren't looking. On an 592 * undo pass must check if the file we are interested in is the one that 593 * exists and then set the status of the child transaction depending on 594 * what we find out. 595 * 596 * PUBLIC: int __fop_file_remove_recover 597 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 598 */ 599int 600__fop_file_remove_recover(env, dbtp, lsnp, op, info) 601 ENV *env; 602 DBT *dbtp; 603 DB_LSN *lsnp; 604 db_recops op; 605 void *info; 606{ 607 __fop_file_remove_args *argp; 608 DBMETA *meta; 609 DB_FH *fhp; 610 size_t len; 611 u_int8_t mbuf[DBMETASIZE]; 612 u_int32_t cstat, ret_stat; 613 int is_real, is_tmp, ret; 614 char *real_name; 615 616 fhp = NULL; 617 meta = (DBMETA *)&mbuf[0]; 618 is_real = is_tmp = 0; 619 real_name = NULL; 620 REC_PRINT(__fop_file_remove_print); 621 REC_NOOP_INTRO(__fop_file_remove_read); 622 623 /* 624 * This record is only interesting on the backward, forward, and 625 * apply phases. 626 */ 627 if (op != DB_TXN_BACKWARD_ROLL && 628 op != DB_TXN_FORWARD_ROLL && op != DB_TXN_APPLY) 629 goto done; 630 631 if ((ret = __db_appname(env, (APPNAME)argp->appname, 632 argp->name.data, NULL, &real_name)) != 0) 633 goto out; 634 635 /* Verify that we are manipulating the correct file. */ 636 len = 0; 637 if (__os_open(env, real_name, 0, 0, 0, &fhp) != 0 || 638 (ret = __fop_read_meta(env, real_name, 639 mbuf, DBMETASIZE, fhp, 1, &len)) != 0) { 640 /* 641 * If len is non-zero, then the file exists and has something 642 * in it, but that something isn't a full meta-data page, so 643 * this is very bad. Bail out! 644 */ 645 if (len != 0) 646 goto out; 647 648 /* File does not exist. */ 649 cstat = TXN_EXPECTED; 650 } else { 651 /* 652 * We can ignore errors here since we'll simply fail the 653 * checks below and assume this is the wrong file. 654 */ 655 (void)__db_chk_meta(env, NULL, meta, 1); 656 is_real = 657 memcmp(argp->real_fid.data, meta->uid, DB_FILE_ID_LEN) == 0; 658 is_tmp = 659 memcmp(argp->tmp_fid.data, meta->uid, DB_FILE_ID_LEN) == 0; 660 661 if (!is_real && !is_tmp) 662 /* File exists, but isn't what we were removing. */ 663 cstat = TXN_IGNORE; 664 else 665 /* File exists and is the one that we were removing. */ 666 cstat = TXN_COMMIT; 667 } 668 if (fhp != NULL) { 669 (void)__os_closehandle(env, fhp); 670 fhp = NULL; 671 } 672 673 if (DB_UNDO(op)) { 674 /* On the backward pass, we leave a note for the child txn. */ 675 if ((ret = __db_txnlist_update(env, 676 info, argp->child, cstat, NULL, &ret_stat, 1)) != 0) 677 goto out; 678 } else if (DB_REDO(op)) { 679 /* 680 * On the forward pass, check if someone recreated the 681 * file while we weren't looking. 682 */ 683 if (cstat == TXN_COMMIT) 684 (void)__memp_nameop(env, 685 is_real ? argp->real_fid.data : argp->tmp_fid.data, 686 NULL, real_name, NULL, 0); 687 } 688 689done: *lsnp = argp->prev_lsn; 690 ret = 0; 691 692out: if (real_name != NULL) 693 __os_free(env, real_name); 694 if (fhp != NULL) 695 (void)__os_closehandle(env, fhp); 696 REC_NOOP_CLOSE; 697} 698