1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996,2008 Oracle. All rights reserved. 5 * 6 * $Id: db_open.c,v 12.43 2008/01/08 20:58:10 bostic Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/db_page.h" 13#include "dbinc/db_swap.h" 14#include "dbinc/btree.h" 15#include "dbinc/crypto.h" 16#include "dbinc/hmac.h" 17#include "dbinc/fop.h" 18#include "dbinc/hash.h" 19#include "dbinc/lock.h" 20#include "dbinc/log.h" 21#include "dbinc/mp.h" 22#include "dbinc/qam.h" 23#include "dbinc/txn.h" 24 25/* 26 * __db_open -- 27 * DB->open method. 28 * 29 * This routine gets called in three different ways: 30 * 31 * 1. It can be called to open a file/database. In this case, subdb will 32 * be NULL and meta_pgno will be PGNO_BASE_MD. 33 * 2. It can be called to open a subdatabase during normal operation. In 34 * this case, name and subname will both be non-NULL and meta_pgno will 35 * be PGNO_BASE_MD (also PGNO_INVALID). 36 * 3. It can be called to open an in-memory database (name == NULL; 37 * subname = name). 38 * 4. It can be called during recovery to open a file/database, in which case 39 * name will be non-NULL, subname will be NULL, and meta-pgno will be 40 * PGNO_BASE_MD. 41 * 5. It can be called during recovery to open a subdatabase, in which case 42 * name will be non-NULL, subname may be NULL and meta-pgno will be 43 * a valid pgno (i.e., not PGNO_BASE_MD). 44 * 6. It can be called during recovery to open an in-memory database. 45 * 46 * PUBLIC: int __db_open __P((DB *, DB_THREAD_INFO *, DB_TXN *, 47 * PUBLIC: const char *, const char *, DBTYPE, u_int32_t, int, db_pgno_t)); 48 */ 49int 50__db_open(dbp, ip, txn, fname, dname, type, flags, mode, meta_pgno) 51 DB *dbp; 52 DB_THREAD_INFO *ip; 53 DB_TXN *txn; 54 const char *fname, *dname; 55 DBTYPE type; 56 u_int32_t flags; 57 int mode; 58 db_pgno_t meta_pgno; 59{ 60 ENV *env; 61 int ret; 62 u_int32_t id; 63 64 env = dbp->env; 65 id = TXN_INVALID; 66 67 DB_TEST_RECOVERY(dbp, DB_TEST_PREOPEN, ret, fname); 68 69 /* 70 * If the environment was configured with threads, the DB handle 71 * must also be free-threaded, so we force the DB_THREAD flag on. 72 * (See SR #2033 for why this is a requirement--recovery needs 73 * to be able to grab a dbp using __db_fileid_to_dbp, and it has 74 * no way of knowing which dbp goes with which thread, so whichever 75 * one it finds has to be usable in any of them.) 76 */ 77 if (F_ISSET(env, ENV_THREAD)) 78 LF_SET(DB_THREAD); 79 80 /* Convert any DB->open flags. */ 81 if (LF_ISSET(DB_RDONLY)) 82 F_SET(dbp, DB_AM_RDONLY); 83 if (LF_ISSET(DB_READ_UNCOMMITTED)) 84 F_SET(dbp, DB_AM_READ_UNCOMMITTED); 85 86 if (IS_REAL_TXN(txn)) 87 F_SET(dbp, DB_AM_TXN); 88 89 /* Fill in the type. */ 90 dbp->type = type; 91 92 /* 93 * If both fname and subname are NULL, it's always a create, so make 94 * sure that we have both DB_CREATE and a type specified. It would 95 * be nice if this checking were done in __db_open where most of the 96 * interface checking is done, but this interface (__db_dbopen) is 97 * used by the recovery and limbo system, so we need to safeguard 98 * this interface as well. 99 */ 100 if (fname == NULL) { 101 if (dname == NULL) { 102 if (!LF_ISSET(DB_CREATE)) { 103 __db_errx(env, 104 "DB_CREATE must be specified to create databases."); 105 return (ENOENT); 106 } 107 108 F_SET(dbp, DB_AM_INMEM); 109 F_SET(dbp, DB_AM_CREATED); 110 111 if (dbp->type == DB_UNKNOWN) { 112 __db_errx(env, 113 "DBTYPE of unknown without existing file"); 114 return (EINVAL); 115 } 116 117 if (dbp->pgsize == 0) 118 dbp->pgsize = DB_DEF_IOSIZE; 119 120 /* 121 * If the file is a temporary file and we're 122 * doing locking, then we have to create a 123 * unique file ID. We can't use our normal 124 * dev/inode pair (or whatever this OS uses 125 * in place of dev/inode pairs) because no 126 * backing file will be created until the 127 * mpool cache is filled forcing the buffers 128 * to disk. Grab a random locker ID to use 129 * as a file ID. The created ID must never 130 * match a potential real file ID -- we know 131 * it won't because real file IDs contain a 132 * time stamp after the dev/inode pair, and 133 * we're simply storing a 4-byte value. 134 135 * !!! 136 * Store the locker in the file id structure 137 * -- we can get it from there as necessary, 138 * and it saves having two copies. 139 */ 140 if (LOCKING_ON(env) && (ret = __lock_id(env, 141 (u_int32_t *)dbp->fileid, NULL)) != 0) 142 return (ret); 143 } else 144 MAKE_INMEM(dbp); 145 146 /* 147 * Normally we would do handle locking here, however, with 148 * in-memory files, we cannot do any database manipulation 149 * until the mpool is open, so it happens later. 150 */ 151 } else if (dname == NULL && meta_pgno == PGNO_BASE_MD) { 152 /* Open/create the underlying file. Acquire locks. */ 153 if ((ret = __fop_file_setup(dbp, ip, 154 txn, fname, mode, flags, &id)) != 0) 155 return (ret); 156 } else { 157 if ((ret = __fop_subdb_setup(dbp, ip, 158 txn, fname, dname, mode, flags)) != 0) 159 return (ret); 160 meta_pgno = dbp->meta_pgno; 161 } 162 163 /* 164 * If we created the file, set the truncate flag for the mpool. This 165 * isn't for anything we've done, it's protection against stupid user 166 * tricks: if the user deleted a file behind Berkeley DB's back, we 167 * may still have pages in the mpool that match the file's "unique" ID. 168 * 169 * Note that if we're opening a subdatabase, we don't want to set 170 * the TRUNCATE flag even if we just created the file--we already 171 * opened and updated the master using access method interfaces, 172 * so we don't want to get rid of any pages that are in the mpool. 173 * If we created the file when we opened the master, we already hit 174 * this check in a non-subdatabase context then. 175 */ 176 if (dname == NULL && F_ISSET(dbp, DB_AM_CREATED)) 177 LF_SET(DB_TRUNCATE); 178 179 /* Set up the underlying environment. */ 180 if ((ret = __env_setup(dbp, txn, fname, dname, id, flags)) != 0) 181 return (ret); 182 183 /* For in-memory databases, we now need to open/create the database. */ 184 if (F_ISSET(dbp, DB_AM_INMEM)) { 185 if (dname == NULL) 186 ret = __db_new_file(dbp, ip, txn, NULL, NULL); 187 else { 188 id = TXN_INVALID; 189 if ((ret = __fop_file_setup(dbp, ip, 190 txn, dname, mode, flags, &id)) == 0 && 191 DBENV_LOGGING(env) && !F_ISSET(dbp, DB_AM_RECOVER) 192#if !defined(DEBUG_ROP) && !defined(DEBUG_WOP) && !defined(DIAGNOSTIC) 193 && txn != NULL 194#endif 195#if !defined(DEBUG_ROP) 196 && !F_ISSET(dbp, DB_AM_RDONLY) 197#endif 198 ) 199 ret = __dbreg_log_id(dbp, 200 txn, dbp->log_filename->id, 1); 201 } 202 if (ret != 0) 203 goto err; 204 } 205 206 switch (dbp->type) { 207 case DB_BTREE: 208 ret = __bam_open(dbp, ip, txn, fname, meta_pgno, flags); 209 break; 210 case DB_HASH: 211 ret = __ham_open(dbp, ip, txn, fname, meta_pgno, flags); 212 break; 213 case DB_RECNO: 214 ret = __ram_open(dbp, ip, txn, fname, meta_pgno, flags); 215 break; 216 case DB_QUEUE: 217 ret = __qam_open( 218 dbp, ip, txn, fname, meta_pgno, mode, flags); 219 break; 220 case DB_UNKNOWN: 221 return ( 222 __db_unknown_type(env, "__db_dbopen", dbp->type)); 223 } 224 if (ret != 0) 225 goto err; 226 227 DB_TEST_RECOVERY(dbp, DB_TEST_POSTOPEN, ret, fname); 228 229 /* 230 * Temporary files don't need handle locks, so we only have to check 231 * for a handle lock downgrade or lockevent in the case of named 232 * files. 233 */ 234 if (!F_ISSET(dbp, DB_AM_RECOVER) && (fname != NULL || dname != NULL) && 235 LOCK_ISSET(dbp->handle_lock)) { 236 if (IS_REAL_TXN(txn)) 237 ret = __txn_lockevent(env, 238 txn, dbp, &dbp->handle_lock, dbp->locker); 239 else if (LOCKING_ON(env)) 240 /* Trade write handle lock for read handle lock. */ 241 ret = __lock_downgrade(env, 242 &dbp->handle_lock, DB_LOCK_READ, 0); 243 } 244DB_TEST_RECOVERY_LABEL 245err: 246 return (ret); 247} 248 249/* 250 * __db_get_open_flags -- 251 * Accessor for flags passed into DB->open call 252 * 253 * PUBLIC: int __db_get_open_flags __P((DB *, u_int32_t *)); 254 */ 255int 256__db_get_open_flags(dbp, flagsp) 257 DB *dbp; 258 u_int32_t *flagsp; 259{ 260 DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_open_flags"); 261 262 *flagsp = dbp->open_flags; 263 return (0); 264} 265 266/* 267 * __db_new_file -- 268 * Create a new database file. 269 * 270 * PUBLIC: int __db_new_file __P((DB *, 271 * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DB_FH *, const char *)); 272 */ 273int 274__db_new_file(dbp, ip, txn, fhp, name) 275 DB *dbp; 276 DB_THREAD_INFO *ip; 277 DB_TXN *txn; 278 DB_FH *fhp; 279 const char *name; 280{ 281 int ret; 282 283 switch (dbp->type) { 284 case DB_BTREE: 285 case DB_RECNO: 286 ret = __bam_new_file(dbp, ip, txn, fhp, name); 287 break; 288 case DB_HASH: 289 ret = __ham_new_file(dbp, ip, txn, fhp, name); 290 break; 291 case DB_QUEUE: 292 ret = __qam_new_file(dbp, ip, txn, fhp, name); 293 break; 294 case DB_UNKNOWN: 295 default: 296 __db_errx(dbp->env, 297 "%s: Invalid type %d specified", name, dbp->type); 298 ret = EINVAL; 299 break; 300 } 301 302 DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOGMETA, ret, name); 303 /* Sync the file in preparation for moving it into place. */ 304 if (ret == 0 && fhp != NULL) 305 ret = __os_fsync(dbp->env, fhp); 306 307 DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name); 308 309DB_TEST_RECOVERY_LABEL 310 return (ret); 311} 312 313/* 314 * __db_init_subdb -- 315 * Initialize the dbp for a subdb. 316 * 317 * PUBLIC: int __db_init_subdb __P((DB *, 318 * PUBLIC: DB *, const char *, DB_THREAD_INFO *, DB_TXN *)); 319 */ 320int 321__db_init_subdb(mdbp, dbp, name, ip, txn) 322 DB *mdbp, *dbp; 323 const char *name; 324 DB_THREAD_INFO *ip; 325 DB_TXN *txn; 326{ 327 DBMETA *meta; 328 DB_MPOOLFILE *mpf; 329 int ret, t_ret; 330 331 ret = 0; 332 if (!F_ISSET(dbp, DB_AM_CREATED)) { 333 /* Subdb exists; read meta-data page and initialize. */ 334 mpf = mdbp->mpf; 335 if ((ret = __memp_fget(mpf, &dbp->meta_pgno, 336 ip, txn, 0, &meta)) != 0) 337 goto err; 338 ret = __db_meta_setup(mdbp->env, dbp, name, meta, 0, 0); 339 if ((t_ret = __memp_fput(mpf, 340 ip, meta, dbp->priority)) != 0 && ret == 0) 341 ret = t_ret; 342 /* 343 * If __db_meta_setup found that the meta-page hadn't 344 * been written out during recovery, we can just return. 345 */ 346 if (ret == ENOENT) 347 ret = 0; 348 goto err; 349 } 350 351 /* Handle the create case here. */ 352 switch (dbp->type) { 353 case DB_BTREE: 354 case DB_RECNO: 355 ret = __bam_new_subdb(mdbp, dbp, ip, txn); 356 break; 357 case DB_HASH: 358 ret = __ham_new_subdb(mdbp, dbp, ip, txn); 359 break; 360 case DB_QUEUE: 361 ret = EINVAL; 362 break; 363 case DB_UNKNOWN: 364 default: 365 __db_errx(dbp->env, 366 "Invalid subdatabase type %d specified", dbp->type); 367 return (EINVAL); 368 } 369 370err: return (ret); 371} 372 373/* 374 * __db_chk_meta -- 375 * Take a buffer containing a meta-data page and check it for a valid LSN, 376 * checksum (and verify the checksum if necessary) and possibly decrypt it. 377 * 378 * Return 0 on success, >0 (errno) on error, -1 on checksum mismatch. 379 * 380 * PUBLIC: int __db_chk_meta __P((ENV *, DB *, DBMETA *, u_int32_t)); 381 */ 382int 383__db_chk_meta(env, dbp, meta, flags) 384 ENV *env; 385 DB *dbp; 386 DBMETA *meta; 387 u_int32_t flags; 388{ 389 DB_LSN swap_lsn; 390 int is_hmac, ret, swapped; 391 u_int32_t magic, orig_chk; 392 u_int8_t *chksum; 393 394 ret = 0; 395 swapped = 0; 396 397 if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM)) { 398 if (dbp != NULL) 399 F_SET(dbp, DB_AM_CHKSUM); 400 401 is_hmac = meta->encrypt_alg == 0 ? 0 : 1; 402 chksum = ((BTMETA *)meta)->chksum; 403 404 /* 405 * If we need to swap, the checksum function overwrites the 406 * original checksum with 0, so we need to save a copy of the 407 * original for swapping later. 408 */ 409 orig_chk = *(u_int32_t *)chksum; 410 411 /* 412 * We cannot add this to __db_metaswap because that gets done 413 * later after we've verified the checksum or decrypted. 414 */ 415 if (LF_ISSET(DB_CHK_META)) { 416 swapped = 0; 417chk_retry: if ((ret = 418 __db_check_chksum(env, NULL, env->crypto_handle, 419 chksum, meta, DBMETASIZE, is_hmac)) != 0) { 420 if (is_hmac || swapped) 421 return (ret); 422 423 M_32_SWAP(orig_chk); 424 swapped = 1; 425 *(u_int32_t *)chksum = orig_chk; 426 goto chk_retry; 427 } 428 } 429 } else if (dbp != NULL) 430 F_CLR(dbp, DB_AM_CHKSUM); 431 432#ifdef HAVE_CRYPTO 433 ret = __crypto_decrypt_meta(env, 434 dbp, (u_int8_t *)meta, LF_ISSET(DB_CHK_META)); 435#endif 436 437 /* Now that we're decrypted, we can check LSN. */ 438 if (LOGGING_ON(env) && !LF_ISSET(DB_CHK_NOLSN)) { 439 /* 440 * This gets called both before and after swapping, so we 441 * need to check ourselves. If we already swapped it above, 442 * we'll know that here. 443 */ 444 445 swap_lsn = meta->lsn; 446 magic = meta->magic; 447lsn_retry: 448 if (swapped) { 449 M_32_SWAP(swap_lsn.file); 450 M_32_SWAP(swap_lsn.offset); 451 M_32_SWAP(magic); 452 } 453 switch (magic) { 454 case DB_BTREEMAGIC: 455 case DB_HASHMAGIC: 456 case DB_QAMMAGIC: 457 case DB_RENAMEMAGIC: 458 break; 459 default: 460 if (swapped) 461 return (EINVAL); 462 swapped = 1; 463 goto lsn_retry; 464 } 465 if (!IS_REP_CLIENT(env) && 466 !IS_NOT_LOGGED_LSN(swap_lsn) && !IS_ZERO_LSN(swap_lsn)) 467 /* Need to do check. */ 468 ret = __log_check_page_lsn(env, dbp, &swap_lsn); 469 } 470 return (ret); 471} 472 473/* 474 * __db_meta_setup -- 475 * 476 * Take a buffer containing a meta-data page and figure out if it's 477 * valid, and if so, initialize the dbp from the meta-data page. 478 * 479 * PUBLIC: int __db_meta_setup __P((ENV *, 480 * PUBLIC: DB *, const char *, DBMETA *, u_int32_t, u_int32_t)); 481 */ 482int 483__db_meta_setup(env, dbp, name, meta, oflags, flags) 484 ENV *env; 485 DB *dbp; 486 const char *name; 487 DBMETA *meta; 488 u_int32_t oflags; 489 u_int32_t flags; 490{ 491 u_int32_t magic; 492 int ret; 493 494 ret = 0; 495 496 /* 497 * Figure out what access method we're dealing with, and then 498 * call access method specific code to check error conditions 499 * based on conflicts between the found file and application 500 * arguments. A found file overrides some user information -- 501 * we don't consider it an error, for example, if the user set 502 * an expected byte order and the found file doesn't match it. 503 */ 504 F_CLR(dbp, DB_AM_SWAP | DB_AM_IN_RENAME); 505 magic = meta->magic; 506 507swap_retry: 508 switch (magic) { 509 case DB_BTREEMAGIC: 510 case DB_HASHMAGIC: 511 case DB_QAMMAGIC: 512 case DB_RENAMEMAGIC: 513 break; 514 case 0: 515 /* 516 * The only time this should be 0 is if we're in the 517 * midst of opening a subdb during recovery and that 518 * subdatabase had its meta-data page allocated, but 519 * not yet initialized. 520 */ 521 if (F_ISSET(dbp, DB_AM_SUBDB) && ((IS_RECOVERING(env) && 522 F_ISSET(env->lg_handle, DBLOG_FORCE_OPEN)) || 523 meta->pgno != PGNO_INVALID)) 524 return (ENOENT); 525 526 goto bad_format; 527 default: 528 if (F_ISSET(dbp, DB_AM_SWAP)) 529 goto bad_format; 530 531 M_32_SWAP(magic); 532 F_SET(dbp, DB_AM_SWAP); 533 goto swap_retry; 534 } 535 536 /* 537 * We can only check the meta page if we are sure we have a meta page. 538 * If it is random data, then this check can fail. So only now can we 539 * checksum and decrypt. Don't distinguish between configuration and 540 * checksum match errors here, because we haven't opened the database 541 * and even a checksum error isn't a reason to panic the environment. 542 */ 543 if ((ret = __db_chk_meta(env, dbp, meta, flags)) != 0) { 544 if (ret == -1) 545 __db_errx(env, 546 "%s: metadata page checksum error", name); 547 goto bad_format; 548 } 549 550 switch (magic) { 551 case DB_BTREEMAGIC: 552 if (dbp->type != DB_UNKNOWN && 553 dbp->type != DB_RECNO && dbp->type != DB_BTREE) 554 goto bad_format; 555 556 flags = meta->flags; 557 if (F_ISSET(dbp, DB_AM_SWAP)) 558 M_32_SWAP(flags); 559 if (LF_ISSET(BTM_RECNO)) 560 dbp->type = DB_RECNO; 561 else 562 dbp->type = DB_BTREE; 563 if ((oflags & DB_TRUNCATE) == 0 && (ret = 564 __bam_metachk(dbp, name, (BTMETA *)meta)) != 0) 565 return (ret); 566 break; 567 case DB_HASHMAGIC: 568 if (dbp->type != DB_UNKNOWN && dbp->type != DB_HASH) 569 goto bad_format; 570 571 dbp->type = DB_HASH; 572 if ((oflags & DB_TRUNCATE) == 0 && (ret = 573 __ham_metachk(dbp, name, (HMETA *)meta)) != 0) 574 return (ret); 575 break; 576 case DB_QAMMAGIC: 577 if (dbp->type != DB_UNKNOWN && dbp->type != DB_QUEUE) 578 goto bad_format; 579 dbp->type = DB_QUEUE; 580 if ((oflags & DB_TRUNCATE) == 0 && (ret = 581 __qam_metachk(dbp, name, (QMETA *)meta)) != 0) 582 return (ret); 583 break; 584 case DB_RENAMEMAGIC: 585 F_SET(dbp, DB_AM_IN_RENAME); 586 587 /* Copy the file's ID. */ 588 memcpy(dbp->fileid, ((DBMETA *)meta)->uid, DB_FILE_ID_LEN); 589 590 break; 591 default: 592 goto bad_format; 593 } 594 return (0); 595 596bad_format: 597 if (F_ISSET(dbp, DB_AM_RECOVER)) 598 ret = ENOENT; 599 else 600 __db_errx(env, "%s: unexpected file type or format", name); 601 return (ret == 0 ? EINVAL : ret); 602} 603