/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996,2008 Oracle.  All rights reserved.
 */
/*
 * Copyright (c) 1990, 1993, 1994, 1995, 1996
 *	Keith Bostic.  All rights reserved.
 */
/*
 * Copyright (c) 1990, 1993, 1994, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: db.c,v 12.81 2008/02/18 19:11:59 bschmeck Exp $
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/db_swap.h"
#include "dbinc/btree.h"
#include "dbinc/fop.h"
#include "dbinc/hash.h"
#include "dbinc/lock.h"
#include "dbinc/log.h"
#include "dbinc/mp.h"
#include "dbinc/qam.h"
#include "dbinc/txn.h"

static int __db_disassociate __P((DB *));
static int __db_disassociate_foreign __P ((DB *));

#ifdef CONFIG_TEST
static int __db_makecopy __P((ENV *, const char *, const char *));
static int __db_testdocopy __P((ENV *, const char *));
static int __qam_testdocopy __P((DB *, const char *));
#endif

/*
 * DB.C --
 *	This file contains the utility functions for the DBP layer.
 */

/*
 * __db_master_open --
 *	Open up a handle on a master database.
 *
 *	A new DB handle is created in subdbp's environment, given subdbp's
 *	page size and a subset of its flags, and opened as a btree on the
 *	file "name" with DB_EXCL cleared and DB_RDWRMASTER set.
 *
 *	On success, *dbpp is the new master handle, and subdbp's page size
 *	(and DB_AM_CHKSUM, if the master has it) are updated from the master.
 *	On failure, *dbpp stays NULL and, if the handle had been created, it
 *	is closed here unless it is flagged DB_AM_DISCARD.
 *
 *	Returns 0 on success, or the error from __db_create_internal or
 *	__db_open.
 *
 * PUBLIC: int __db_master_open __P((DB *, DB_THREAD_INFO *,
 * PUBLIC:     DB_TXN *, const char *, u_int32_t, int, DB **));
 */
int
__db_master_open(subdbp, ip, txn, name, flags, mode, dbpp)
	DB *subdbp;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	const char *name;
	u_int32_t flags;
	int mode;
	DB **dbpp;
{
	DB *dbp;
	int ret;

	/* Callers see NULL unless the open below succeeds. */
	*dbpp = NULL;

	/* Open up a handle on the main database. */
	if ((ret = __db_create_internal(&dbp, subdbp->env, 0)) != 0)
		return (ret);

	/*
	 * It's always a btree.
	 * Run in the transaction we've created.
	 * Set the pagesize in case we're creating a new database.
	 * Flag that we're creating a database with subdatabases.
	 */
	dbp->pgsize = subdbp->pgsize;
	F_SET(dbp, DB_AM_SUBDB);
	F_SET(dbp, F_ISSET(subdbp,
	    DB_AM_RECOVER | DB_AM_SWAP |
	    DB_AM_ENCRYPT | DB_AM_CHKSUM | DB_AM_NOT_DURABLE));

	/*
	 * If there was a subdb specified, then we only want to apply
	 * DB_EXCL to the subdb, not the actual file.  We only got here
	 * because there was a subdb specified.
	 */
	LF_CLR(DB_EXCL);
	LF_SET(DB_RDWRMASTER);
	if ((ret = __db_open(dbp, ip,
	    txn, name, NULL, DB_BTREE, flags, mode, PGNO_BASE_MD)) != 0)
		goto err;

	/*
	 * The items in dbp are initialized from the master file's meta page.
	 * Other items such as checksum and encryption are checked when we
	 * read the meta-page, so we do not check those here.  However, if
	 * the meta-page caused checksumming to be turned on and it wasn't
	 * already, set it here.
	 */
	if (F_ISSET(dbp, DB_AM_CHKSUM))
		F_SET(subdbp, DB_AM_CHKSUM);

	/*
	 * The user may have specified a page size for an existing file,
	 * which we want to ignore.
	 */
	subdbp->pgsize = dbp->pgsize;
	*dbpp = dbp;

	/*
	 * The "if (0)" keeps the success path from falling into the error
	 * cleanup; the err label is only reached by the goto above.
	 */
	if (0) {
err:		if (!F_ISSET(dbp, DB_AM_DISCARD))
			(void)__db_close(dbp, txn, 0);
	}

	return (ret);
}

/*
 * __db_master_update --
 *	Add/Open/Remove a subdatabase from a master database.
 *
 *	The master database mdbp maps a subdatabase name (stored without its
 *	nul terminator) to the subdatabase's meta-data page number.  The
 *	record for "subdb" is looked up and then, depending on action:
 *
 *	MU_REMOVE: the record is deleted, sdbp->meta_pgno is set from the
 *	    record, and the meta page (plus, for a btree meta page with a
 *	    valid root, the root page) is freed.
 *	MU_RENAME: fails with EEXIST if "newname" already exists; otherwise
 *	    the record is re-inserted under "newname" and the old record is
 *	    deleted.
 *	MU_OPEN: if the record exists, sdbp->meta_pgno is set from it (or
 *	    EEXIST is returned when both DB_CREATE and DB_EXCL are set).  If
 *	    it does not exist, ENOENT is returned unless DB_CREATE is set, in
 *	    which case a new meta page is allocated, recorded under "subdb",
 *	    and sdbp is flagged DB_AM_CREATED.
 *
 *	Returns 0 on success or an error code; cursors, the fetched data
 *	buffer and any page still held are released on every path past the
 *	initial cursor open.
 *
 * PUBLIC: int __db_master_update __P((DB *, DB *, DB_THREAD_INFO *, DB_TXN *,
 * PUBLIC:      const char *, DBTYPE, mu_action, const char *, u_int32_t));
 */
int
__db_master_update(mdbp, sdbp, ip, txn, subdb, type, action, newname, flags)
	DB *mdbp, *sdbp;
	DB_TXN *txn;
	DB_THREAD_INFO *ip;
	const char *subdb;
	DBTYPE type;
	mu_action action;
	const char *newname;
	u_int32_t flags;
{
	DBC *dbc, *ndbc;
	DBT key, data, ndata;
	ENV *env;
	PAGE *p, *r;
	db_pgno_t t_pgno;
	int modify, ret, t_ret;

	env = mdbp->env;
	dbc = ndbc = NULL;
	p = NULL;

	/*
	 * Open up a cursor.  If this is CDB and we're creating the database,
	 * make it an update cursor.
	 *
	 * Might we modify the master database?  If so, we'll need to lock.
	 */
	modify = (action != MU_OPEN || LF_ISSET(DB_CREATE)) ? 1 : 0;

	/*
	 * This is the only early return: nothing has been acquired yet, and
	 * "data" has not been initialized, so the common cleanup must not run.
	 */
	if ((ret = __db_cursor(mdbp, ip, txn, &dbc,
	    (CDB_LOCKING(env) && modify) ? DB_WRITECURSOR : 0)) != 0)
		return (ret);

	/*
	 * Point the cursor at the record.
	 *
	 * If we're removing or potentially creating an entry, lock the page
	 * with DB_RMW.
	 *
	 * We do multiple cursor operations with the cursor in some cases and
	 * subsequently access the data DBT information.  Set DB_DBT_MALLOC so
	 * we don't risk modification of the data between our uses of it.
	 *
	 * !!!
	 * We don't include the name's nul termination in the database.
	 */
	DB_INIT_DBT(key, subdb, strlen(subdb));
	memset(&data, 0, sizeof(data));
	F_SET(&data, DB_DBT_MALLOC);

	ret = __dbc_get(dbc, &key, &data,
	    DB_SET | ((STD_LOCKING(dbc) && modify) ? DB_RMW : 0));

	/*
	 * What we do next--whether or not we found a record for the
	 * specified subdatabase--depends on what the specified action is.
	 * Handle ret appropriately as the first statement of each case.
	 */
	switch (action) {
	case MU_REMOVE:
		/*
		 * We should have found something if we're removing it.  Note
		 * that in the common case where the DB we're asking to remove
		 * doesn't exist, we won't get this far;  __db_subdb_remove
		 * will already have returned an error from __db_open.
		 */
		if (ret != 0)
			goto err;

		/*
		 * Delete the subdatabase entry first;  if this fails,
		 * we don't want to touch the actual subdb pages.
		 */
		if ((ret = __dbc_del(dbc, 0)) != 0)
			goto err;

		/*
		 * We're handling actual data, not on-page meta-data,
		 * so it hasn't been converted to/from opposite
		 * endian architectures.  Do it explicitly, now.
		 */
		memcpy(&sdbp->meta_pgno, data.data, sizeof(db_pgno_t));
		DB_NTOHL_SWAP(env, &sdbp->meta_pgno);
		if ((ret = __memp_fget(mdbp->mpf, &sdbp->meta_pgno,
		    ip, dbc->txn, DB_MPOOL_DIRTY, &p)) != 0)
			goto err;

		/* Free the root on the master db if it was created. */
		if (TYPE(p) == P_BTREEMETA &&
		    ((BTMETA *)p)->root != PGNO_INVALID) {
			if ((ret = __memp_fget(mdbp->mpf,
			     &((BTMETA *)p)->root, ip, dbc->txn,
			     DB_MPOOL_DIRTY, &r)) != 0)
				goto err;

			/* Free and put the page. */
			if ((ret = __db_free(dbc, r)) != 0) {
				r = NULL;
				goto err;
			}
		}
		/*
		 * Free and put the page.  p is cleared on both outcomes so
		 * the common cleanup below does not put it a second time.
		 */
		if ((ret = __db_free(dbc, p)) != 0) {
			p = NULL;
			goto err;
		}
		p = NULL;
		break;
	case MU_RENAME:
		/* We should have found something if we're renaming it. */
		if (ret != 0)
			goto err;

		/*
		 * Before we rename, we need to make sure we're not
		 * overwriting another subdatabase, or else this operation
		 * won't be undoable.  Open a second cursor and check
		 * for the existence of newname;  it shouldn't appear under
		 * us since we hold the metadata lock.
		 */
		if ((ret = __db_cursor(mdbp, ip, txn, &ndbc,
		    CDB_LOCKING(env) ? DB_WRITECURSOR : 0)) != 0)
			goto err;
		DB_SET_DBT(key, newname, strlen(newname));

		/*
		 * We don't actually care what the meta page of the potentially-
		 * overwritten DB is; we just care about existence.
		 */
		memset(&ndata, 0, sizeof(ndata));
		F_SET(&ndata, DB_DBT_USERMEM | DB_DBT_PARTIAL);

		if ((ret = __dbc_get(ndbc, &key, &ndata, DB_SET)) == 0) {
			/* A subdb called newname exists.  Bail. */
			ret = EEXIST;
			__db_errx(env, "rename: database %s exists", newname);
			goto err;
		} else if (ret != DB_NOTFOUND)
			goto err;

		/*
		 * Now do the put first; we don't want to lose our only
		 * reference to the subdb.  Use the second cursor so the
		 * first one continues to point to the old record.
		 */
		if ((ret = __dbc_put(ndbc, &key, &data, DB_KEYFIRST)) != 0)
			goto err;
		if ((ret = __dbc_del(dbc, 0)) != 0) {
			/*
			 * If the delete fails, try to delete the record
			 * we just put, in case we're not txn-protected.
			 */
			(void)__dbc_del(ndbc, 0);
			goto err;
		}

		break;
	case MU_OPEN:
		/*
		 * Get the subdatabase information.  If it already exists,
		 * copy out the page number and we're done.
		 */
		switch (ret) {
		case 0:
			if (LF_ISSET(DB_CREATE) && LF_ISSET(DB_EXCL)) {
				ret = EEXIST;
				goto err;
			}
			memcpy(&sdbp->meta_pgno, data.data, sizeof(db_pgno_t));
			DB_NTOHL_SWAP(env, &sdbp->meta_pgno);
			goto done;
		case DB_NOTFOUND:
			if (LF_ISSET(DB_CREATE))
				break;
			/*
			 * No db_err, it is reasonable to remove a
			 * nonexistent db.
			 */
			ret = ENOENT;
			goto err;
		default:
			goto err;
		}

		/* Create a subdatabase. */
		if ((ret = __db_new(dbc,
		    type == DB_HASH ? P_HASHMETA : P_BTREEMETA, &p)) != 0)
			goto err;
		sdbp->meta_pgno = PGNO(p);

		/*
		 * XXX
		 * We're handling actual data, not on-page meta-data, so it
		 * hasn't been converted to/from opposite endian architectures.
		 * Do it explicitly, now.
		 */
		t_pgno = PGNO(p);
		DB_HTONL_SWAP(env, &t_pgno);
		memset(&ndata, 0, sizeof(ndata));
		ndata.data = &t_pgno;
		ndata.size = sizeof(db_pgno_t);
		if ((ret = __dbc_put(dbc, &key, &ndata, DB_KEYLAST)) != 0)
			goto err;
		F_SET(sdbp, DB_AM_CREATED);
		break;
	}

err:
done:	/*
	 * If we allocated a page: if we're successful, mark the page dirty
	 * and return it to the cache, otherwise, discard/free it.
	 */
	if (p != NULL && (t_ret = __memp_fput(mdbp->mpf,
	     dbc->thread_info, p, dbc->priority)) != 0 && ret == 0)
		ret = t_ret;

	/*
	 * Discard the cursor(s) and data.  The first error seen is the one
	 * returned; cleanup failures are only reported if nothing failed
	 * earlier.
	 */
	if (data.data != NULL)
		__os_ufree(env, data.data);
	if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
		ret = t_ret;
	if (ndbc != NULL && (t_ret = __dbc_close(ndbc)) != 0 && ret == 0)
		ret = t_ret;

	return (ret);
}

/*
 * __env_setup --
 *	Set up the underlying environment during a db_open.
381 * 382 * PUBLIC: int __env_setup __P((DB *, 383 * PUBLIC: DB_TXN *, const char *, const char *, u_int32_t, u_int32_t)); 384 */ 385int 386__env_setup(dbp, txn, fname, dname, id, flags) 387 DB *dbp; 388 DB_TXN *txn; 389 const char *fname, *dname; 390 u_int32_t id, flags; 391{ 392 DB *ldbp; 393 DB_ENV *dbenv; 394 ENV *env; 395 u_int32_t maxid; 396 int ret; 397 398 env = dbp->env; 399 dbenv = env->dbenv; 400 401 /* If we don't yet have an environment, it's time to create it. */ 402 if (!F_ISSET(env, ENV_OPEN_CALLED)) { 403 /* Make sure we have at least DB_MINCACHE pages in our cache. */ 404 if (dbenv->mp_gbytes == 0 && 405 dbenv->mp_bytes < dbp->pgsize * DB_MINPAGECACHE && 406 (ret = __memp_set_cachesize( 407 dbenv, 0, dbp->pgsize * DB_MINPAGECACHE, 0)) != 0) 408 return (ret); 409 410 if ((ret = __env_open(dbenv, NULL, DB_CREATE | 411 DB_INIT_MPOOL | DB_PRIVATE | LF_ISSET(DB_THREAD), 0)) != 0) 412 return (ret); 413 } 414 415 /* Join the underlying cache. */ 416 if ((!F_ISSET(dbp, DB_AM_INMEM) || dname == NULL) && 417 (ret = __env_mpool(dbp, fname, flags)) != 0) 418 return (ret); 419 420 /* We may need a per-thread mutex. */ 421 if (LF_ISSET(DB_THREAD) && (ret = __mutex_alloc( 422 env, MTX_DB_HANDLE, DB_MUTEX_PROCESS_ONLY, &dbp->mutex)) != 0) 423 return (ret); 424 425 /* 426 * Set up a bookkeeping entry for this database in the log region, 427 * if such a region exists. Note that even if we're in recovery 428 * or a replication client, where we won't log registries, we'll 429 * still need an FNAME struct, so LOGGING_ON is the correct macro. 430 */ 431 if (LOGGING_ON(env) && dbp->log_filename == NULL 432#if !defined(DEBUG_ROP) && !defined(DEBUG_WOP) && !defined(DIAGNOSTIC) 433 && (txn != NULL || F_ISSET(dbp, DB_AM_RECOVER)) 434#endif 435#if !defined(DEBUG_ROP) 436 && !F_ISSET(dbp, DB_AM_RDONLY) 437#endif 438 ) { 439 if ((ret = __dbreg_setup(dbp, 440 F_ISSET(dbp, DB_AM_INMEM) ? dname : fname, 441 F_ISSET(dbp, DB_AM_INMEM) ? 
NULL : dname, id)) != 0) 442 return (ret); 443 444 /* 445 * If we're actively logging and our caller isn't a 446 * recovery function that already did so, then assign 447 * this dbp a log fileid. 448 */ 449 if (DBENV_LOGGING(env) && !F_ISSET(dbp, DB_AM_RECOVER) && 450 (ret = __dbreg_new_id(dbp, txn)) != 0) 451 return (ret); 452 } 453 454 /* 455 * Insert ourselves into the ENV's dblist. We allocate a 456 * unique ID to each {fileid, meta page number} pair, and to 457 * each temporary file (since they all have a zero fileid). 458 * This ID gives us something to use to tell which DB handles 459 * go with which databases in all the cursor adjustment 460 * routines, where we don't want to do a lot of ugly and 461 * expensive memcmps. 462 */ 463 MUTEX_LOCK(env, env->mtx_dblist); 464 maxid = 0; 465 TAILQ_FOREACH(ldbp, &env->dblist, dblistlinks) { 466 /* 467 * There are three cases: on-disk database (first clause), 468 * named in-memory database (second clause), temporary database 469 * (never matches; no clause). 470 */ 471 if (!F_ISSET(dbp, DB_AM_INMEM)) { 472 if (memcmp(ldbp->fileid, dbp->fileid, DB_FILE_ID_LEN) 473 == 0 && ldbp->meta_pgno == dbp->meta_pgno) 474 break; 475 } else if (dname != NULL) { 476 if (F_ISSET(ldbp, DB_AM_INMEM) && 477 ldbp->dname != NULL && 478 strcmp(ldbp->dname, dname) == 0) 479 break; 480 } 481 if (ldbp->adj_fileid > maxid) 482 maxid = ldbp->adj_fileid; 483 } 484 485 /* 486 * If ldbp is NULL, we didn't find a match. Assign the dbp an 487 * adj_fileid one higher than the largest we found, and 488 * insert it at the head of the master dbp list. 489 * 490 * If ldbp is not NULL, it is a match for our dbp. Give dbp 491 * the same ID that ldbp has, and add it after ldbp so they're 492 * together in the list. 
493 */ 494 if (ldbp == NULL) { 495 dbp->adj_fileid = maxid + 1; 496 TAILQ_INSERT_HEAD(&env->dblist, dbp, dblistlinks); 497 } else { 498 dbp->adj_fileid = ldbp->adj_fileid; 499 TAILQ_INSERT_AFTER(&env->dblist, ldbp, dbp, dblistlinks); 500 } 501 MUTEX_UNLOCK(env, env->mtx_dblist); 502 503 return (0); 504} 505 506/* 507 * __env_mpool -- 508 * Set up the underlying environment cache during a db_open. 509 * 510 * PUBLIC: int __env_mpool __P((DB *, const char *, u_int32_t)); 511 */ 512int 513__env_mpool(dbp, fname, flags) 514 DB *dbp; 515 const char *fname; 516 u_int32_t flags; 517{ 518 DBT pgcookie; 519 DB_MPOOLFILE *mpf; 520 DB_PGINFO pginfo; 521 ENV *env; 522 int fidset, ftype, ret; 523 int32_t lsn_off; 524 u_int8_t nullfid[DB_FILE_ID_LEN]; 525 u_int32_t clear_len; 526 527 env = dbp->env; 528 529 /* The LSN is the first entry on a DB page, byte offset 0. */ 530 lsn_off = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LSN_OFF_NOTSET : 0; 531 532 /* It's possible that this database is already open. */ 533 if (F_ISSET(dbp, DB_AM_OPEN_CALLED)) 534 return (0); 535 536 /* 537 * If we need to pre- or post-process a file's pages on I/O, set the 538 * file type. If it's a hash file, always call the pgin and pgout 539 * routines. This means that hash files can never be mapped into 540 * process memory. If it's a btree file and requires swapping, we 541 * need to page the file in and out. This has to be right -- we can't 542 * mmap files that are being paged in and out. 543 */ 544 switch (dbp->type) { 545 case DB_BTREE: 546 case DB_RECNO: 547 ftype = F_ISSET(dbp, DB_AM_SWAP | DB_AM_ENCRYPT | DB_AM_CHKSUM) 548 ? DB_FTYPE_SET : DB_FTYPE_NOTSET; 549 clear_len = CRYPTO_ON(env) ? 550 (dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET) : 551 DB_PAGE_DB_LEN; 552 break; 553 case DB_HASH: 554 ftype = DB_FTYPE_SET; 555 clear_len = CRYPTO_ON(env) ? 556 (dbp->pgsize != 0 ? 
dbp->pgsize : DB_CLEARLEN_NOTSET) : 557 DB_PAGE_DB_LEN; 558 break; 559 case DB_QUEUE: 560 ftype = F_ISSET(dbp, 561 DB_AM_SWAP | DB_AM_ENCRYPT | DB_AM_CHKSUM) ? 562 DB_FTYPE_SET : DB_FTYPE_NOTSET; 563 564 /* 565 * If we came in here without a pagesize set, then we need 566 * to mark the in-memory handle as having clear_len not 567 * set, because we don't really know the clear length or 568 * the page size yet (since the file doesn't yet exist). 569 */ 570 clear_len = dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET; 571 break; 572 case DB_UNKNOWN: 573 /* 574 * If we're running in the verifier, our database might 575 * be corrupt and we might not know its type--but we may 576 * still want to be able to verify and salvage. 577 * 578 * If we can't identify the type, it's not going to be safe 579 * to call __db_pgin--we pretty much have to give up all 580 * hope of salvaging cross-endianness. Proceed anyway; 581 * at worst, the database will just appear more corrupt 582 * than it actually is, but at best, we may be able 583 * to salvage some data even with no metadata page. 584 */ 585 if (F_ISSET(dbp, DB_AM_VERIFYING)) { 586 ftype = DB_FTYPE_NOTSET; 587 clear_len = DB_PAGE_DB_LEN; 588 break; 589 } 590 591 /* 592 * This might be an in-memory file and we won't know its 593 * file type until after we open it and read the meta-data 594 * page. 
595 */ 596 if (F_ISSET(dbp, DB_AM_INMEM)) { 597 clear_len = DB_CLEARLEN_NOTSET; 598 ftype = DB_FTYPE_NOTSET; 599 lsn_off = DB_LSN_OFF_NOTSET; 600 break; 601 } 602 /* FALLTHROUGH */ 603 default: 604 return (__db_unknown_type(env, "DB->open", dbp->type)); 605 } 606 607 mpf = dbp->mpf; 608 609 memset(nullfid, 0, DB_FILE_ID_LEN); 610 fidset = memcmp(nullfid, dbp->fileid, DB_FILE_ID_LEN); 611 if (fidset) 612 (void)__memp_set_fileid(mpf, dbp->fileid); 613 614 (void)__memp_set_clear_len(mpf, clear_len); 615 (void)__memp_set_ftype(mpf, ftype); 616 (void)__memp_set_lsn_offset(mpf, lsn_off); 617 618 pginfo.db_pagesize = dbp->pgsize; 619 pginfo.flags = 620 F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP)); 621 pginfo.type = dbp->type; 622 pgcookie.data = &pginfo; 623 pgcookie.size = sizeof(DB_PGINFO); 624 (void)__memp_set_pgcookie(mpf, &pgcookie); 625 626#ifndef DIAG_MVCC 627 if (F_ISSET(env->dbenv, DB_ENV_MULTIVERSION)) 628#endif 629 if (F_ISSET(dbp, DB_AM_TXN) && 630 dbp->type != DB_QUEUE && dbp->type != DB_UNKNOWN) 631 LF_SET(DB_MULTIVERSION); 632 633 if ((ret = __memp_fopen(mpf, NULL, fname, 634 LF_ISSET(DB_CREATE | DB_DURABLE_UNKNOWN | DB_MULTIVERSION | 635 DB_NOMMAP | DB_ODDFILESIZE | DB_RDONLY | DB_TRUNCATE) | 636 (F_ISSET(env->dbenv, DB_ENV_DIRECT_DB) ? DB_DIRECT : 0) | 637 (F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_TXN_NOT_DURABLE : 0), 638 0, dbp->pgsize)) != 0) { 639 /* 640 * The open didn't work; we need to reset the mpf, 641 * retaining the in-memory semantics (if any). 642 */ 643 (void)__memp_fclose(dbp->mpf, 0); 644 (void)__memp_fcreate(env, &dbp->mpf); 645 if (F_ISSET(dbp, DB_AM_INMEM)) 646 MAKE_INMEM(dbp); 647 return (ret); 648 } 649 650 /* 651 * Set the open flag. We use it to mean that the dbp has gone 652 * through mpf setup, including dbreg_register. Also, below, 653 * the underlying access method open functions may want to do 654 * things like acquire cursors, so the open flag has to be set 655 * before calling them. 
656 */ 657 F_SET(dbp, DB_AM_OPEN_CALLED); 658 if (!fidset && fname != NULL) { 659 (void)__memp_get_fileid(dbp->mpf, dbp->fileid); 660 dbp->preserve_fid = 1; 661 } 662 663 return (0); 664} 665 666/* 667 * __db_close -- 668 * DB->close method. 669 * 670 * PUBLIC: int __db_close __P((DB *, DB_TXN *, u_int32_t)); 671 */ 672int 673__db_close(dbp, txn, flags) 674 DB *dbp; 675 DB_TXN *txn; 676 u_int32_t flags; 677{ 678 ENV *env; 679 int db_ref, deferred_close, ret, t_ret; 680 681 env = dbp->env; 682 deferred_close = ret = 0; 683 684 /* 685 * Validate arguments, but as a DB handle destructor, we can't fail. 686 * 687 * Check for consistent transaction usage -- ignore errors. Only 688 * internal callers specify transactions, so it's a serious problem 689 * if we get error messages. 690 */ 691 if (txn != NULL) 692 (void)__db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0); 693 694 /* Refresh the structure and close any underlying resources. */ 695 ret = __db_refresh(dbp, txn, flags, &deferred_close, 0); 696 697 /* 698 * If we've deferred the close because the logging of the close failed, 699 * return our failure right away without destroying the handle. 700 */ 701 if (deferred_close) 702 return (ret); 703 704 /* !!! 705 * This code has an apparent race between the moment we read and 706 * decrement env->db_ref and the moment we check whether it's 0. 707 * However, if the environment is DBLOCAL, the user shouldn't have a 708 * reference to the env handle anyway; the only way we can get 709 * multiple dbps sharing a local env is if we open them internally 710 * during something like a subdatabase open. If any such thing is 711 * going on while the user is closing the original dbp with a local 712 * env, someone's already badly screwed up, so there's no reason 713 * to bother engineering around this possibility. 
714 */ 715 MUTEX_LOCK(env, env->mtx_dblist); 716 db_ref = --env->db_ref; 717 MUTEX_UNLOCK(env, env->mtx_dblist); 718 if (F_ISSET(env, ENV_DBLOCAL) && db_ref == 0 && 719 (t_ret = __env_close(env->dbenv, 0)) != 0 && ret == 0) 720 ret = t_ret; 721 722 /* Free the database handle. */ 723 memset(dbp, CLEAR_BYTE, sizeof(*dbp)); 724 __os_free(env, dbp); 725 726 return (ret); 727} 728 729/* 730 * __db_refresh -- 731 * Refresh the DB structure, releasing any allocated resources. 732 * This does most of the work of closing files now because refresh 733 * is what is used during abort processing (since we can't destroy 734 * the actual handle) and during abort processing, we may have a 735 * fully opened handle. 736 * 737 * PUBLIC: int __db_refresh __P((DB *, DB_TXN *, u_int32_t, int *, int)); 738 */ 739int 740__db_refresh(dbp, txn, flags, deferred_closep, reuse) 741 DB *dbp; 742 DB_TXN *txn; 743 u_int32_t flags; 744 int *deferred_closep, reuse; 745{ 746 DB *sdbp; 747 DBC *dbc; 748 DB_FOREIGN_INFO *f_info, *tmp; 749 DB_LOCKER *locker; 750 DB_LOCKREQ lreq; 751 ENV *env; 752 REGENV *renv; 753 REGINFO *infop; 754 u_int32_t save_flags; 755 int resync, ret, t_ret; 756 757 ret = 0; 758 759 env = dbp->env; 760 infop = env->reginfo; 761 if (infop != NULL) 762 renv = infop->primary; 763 else 764 renv = NULL; 765 766 /* 767 * If this dbp is not completely open, avoid trapping by trying to 768 * sync without an mpool file. 769 */ 770 if (dbp->mpf == NULL) 771 LF_SET(DB_NOSYNC); 772 773 /* If never opened, or not currently open, it's easy. */ 774 if (!F_ISSET(dbp, DB_AM_OPEN_CALLED)) 775 goto never_opened; 776 777 /* 778 * If we have any secondary indices, disassociate them from us. 779 * We don't bother with the mutex here; it only protects some 780 * of the ops that will make us core-dump mid-close anyway, and 781 * if you're trying to do something with a secondary *while* you're 782 * closing the primary, you deserve what you get. 
The disassociation 783 * is mostly done just so we can close primaries and secondaries in 784 * any order--but within one thread of control. 785 */ 786 LIST_FOREACH(sdbp, &dbp->s_secondaries, s_links) { 787 LIST_REMOVE(sdbp, s_links); 788 if ((t_ret = __db_disassociate(sdbp)) != 0 && ret == 0) 789 ret = t_ret; 790 } 791 792 /* 793 * Disassociate ourself from any databases using us as a foreign key 794 * database by clearing the referring db's pointer. Reclaim memory. 795 */ 796 f_info = LIST_FIRST(&dbp->f_primaries); 797 while (f_info != NULL) { 798 tmp = LIST_NEXT(f_info, f_links); 799 LIST_REMOVE(f_info, f_links); 800 f_info->dbp->s_foreign = NULL; 801 __os_free(env, f_info); 802 f_info = tmp; 803 } 804 805 if (dbp->s_foreign != NULL && 806 (t_ret = __db_disassociate_foreign(dbp)) != 0 && ret == 0) 807 ret = t_ret; 808 809 /* 810 * Sync the underlying access method. Do before closing the cursors 811 * because DB->sync allocates cursors in order to write Recno backing 812 * source text files. 813 * 814 * Sync is slow on some systems, notably Solaris filesystems where the 815 * entire buffer cache is searched. If we're in recovery, don't flush 816 * the file, it's not necessary. 817 */ 818 if (!LF_ISSET(DB_NOSYNC) && 819 !F_ISSET(dbp, DB_AM_DISCARD | DB_AM_RECOVER) && 820 (t_ret = __db_sync(dbp)) != 0 && ret == 0) 821 ret = t_ret; 822 823 /* 824 * Go through the active cursors and call the cursor recycle routine, 825 * which resolves pending operations and moves the cursors onto the 826 * free list. Then, walk the free list and call the cursor destroy 827 * routine. Note that any failure on a close is considered "really 828 * bad" and we just break out of the loop and force forward. 829 */ 830 resync = TAILQ_FIRST(&dbp->active_queue) == NULL ? 
0 : 1; 831 while ((dbc = TAILQ_FIRST(&dbp->active_queue)) != NULL) 832 if ((t_ret = __dbc_close(dbc)) != 0) { 833 if (ret == 0) 834 ret = t_ret; 835 break; 836 } 837 838 while ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL) 839 if ((t_ret = __dbc_destroy(dbc)) != 0) { 840 if (ret == 0) 841 ret = t_ret; 842 break; 843 } 844 845 /* 846 * Close any outstanding join cursors. Join cursors destroy themselves 847 * on close and have no separate destroy routine. We don't have to set 848 * the resync flag here, because join cursors aren't write cursors. 849 */ 850 while ((dbc = TAILQ_FIRST(&dbp->join_queue)) != NULL) 851 if ((t_ret = __db_join_close(dbc)) != 0) { 852 if (ret == 0) 853 ret = t_ret; 854 break; 855 } 856 857 /* 858 * Sync the memory pool, even though we've already called DB->sync, 859 * because closing cursors can dirty pages by deleting items they 860 * referenced. 861 * 862 * Sync is slow on some systems, notably Solaris filesystems where the 863 * entire buffer cache is searched. If we're in recovery, don't flush 864 * the file, it's not necessary. 865 */ 866 if (resync && !LF_ISSET(DB_NOSYNC) && 867 !F_ISSET(dbp, DB_AM_DISCARD | DB_AM_RECOVER) && 868 (t_ret = __memp_fsync(dbp->mpf)) != 0 && ret == 0) 869 ret = t_ret; 870 871never_opened: 872 /* 873 * At this point, we haven't done anything to render the DB handle 874 * unusable, at least by a transaction abort. Take the opportunity 875 * now to log the file close if we have initialized the logging 876 * information. If this log fails and we're in a transaction, 877 * we have to bail out of the attempted close; we'll need a dbp in 878 * order to successfully abort the transaction, and we can't conjure 879 * a new one up because we haven't gotten out the dbreg_register 880 * record that represents the close. In this case, we put off 881 * actually closing the dbp until we've performed the abort. 
882 */ 883 if (!reuse && LOGGING_ON(dbp->env) && dbp->log_filename != NULL) { 884 /* 885 * Discard the log file id, if any. We want to log the close 886 * if and only if this is not a recovery dbp or a client dbp, 887 * or a dead dbp handle. 888 */ 889 DB_ASSERT(env, renv != NULL); 890 if (F_ISSET(dbp, DB_AM_RECOVER) || IS_REP_CLIENT(env) || 891 dbp->timestamp != renv->rep_timestamp) { 892 if ((t_ret = __dbreg_revoke_id(dbp, 893 0, DB_LOGFILEID_INVALID)) == 0 && ret == 0) 894 ret = t_ret; 895 if ((t_ret = __dbreg_teardown(dbp)) != 0 && ret == 0) 896 ret = t_ret; 897 } else { 898 if ((t_ret = __dbreg_close_id(dbp, 899 txn, DBREG_CLOSE)) != 0 && txn != NULL) { 900 /* 901 * We're in a txn and the attempt to log the 902 * close failed; let the txn subsystem know 903 * that we need to destroy this dbp once we're 904 * done with the abort, then bail from the 905 * close. 906 * 907 * Note that if the attempt to put off the 908 * close -also- fails--which it won't unless 909 * we're out of heap memory--we're really 910 * screwed. Panic. 911 */ 912 if ((ret = 913 __txn_closeevent(env, txn, dbp)) != 0) 914 return (__env_panic(env, ret)); 915 if (deferred_closep != NULL) 916 *deferred_closep = 1; 917 return (t_ret); 918 } 919 /* 920 * If dbreg_close_id failed and we were not in a 921 * transaction, then we need to finish this close 922 * because the caller can't do anything with the 923 * handle after we return an error. We rely on 924 * dbreg_close_id to mark the entry in some manner 925 * so that we do not do a clean shutdown of this 926 * environment. If shutdown isn't clean, then the 927 * application *must* run recovery and that will 928 * generate the RCLOSE record. 929 */ 930 } 931 932 } 933 934 /* Close any handle we've been holding since the open. */ 935 if (dbp->saved_open_fhp != NULL && 936 (t_ret = __os_closehandle(env, dbp->saved_open_fhp)) != 0 && 937 ret == 0) 938 ret = t_ret; 939 940 /* 941 * Remove this DB handle from the ENV's dblist, if it's been added. 
942 * 943 * Close our reference to the underlying cache while locked, we don't 944 * want to race with a thread searching for our underlying cache link 945 * while opening a DB handle. 946 * 947 * The DB handle may not yet have been added to the ENV list, don't 948 * blindly call the underlying TAILQ_REMOVE macro. Explicitly reset 949 * the field values to NULL so that we can't call TAILQ_REMOVE twice. 950 */ 951 MUTEX_LOCK(env, env->mtx_dblist); 952 if (!reuse && 953 (dbp->dblistlinks.tqe_next != NULL || 954 dbp->dblistlinks.tqe_prev != NULL)) { 955 TAILQ_REMOVE(&env->dblist, dbp, dblistlinks); 956 dbp->dblistlinks.tqe_next = NULL; 957 dbp->dblistlinks.tqe_prev = NULL; 958 } 959 960 /* Close the memory pool file handle. */ 961 if (dbp->mpf != NULL) { 962 if ((t_ret = __memp_fclose(dbp->mpf, 963 F_ISSET(dbp, DB_AM_DISCARD) ? DB_MPOOL_DISCARD : 0)) != 0 && 964 ret == 0) 965 ret = t_ret; 966 dbp->mpf = NULL; 967 if (reuse && 968 (t_ret = __memp_fcreate(env, &dbp->mpf)) != 0 && 969 ret == 0) 970 ret = t_ret; 971 } 972 973 MUTEX_UNLOCK(env, env->mtx_dblist); 974 975 /* 976 * Call the access specific close function. 977 * 978 * We do this here rather than in __db_close as we need to do this when 979 * aborting an open so that file descriptors are closed and abort of 980 * renames can succeed on platforms that lock open files (such as 981 * Windows). In particular, we need to ensure that all the extents 982 * associated with a queue are closed so that queue renames can be 983 * aborted. 984 * 985 * It is also important that we do this before releasing the handle 986 * lock, because dbremove and dbrename assume that once they have the 987 * handle lock, it is safe to modify the underlying file(s). 988 * 989 * !!! 990 * Because of where these functions are called in the DB handle close 991 * process, these routines can't do anything that would dirty pages or 992 * otherwise affect closing down the database. 
Specifically, we can't 993 * abort and recover any of the information they control. 994 */ 995 if ((t_ret = __bam_db_close(dbp)) != 0 && ret == 0) 996 ret = t_ret; 997 if ((t_ret = __ham_db_close(dbp)) != 0 && ret == 0) 998 ret = t_ret; 999 if ((t_ret = __qam_db_close(dbp, dbp->flags)) != 0 && ret == 0) 1000 ret = t_ret; 1001 1002 /* 1003 * !!! 1004 * At this point, the access-method specific information has been 1005 * freed. From now on, we can use the dbp, but not touch any 1006 * access-method specific data. 1007 */ 1008 1009 if (!reuse && dbp->locker != NULL) { 1010 /* We may have pending trade operations on this dbp. */ 1011 if (txn == NULL) 1012 txn = dbp->cur_txn; 1013 if (IS_REAL_TXN(txn)) 1014 __txn_remlock(env, 1015 txn, &dbp->handle_lock, dbp->locker); 1016 1017 /* We may be holding the handle lock; release it. */ 1018 lreq.op = DB_LOCK_PUT_ALL; 1019 lreq.obj = NULL; 1020 if ((t_ret = __lock_vec(env, 1021 dbp->locker, 0, &lreq, 1, NULL)) != 0 && ret == 0) 1022 ret = t_ret; 1023 1024 if ((t_ret = 1025 __lock_id_free(env, dbp->locker)) != 0 && ret == 0) 1026 ret = t_ret; 1027 dbp->locker = NULL; 1028 LOCK_INIT(dbp->handle_lock); 1029 } 1030 1031 /* 1032 * If this is a temporary file (un-named in-memory file), then 1033 * discard the locker ID allocated as the fileid. 1034 */ 1035 if (LOCKING_ON(env) && 1036 F_ISSET(dbp, DB_AM_INMEM) && !dbp->preserve_fid && 1037 *(u_int32_t *)dbp->fileid != DB_LOCK_INVALIDID) { 1038 if ((t_ret = __lock_getlocker(env->lk_handle, 1039 *(u_int32_t *)dbp->fileid, 0, &locker)) == 0) 1040 t_ret = __lock_id_free(env, locker); 1041 if (ret == 0) 1042 ret = t_ret; 1043 } 1044 1045 if (reuse) { 1046 /* 1047 * If we are reusing this dbp, then we're done now. Re-init 1048 * the handle, preserving important flags, and then return. 1049 * This code is borrowed from __db_init, which does more 1050 * than we can do here. 
1051 */ 1052 save_flags = F_ISSET(dbp, DB_AM_INMEM | DB_AM_TXN); 1053 1054 /* 1055 * XXX If this is an XA handle, we'll want to specify 1056 * DB_XA_CREATE. 1057 */ 1058 if ((ret = __bam_db_create(dbp)) != 0) 1059 return (ret); 1060 if ((ret = __ham_db_create(dbp)) != 0) 1061 return (ret); 1062 if ((ret = __qam_db_create(dbp)) != 0) 1063 return (ret); 1064 1065 /* Restore flags */ 1066 dbp->flags = dbp->orig_flags | save_flags; 1067 1068 if (FLD_ISSET(save_flags, DB_AM_INMEM)) { 1069 /* 1070 * If this is inmem, then it may have a fileid 1071 * even if it was never opened, and we need to 1072 * clear out that fileid. 1073 */ 1074 memset(dbp->fileid, 0, sizeof(dbp->fileid)); 1075 MAKE_INMEM(dbp); 1076 } 1077 return (ret); 1078 } 1079 1080 dbp->type = DB_UNKNOWN; 1081 1082 /* 1083 * The thread mutex may have been invalidated in __dbreg_close_id if the 1084 * fname refcount did not go to 0. If not, discard the thread mutex. 1085 */ 1086 if ((t_ret = __mutex_free(env, &dbp->mutex)) != 0 && ret == 0) 1087 ret = t_ret; 1088 1089 /* Discard any memory allocated for the file and database names. */ 1090 if (dbp->fname != NULL) { 1091 __os_free(dbp->env, dbp->fname); 1092 dbp->fname = NULL; 1093 } 1094 if (dbp->dname != NULL) { 1095 __os_free(dbp->env, dbp->dname); 1096 dbp->dname = NULL; 1097 } 1098 1099 /* Discard any memory used to store returned data. */ 1100 if (dbp->my_rskey.data != NULL) 1101 __os_free(dbp->env, dbp->my_rskey.data); 1102 if (dbp->my_rkey.data != NULL) 1103 __os_free(dbp->env, dbp->my_rkey.data); 1104 if (dbp->my_rdata.data != NULL) 1105 __os_free(dbp->env, dbp->my_rdata.data); 1106 1107 /* For safety's sake; we may refresh twice. */ 1108 memset(&dbp->my_rskey, 0, sizeof(DBT)); 1109 memset(&dbp->my_rkey, 0, sizeof(DBT)); 1110 memset(&dbp->my_rdata, 0, sizeof(DBT)); 1111 1112 /* Clear out fields that normally get set during open. 
*/ 1113 memset(dbp->fileid, 0, sizeof(dbp->fileid)); 1114 dbp->adj_fileid = 0; 1115 dbp->meta_pgno = 0; 1116 dbp->cur_locker = NULL; 1117 dbp->cur_txn = NULL; 1118 dbp->associate_locker = NULL; 1119 dbp->cl_id = 0; 1120 dbp->open_flags = 0; 1121 1122 /* 1123 * If we are being refreshed with a txn specified, then we need 1124 * to make sure that we clear out the lock handle field, because 1125 * releasing all the locks for this transaction will release this 1126 * lock and we don't want close to stumble upon this handle and 1127 * try to close it. 1128 */ 1129 if (txn != NULL) 1130 LOCK_INIT(dbp->handle_lock); 1131 1132 /* Reset flags to whatever the user configured. */ 1133 dbp->flags = dbp->orig_flags; 1134 1135 return (ret); 1136} 1137 1138/* 1139 * __db_disassociate -- 1140 * Destroy the association between a given secondary and its primary. 1141 */ 1142static int 1143__db_disassociate(sdbp) 1144 DB *sdbp; 1145{ 1146 DBC *dbc; 1147 int ret, t_ret; 1148 1149 ret = 0; 1150 1151 sdbp->s_callback = NULL; 1152 sdbp->s_primary = NULL; 1153 sdbp->get = sdbp->stored_get; 1154 sdbp->close = sdbp->stored_close; 1155 1156 /* 1157 * Complain, but proceed, if we have any active cursors. (We're in 1158 * the middle of a close, so there's really no turning back.) 1159 */ 1160 if (sdbp->s_refcnt != 1 || 1161 TAILQ_FIRST(&sdbp->active_queue) != NULL || 1162 TAILQ_FIRST(&sdbp->join_queue) != NULL) { 1163 __db_errx(sdbp->env, 1164 "Closing a primary DB while a secondary DB has active cursors is unsafe"); 1165 ret = EINVAL; 1166 } 1167 sdbp->s_refcnt = 0; 1168 1169 while ((dbc = TAILQ_FIRST(&sdbp->free_queue)) != NULL) 1170 if ((t_ret = __dbc_destroy(dbc)) != 0 && ret == 0) 1171 ret = t_ret; 1172 1173 F_CLR(sdbp, DB_AM_SECONDARY); 1174 return (ret); 1175} 1176 1177/* 1178 * __db_disassociate_foreign -- 1179 * Destroy the association between a given secondary and its foreign. 
1180 */ 1181static int 1182__db_disassociate_foreign(sdbp) 1183 DB *sdbp; 1184{ 1185 DB *fdbp; 1186 DB_FOREIGN_INFO *f_info, *tmp; 1187 int ret; 1188 1189 if (sdbp->s_foreign == NULL) 1190 return (0); 1191 if ((ret = __os_malloc(sdbp->env, sizeof(DB_FOREIGN_INFO), &tmp)) != 0) 1192 return (ret); 1193 1194 fdbp = sdbp->s_foreign; 1195 ret = 0; 1196 f_info = LIST_FIRST(&fdbp->f_primaries); 1197 while (f_info != NULL) { 1198 tmp = LIST_NEXT(f_info, f_links); 1199 if (f_info ->dbp == sdbp) { 1200 LIST_REMOVE(f_info, f_links); 1201 __os_free(sdbp->env, f_info); 1202 } 1203 f_info = tmp; 1204 } 1205 1206 return (ret); 1207} 1208 1209/* 1210 * __db_log_page 1211 * Log a meta-data or root page during a subdatabase create operation. 1212 * 1213 * PUBLIC: int __db_log_page __P((DB *, DB_TXN *, DB_LSN *, db_pgno_t, PAGE *)); 1214 */ 1215int 1216__db_log_page(dbp, txn, lsn, pgno, page) 1217 DB *dbp; 1218 DB_TXN *txn; 1219 DB_LSN *lsn; 1220 db_pgno_t pgno; 1221 PAGE *page; 1222{ 1223 DBT page_dbt; 1224 DB_LSN new_lsn; 1225 int ret; 1226 1227 if (!LOGGING_ON(dbp->env) || txn == NULL) 1228 return (0); 1229 1230 memset(&page_dbt, 0, sizeof(page_dbt)); 1231 page_dbt.size = dbp->pgsize; 1232 page_dbt.data = page; 1233 1234 ret = __crdel_metasub_log(dbp, txn, &new_lsn, 0, pgno, &page_dbt, lsn); 1235 1236 if (ret == 0) 1237 page->lsn = new_lsn; 1238 return (ret); 1239} 1240 1241/* 1242 * __db_backup_name 1243 * Create the backup file name for a given file. 1244 * 1245 * PUBLIC: int __db_backup_name __P((ENV *, 1246 * PUBLIC: const char *, DB_TXN *, char **)); 1247 */ 1248#undef BACKUP_PREFIX 1249#define BACKUP_PREFIX "__db." 
1250 1251#undef MAX_INT_TO_HEX 1252#define MAX_INT_TO_HEX 8 1253 1254int 1255__db_backup_name(env, name, txn, backup) 1256 ENV *env; 1257 const char *name; 1258 DB_TXN *txn; 1259 char **backup; 1260{ 1261 u_int32_t id; 1262 size_t len; 1263 int ret; 1264 char *p, *retp; 1265 1266 *backup = NULL; 1267 1268 /* 1269 * Part of the name may be a full path, so we need to make sure that 1270 * we allocate enough space for it, even in the case where we don't 1271 * use the entire filename for the backup name. 1272 */ 1273 len = strlen(name) + strlen(BACKUP_PREFIX) + 2 * MAX_INT_TO_HEX + 1; 1274 if ((ret = __os_malloc(env, len, &retp)) != 0) 1275 return (ret); 1276 1277 /* 1278 * Create the name. Backup file names are in one of 2 forms: in a 1279 * transactional env "__db.TXNID.ID", where ID is a random number, 1280 * and in any other env "__db.FILENAME". 1281 * 1282 * In addition, the name passed may contain an env-relative path. 1283 * In that case, put the "__db." in the right place (in the last 1284 * component of the pathname). 1285 * 1286 * There are four cases here: 1287 * 1. simple path w/out transaction 1288 * 2. simple path + transaction 1289 * 3. multi-component path w/out transaction 1290 * 4. multi-component path + transaction 1291 */ 1292 p = __db_rpath(name); 1293 if (IS_REAL_TXN(txn)) { 1294 __os_unique_id(env, &id); 1295 if (p == NULL) /* Case 2. */ 1296 snprintf(retp, len, "%s%x.%x", 1297 BACKUP_PREFIX, txn->txnid, id); 1298 else /* Case 4. */ 1299 snprintf(retp, len, "%.*s%x.%x", 1300 (int)(p - name) + 1, name, txn->txnid, id); 1301 } else { 1302 if (p == NULL) /* Case 1. */ 1303 snprintf(retp, len, "%s%s", BACKUP_PREFIX, name); 1304 else /* Case 3. */ 1305 snprintf(retp, len, "%.*s%s%s", 1306 (int)(p - name) + 1, name, BACKUP_PREFIX, p + 1); 1307 } 1308 1309 *backup = retp; 1310 return (0); 1311} 1312 1313#ifdef CONFIG_TEST 1314/* 1315 * __db_testcopy 1316 * Create a copy of all backup files and our "main" DB. 
1317 * 1318 * PUBLIC: #ifdef CONFIG_TEST 1319 * PUBLIC: int __db_testcopy __P((ENV *, DB *, const char *)); 1320 * PUBLIC: #endif 1321 */ 1322int 1323__db_testcopy(env, dbp, name) 1324 ENV *env; 1325 DB *dbp; 1326 const char *name; 1327{ 1328 DB_MPOOL *dbmp; 1329 DB_MPOOLFILE *mpf; 1330 1331 DB_ASSERT(env, dbp != NULL || name != NULL); 1332 1333 if (name == NULL) { 1334 dbmp = env->mp_handle; 1335 mpf = dbp->mpf; 1336 name = R_ADDR(dbmp->reginfo, mpf->mfp->path_off); 1337 } 1338 1339 if (dbp != NULL && dbp->type == DB_QUEUE) 1340 return (__qam_testdocopy(dbp, name)); 1341 else 1342 return (__db_testdocopy(env, name)); 1343} 1344 1345static int 1346__qam_testdocopy(dbp, name) 1347 DB *dbp; 1348 const char *name; 1349{ 1350 DB_THREAD_INFO *ip; 1351 QUEUE_FILELIST *filelist, *fp; 1352 int ret; 1353 char buf[DB_MAXPATHLEN], *dir; 1354 1355 filelist = NULL; 1356 if ((ret = __db_testdocopy(dbp->env, name)) != 0) 1357 return (ret); 1358 1359 /* Call ENV_GET_THREAD_INFO to get a valid DB_THREAD_INFO */ 1360 ENV_GET_THREAD_INFO(dbp->env, ip); 1361 if (dbp->mpf != NULL && 1362 (ret = __qam_gen_filelist(dbp, ip, &filelist)) != 0) 1363 goto done; 1364 1365 if (filelist == NULL) 1366 return (0); 1367 dir = ((QUEUE *)dbp->q_internal)->dir; 1368 for (fp = filelist; fp->mpf != NULL; fp++) { 1369 snprintf(buf, sizeof(buf), 1370 QUEUE_EXTENT, dir, PATH_SEPARATOR[0], name, fp->id); 1371 if ((ret = __db_testdocopy(dbp->env, buf)) != 0) 1372 return (ret); 1373 } 1374 1375done: __os_free(dbp->env, filelist); 1376 return (0); 1377} 1378 1379/* 1380 * __db_testdocopy 1381 * Create a copy of all backup files and our "main" DB. 1382 */ 1383static int 1384__db_testdocopy(env, name) 1385 ENV *env; 1386 const char *name; 1387{ 1388 size_t len; 1389 int dircnt, i, ret; 1390 char *copy, **namesp, *p, *real_name; 1391 1392 dircnt = 0; 1393 copy = NULL; 1394 namesp = NULL; 1395 1396 /* Create the real backing file name. 
*/ 1397 if ((ret = __db_appname(env, 1398 DB_APP_DATA, name, 0, NULL, &real_name)) != 0) 1399 return (ret); 1400 1401 /* 1402 * !!! 1403 * There are tests that attempt to copy non-existent files. I'd guess 1404 * it's a testing bug, but I don't have time to figure it out. Block 1405 * the case here. 1406 */ 1407 if (__os_exists(env, real_name, NULL) != 0) { 1408 __os_free(env, real_name); 1409 return (0); 1410 } 1411 1412 /* 1413 * Copy the file itself. 1414 * 1415 * Allocate space for the file name, including adding an ".afterop" and 1416 * trailing nul byte. 1417 */ 1418 len = strlen(real_name) + sizeof(".afterop"); 1419 if ((ret = __os_malloc(env, len, ©)) != 0) 1420 goto err; 1421 snprintf(copy, len, "%s.afterop", real_name); 1422 if ((ret = __db_makecopy(env, real_name, copy)) != 0) 1423 goto err; 1424 1425 /* 1426 * Get the directory path to call __os_dirlist(). 1427 */ 1428 if ((p = __db_rpath(real_name)) != NULL) 1429 *p = '\0'; 1430 if ((ret = __os_dirlist(env, real_name, 0, &namesp, &dircnt)) != 0) 1431 goto err; 1432 1433 /* 1434 * Walk the directory looking for backup files. Backup file names in 1435 * transactional environments are of the form: 1436 * 1437 * BACKUP_PREFIX.TXNID.ID 1438 */ 1439 for (i = 0; i < dircnt; i++) { 1440 /* Check for a related backup file name. */ 1441 if (strncmp( 1442 namesp[i], BACKUP_PREFIX, sizeof(BACKUP_PREFIX) - 1) != 0) 1443 continue; 1444 p = namesp[i] + sizeof(BACKUP_PREFIX); 1445 p += strspn(p, "0123456789ABCDEFabcdef"); 1446 if (*p != '.') 1447 continue; 1448 ++p; 1449 p += strspn(p, "0123456789ABCDEFabcdef"); 1450 if (*p != '\0') 1451 continue; 1452 1453 /* 1454 * Copy the backup file. 1455 * 1456 * Allocate space for the file name, including adding a 1457 * ".afterop" and trailing nul byte. 
1458 */ 1459 if (real_name != NULL) { 1460 __os_free(env, real_name); 1461 real_name = NULL; 1462 } 1463 if ((ret = __db_appname( 1464 env, DB_APP_DATA, namesp[i], 0, NULL, &real_name)) != 0) 1465 goto err; 1466 if (copy != NULL) { 1467 __os_free(env, copy); 1468 copy = NULL; 1469 } 1470 len = strlen(real_name) + sizeof(".afterop"); 1471 if ((ret = __os_malloc(env, len, ©)) != 0) 1472 goto err; 1473 snprintf(copy, len, "%s.afterop", real_name); 1474 if ((ret = __db_makecopy(env, real_name, copy)) != 0) 1475 goto err; 1476 } 1477 1478err: if (namesp != NULL) 1479 __os_dirfree(env, namesp, dircnt); 1480 if (copy != NULL) 1481 __os_free(env, copy); 1482 if (real_name != NULL) 1483 __os_free(env, real_name); 1484 return (ret); 1485} 1486 1487static int 1488__db_makecopy(env, src, dest) 1489 ENV *env; 1490 const char *src, *dest; 1491{ 1492 DB_FH *rfhp, *wfhp; 1493 size_t rcnt, wcnt; 1494 int ret; 1495 char *buf; 1496 1497 rfhp = wfhp = NULL; 1498 1499 if ((ret = __os_malloc(env, 64 * 1024, &buf)) != 0) 1500 goto err; 1501 1502 if ((ret = __os_open(env, src, 0, 1503 DB_OSO_RDONLY, DB_MODE_600, &rfhp)) != 0) 1504 goto err; 1505 if ((ret = __os_open(env, dest, 0, 1506 DB_OSO_CREATE | DB_OSO_TRUNC, DB_MODE_600, &wfhp)) != 0) 1507 goto err; 1508 1509 for (;;) { 1510 if ((ret = 1511 __os_read(env, rfhp, buf, sizeof(buf), &rcnt)) != 0) 1512 goto err; 1513 if (rcnt == 0) 1514 break; 1515 if ((ret = 1516 __os_write(env, wfhp, buf, sizeof(buf), &wcnt)) != 0) 1517 goto err; 1518 } 1519 1520 if (0) { 1521err: __db_err(env, ret, "__db_makecopy: %s -> %s", src, dest); 1522 } 1523 1524 if (buf != NULL) 1525 __os_free(env, buf); 1526 if (rfhp != NULL) 1527 (void)__os_closehandle(env, rfhp); 1528 if (wfhp != NULL) 1529 (void)__os_closehandle(env, wfhp); 1530 return (ret); 1531} 1532#endif 1533