1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996,2008 Oracle. All rights reserved. 5 */ 6/* 7 * Copyright (c) 1990, 1993, 1994 8 * Margo Seltzer. All rights reserved. 9 */ 10/* 11 * Copyright (c) 1990, 1993, 1994 12 * The Regents of the University of California. All rights reserved. 13 * 14 * This code is derived from software contributed to Berkeley by 15 * Margo Seltzer. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 3. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * $Id: hash_open.c,v 12.33 2008/01/30 12:18:22 mjc Exp $ 42 */ 43 44#include "db_config.h" 45 46#include "db_int.h" 47#include "dbinc/crypto.h" 48#include "dbinc/db_page.h" 49#include "dbinc/hash.h" 50#include "dbinc/log.h" 51#include "dbinc/lock.h" 52#include "dbinc/mp.h" 53#include "dbinc/btree.h" 54#include "dbinc/fop.h" 55 56static db_pgno_t __ham_init_meta __P((DB *, HMETA *, db_pgno_t, DB_LSN *)); 57 58/* 59 * __ham_open -- 60 * 61 * PUBLIC: int __ham_open __P((DB *, DB_THREAD_INFO *, 62 * PUBLIC: DB_TXN *, const char * name, db_pgno_t, u_int32_t)); 63 */ 64int 65__ham_open(dbp, ip, txn, name, base_pgno, flags) 66 DB *dbp; 67 DB_THREAD_INFO *ip; 68 DB_TXN *txn; 69 const char *name; 70 db_pgno_t base_pgno; 71 u_int32_t flags; 72{ 73 DBC *dbc; 74 ENV *env; 75 HASH *hashp; 76 HASH_CURSOR *hcp; 77 int ret, t_ret; 78 79 env = dbp->env; 80 dbc = NULL; 81 82 /* 83 * Get a cursor. If DB_CREATE is specified, we may be creating 84 * pages, and to do that safely in CDB we need a write cursor. 85 * In STD_LOCKING mode, we'll synchronize using the meta page 86 * lock instead. 87 */ 88 if ((ret = __db_cursor(dbp, ip, 89 txn, &dbc, LF_ISSET(DB_CREATE) && CDB_LOCKING(env) ? 90 DB_WRITECURSOR : 0)) != 0) 91 return (ret); 92 93 hcp = (HASH_CURSOR *)dbc->internal; 94 hashp = dbp->h_internal; 95 hashp->meta_pgno = base_pgno; 96 if ((ret = __ham_get_meta(dbc)) != 0) 97 goto err; 98 99 /* Initialize the hdr structure. */ 100 if (hcp->hdr->dbmeta.magic == DB_HASHMAGIC) { 101 /* File exists, verify the data in the header. */ 102 if (hashp->h_hash == NULL) 103 hashp->h_hash = hcp->hdr->dbmeta.version < 5 104 ? __ham_func4 : __ham_func5; 105 hashp->h_nelem = hcp->hdr->nelem; 106 if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUP)) 107 F_SET(dbp, DB_AM_DUP); 108 if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUPSORT)) 109 F_SET(dbp, DB_AM_DUPSORT); 110 if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_SUBDB)) 111 F_SET(dbp, DB_AM_SUBDB); 112 if (PGNO(hcp->hdr) == PGNO_BASE_MD && 113 !F_ISSET(dbp, DB_AM_RECOVER)) 114 __memp_set_last_pgno(dbp->mpf, 115 hcp->hdr->dbmeta.last_pgno); 116 } else if (!IS_RECOVERING(env) && !F_ISSET(dbp, DB_AM_RECOVER)) { 117 __db_errx(env, 118 "%s: Invalid hash meta page %lu", name, (u_long)base_pgno); 119 ret = EINVAL; 120 } 121 122 /* Release the meta data page */ 123 if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) 124 ret = t_ret; 125err: if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) 126 ret = t_ret; 127 128 return (ret); 129} 130 131/* 132 * __ham_metachk -- 133 * 134 * PUBLIC: int __ham_metachk __P((DB *, const char *, HMETA *)); 135 */ 136int 137__ham_metachk(dbp, name, hashm) 138 DB *dbp; 139 const char *name; 140 HMETA *hashm; 141{ 142 ENV *env; 143 u_int32_t vers; 144 int ret; 145 146 env = dbp->env; 147 148 /* 149 * At this point, all we know is that the magic number is for a Hash. 150 * Check the version, the database may be out of date. 151 */ 152 vers = hashm->dbmeta.version; 153 if (F_ISSET(dbp, DB_AM_SWAP)) 154 M_32_SWAP(vers); 155 switch (vers) { 156 case 4: 157 case 5: 158 case 6: 159 __db_errx(env, 160 "%s: hash version %lu requires a version upgrade", 161 name, (u_long)vers); 162 return (DB_OLD_VERSION); 163 case 7: 164 case 8: 165 case 9: 166 break; 167 default: 168 __db_errx(env, 169 "%s: unsupported hash version: %lu", name, (u_long)vers); 170 return (EINVAL); 171 } 172 173 /* Swap the page if we need to. */ 174 if (F_ISSET(dbp, DB_AM_SWAP) && 175 (ret = __ham_mswap(env, (PAGE *)hashm)) != 0) 176 return (ret); 177 178 /* Check the type. */ 179 if (dbp->type != DB_HASH && dbp->type != DB_UNKNOWN) 180 return (EINVAL); 181 dbp->type = DB_HASH; 182 DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); 183 184 /* 185 * Check application info against metadata info, and set info, flags, 186 * and type based on metadata info. 187 */ 188 if ((ret = __db_fchk(env, 189 "DB->open", hashm->dbmeta.flags, 190 DB_HASH_DUP | DB_HASH_SUBDB | DB_HASH_DUPSORT)) != 0) 191 return (ret); 192 193 if (F_ISSET(&hashm->dbmeta, DB_HASH_DUP)) 194 F_SET(dbp, DB_AM_DUP); 195 else 196 if (F_ISSET(dbp, DB_AM_DUP)) { 197 __db_errx(env, 198 "%s: DB_DUP specified to open method but not set in database", 199 name); 200 return (EINVAL); 201 } 202 203 if (F_ISSET(&hashm->dbmeta, DB_HASH_SUBDB)) 204 F_SET(dbp, DB_AM_SUBDB); 205 else 206 if (F_ISSET(dbp, DB_AM_SUBDB)) { 207 __db_errx(env, 208 "%s: multiple databases specified but not supported in file", 209 name); 210 return (EINVAL); 211 } 212 213 if (F_ISSET(&hashm->dbmeta, DB_HASH_DUPSORT)) { 214 if (dbp->dup_compare == NULL) 215 dbp->dup_compare = __bam_defcmp; 216 } else 217 if (dbp->dup_compare != NULL) { 218 __db_errx(env, 219 "%s: duplicate sort function specified but not set in database", 220 name); 221 return (EINVAL); 222 } 223 224 /* Set the page size. */ 225 dbp->pgsize = hashm->dbmeta.pagesize; 226 227 /* Copy the file's ID. */ 228 memcpy(dbp->fileid, hashm->dbmeta.uid, DB_FILE_ID_LEN); 229 230 return (0); 231} 232 233/* 234 * __ham_init_meta -- 235 * 236 * Initialize a hash meta-data page. We assume that the meta-data page is 237 * contiguous with the initial buckets that we create. If that turns out 238 * to be false, we'll fix it up later. Return the initial number of buckets 239 * allocated. 240 */ 241static db_pgno_t 242__ham_init_meta(dbp, meta, pgno, lsnp) 243 DB *dbp; 244 HMETA *meta; 245 db_pgno_t pgno; 246 DB_LSN *lsnp; 247{ 248 ENV *env; 249 HASH *hashp; 250 db_pgno_t nbuckets; 251 u_int i, l2; 252 253 env = dbp->env; 254 hashp = dbp->h_internal; 255 256 if (hashp->h_hash == NULL) 257 hashp->h_hash = DB_HASHVERSION < 5 ? __ham_func4 : __ham_func5; 258 259 if (hashp->h_nelem != 0 && hashp->h_ffactor != 0) { 260 hashp->h_nelem = (hashp->h_nelem - 1) / hashp->h_ffactor + 1; 261 l2 = __db_log2(hashp->h_nelem > 2 ? hashp->h_nelem : 2); 262 } else 263 l2 = 1; 264 nbuckets = (db_pgno_t)(1 << l2); 265 266 memset(meta, 0, sizeof(HMETA)); 267 meta->dbmeta.lsn = *lsnp; 268 meta->dbmeta.pgno = pgno; 269 meta->dbmeta.magic = DB_HASHMAGIC; 270 meta->dbmeta.version = DB_HASHVERSION; 271 meta->dbmeta.pagesize = dbp->pgsize; 272 if (F_ISSET(dbp, DB_AM_CHKSUM)) 273 FLD_SET(meta->dbmeta.metaflags, DBMETA_CHKSUM); 274 if (F_ISSET(dbp, DB_AM_ENCRYPT)) { 275 meta->dbmeta.encrypt_alg = env->crypto_handle->alg; 276 DB_ASSERT(env, meta->dbmeta.encrypt_alg != 0); 277 meta->crypto_magic = meta->dbmeta.magic; 278 } 279 meta->dbmeta.type = P_HASHMETA; 280 meta->dbmeta.free = PGNO_INVALID; 281 meta->dbmeta.last_pgno = pgno; 282 meta->max_bucket = nbuckets - 1; 283 meta->high_mask = nbuckets - 1; 284 meta->low_mask = (nbuckets >> 1) - 1; 285 meta->ffactor = hashp->h_ffactor; 286 meta->nelem = hashp->h_nelem; 287 meta->h_charkey = hashp->h_hash(dbp, CHARKEY, sizeof(CHARKEY)); 288 memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); 289 290 if (F_ISSET(dbp, DB_AM_DUP)) 291 F_SET(&meta->dbmeta, DB_HASH_DUP); 292 if (F_ISSET(dbp, DB_AM_SUBDB)) 293 F_SET(&meta->dbmeta, DB_HASH_SUBDB); 294 if (dbp->dup_compare != NULL) 295 F_SET(&meta->dbmeta, DB_HASH_DUPSORT); 296 297 /* 298 * Create the first and second buckets pages so that we have the 299 * page numbers for them and we can store that page number in the 300 * meta-data header (spares[0]). 301 */ 302 meta->spares[0] = pgno + 1; 303 304 /* Fill in the last fields of the meta data page. */ 305 for (i = 1; i <= l2; i++) 306 meta->spares[i] = meta->spares[0]; 307 for (; i < NCACHED; i++) 308 meta->spares[i] = PGNO_INVALID; 309 310 return (nbuckets); 311} 312 313/* 314 * __ham_new_file -- 315 * Create the necessary pages to begin a new database file. If name 316 * is NULL, then this is an unnamed file, the mpf has been set in the dbp 317 * and we simply create the pages using mpool. In this case, we don't log 318 * because we never have to redo an unnamed create and the undo simply 319 * frees resources. 320 * 321 * This code appears more complex than it is because of the two cases (named 322 * and unnamed). The way to read the code is that for each page being created, 323 * there are three parts: 1) a "get page" chunk (which either uses malloc'd 324 * memory or calls __memp_fget), 2) the initialization, and 3) the "put page" 325 * chunk which either does a fop write or an __memp_fput. 326 * 327 * PUBLIC: int __ham_new_file __P((DB *, 328 * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DB_FH *, const char *)); 329 */ 330int 331__ham_new_file(dbp, ip, txn, fhp, name) 332 DB *dbp; 333 DB_THREAD_INFO *ip; 334 DB_TXN *txn; 335 DB_FH *fhp; 336 const char *name; 337{ 338 DBT pdbt; 339 DB_LSN lsn; 340 DB_MPOOLFILE *mpf; 341 DB_PGINFO pginfo; 342 ENV *env; 343 HMETA *meta; 344 PAGE *page; 345 int ret; 346 db_pgno_t lpgno; 347 void *buf; 348 349 env = dbp->env; 350 mpf = dbp->mpf; 351 meta = NULL; 352 page = NULL; 353 buf = NULL; 354 355 if (F_ISSET(dbp, DB_AM_INMEM)) { 356 /* Build meta-data page. */ 357 lpgno = PGNO_BASE_MD; 358 if ((ret = __memp_fget(mpf, &lpgno, ip, txn, 359 DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &meta)) != 0) 360 return (ret); 361 LSN_NOT_LOGGED(lsn); 362 lpgno = __ham_init_meta(dbp, meta, PGNO_BASE_MD, &lsn); 363 meta->dbmeta.last_pgno = lpgno; 364 if ((ret = __db_log_page(dbp, 365 txn, &lsn, meta->dbmeta.pgno, (PAGE *)meta)) != 0) 366 goto err; 367 ret = __memp_fput(mpf, ip, meta, dbp->priority); 368 meta = NULL; 369 if (ret != 0) 370 goto err; 371 372 /* Allocate the final hash bucket. */ 373 if ((ret = __memp_fget(mpf, &lpgno, ip, txn, 374 DB_MPOOL_CREATE, &page)) != 0) 375 goto err; 376 P_INIT(page, 377 dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); 378 LSN_NOT_LOGGED(page->lsn); 379 if ((ret = 380 __db_log_page(dbp, txn, &page->lsn, lpgno, page)) != 0) 381 goto err; 382 ret = __memp_fput(mpf, ip, page, dbp->priority); 383 page = NULL; 384 if (ret != 0) 385 goto err; 386 } else { 387 memset(&pdbt, 0, sizeof(pdbt)); 388 389 /* Build meta-data page. */ 390 pginfo.db_pagesize = dbp->pgsize; 391 pginfo.type = dbp->type; 392 pginfo.flags = 393 F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP)); 394 pdbt.data = &pginfo; 395 pdbt.size = sizeof(pginfo); 396 if ((ret = __os_calloc(dbp->env, 1, dbp->pgsize, &buf)) != 0) 397 return (ret); 398 meta = (HMETA *)buf; 399 LSN_NOT_LOGGED(lsn); 400 lpgno = __ham_init_meta(dbp, meta, PGNO_BASE_MD, &lsn); 401 meta->dbmeta.last_pgno = lpgno; 402 if ((ret = 403 __db_pgout(env->dbenv, PGNO_BASE_MD, meta, &pdbt)) != 0) 404 goto err; 405 if ((ret = __fop_write(env, txn, name, DB_APP_DATA, fhp, 406 dbp->pgsize, 0, 0, buf, dbp->pgsize, 1, F_ISSET( 407 dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0)) != 0) 408 goto err; 409 meta = NULL; 410 411 /* Allocate the final hash bucket. */ 412#ifdef DIAGNOSTIC 413 memset(buf, 0, dbp->pgsize); 414#endif 415 page = (PAGE *)buf; 416 P_INIT(page, 417 dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); 418 LSN_NOT_LOGGED(page->lsn); 419 if ((ret = __db_pgout(env->dbenv, lpgno, buf, &pdbt)) != 0) 420 goto err; 421 if ((ret = __fop_write(env, txn, name, DB_APP_DATA, fhp, 422 dbp->pgsize, lpgno, 0, buf, dbp->pgsize, 1, F_ISSET( 423 dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0)) != 0) 424 goto err; 425 page = NULL; 426 } 427 428err: if (buf != NULL) 429 __os_free(env, buf); 430 else { 431 if (meta != NULL) 432 (void)__memp_fput(mpf, ip, meta, dbp->priority); 433 if (page != NULL) 434 (void)__memp_fput(mpf, ip, page, dbp->priority); 435 } 436 return (ret); 437} 438 439/* 440 * __ham_new_subdb -- 441 * Create the necessary pages to begin a new subdatabase. 442 * 443 * PUBLIC: int __ham_new_subdb __P((DB *, DB *, DB_THREAD_INFO *, DB_TXN *)); 444 */ 445int 446__ham_new_subdb(mdbp, dbp, ip, txn) 447 DB *mdbp, *dbp; 448 DB_THREAD_INFO *ip; 449 DB_TXN *txn; 450{ 451 DBC *dbc; 452 DBMETA *mmeta; 453 DB_LOCK metalock, mmlock; 454 DB_LSN lsn; 455 DB_MPOOLFILE *mpf; 456 ENV *env; 457 HMETA *meta; 458 PAGE *h; 459 int i, ret, t_ret; 460 db_pgno_t lpgno, mpgno; 461 462 env = mdbp->env; 463 mpf = mdbp->mpf; 464 dbc = NULL; 465 meta = NULL; 466 mmeta = NULL; 467 LOCK_INIT(metalock); 468 LOCK_INIT(mmlock); 469 470 if ((ret = __db_cursor(mdbp, ip, txn, 471 &dbc, CDB_LOCKING(env) ? DB_WRITECURSOR : 0)) != 0) 472 return (ret); 473 474 /* Get and lock the new meta data page. */ 475 if ((ret = __db_lget(dbc, 476 0, dbp->meta_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) 477 goto err; 478 if ((ret = __memp_fget(mpf, &dbp->meta_pgno, ip, dbc->txn, 479 DB_MPOOL_CREATE, &meta)) != 0) 480 goto err; 481 482 /* Initialize the new meta-data page. */ 483 lsn = meta->dbmeta.lsn; 484 lpgno = __ham_init_meta(dbp, meta, dbp->meta_pgno, &lsn); 485 486 /* 487 * We are about to allocate a set of contiguous buckets (lpgno 488 * worth). We need to get the master meta-data page to figure 489 * out where these pages are and to allocate them. So, lock and 490 * get the master meta data page. 491 */ 492 mpgno = PGNO_BASE_MD; 493 if ((ret = __db_lget(dbc, 0, mpgno, DB_LOCK_WRITE, 0, &mmlock)) != 0) 494 goto err; 495 if ((ret = __memp_fget(mpf, &mpgno, ip, dbc->txn, 496 DB_MPOOL_DIRTY, &mmeta)) != 0) 497 goto err; 498 499 /* 500 * Now update the hash meta-data page to reflect where the first 501 * set of buckets are actually located. 502 */ 503 meta->spares[0] = mmeta->last_pgno + 1; 504 for (i = 0; i < NCACHED && meta->spares[i] != PGNO_INVALID; i++) 505 meta->spares[i] = meta->spares[0]; 506 507 /* The new meta data page is now complete; log it. */ 508 if ((ret = __db_log_page(mdbp, 509 txn, &meta->dbmeta.lsn, dbp->meta_pgno, (PAGE *)meta)) != 0) 510 goto err; 511 512 /* Reflect the group allocation. */ 513 if (DBENV_LOGGING(env) 514#if !defined(DEBUG_WOP) 515 && txn != NULL 516#endif 517 ) 518 if ((ret = __ham_groupalloc_log(mdbp, txn, 519 &LSN(mmeta), 0, &LSN(mmeta), meta->spares[0], 520 meta->max_bucket + 1, 0, mmeta->last_pgno)) != 0) 521 goto err; 522 523 /* Release the new meta-data page. */ 524 if ((ret = __memp_fput(mpf, ip, meta, dbc->priority)) != 0) 525 goto err; 526 meta = NULL; 527 528 lpgno += mmeta->last_pgno; 529 530 /* Now allocate the final hash bucket. */ 531 if ((ret = __memp_fget(mpf, &lpgno, ip, dbc->txn, 532 DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &h)) != 0) 533 goto err; 534 535 mmeta->last_pgno = lpgno; 536 P_INIT(h, dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); 537 LSN(h) = LSN(mmeta); 538 if ((ret = __memp_fput(mpf, ip, h, dbc->priority)) != 0) 539 goto err; 540 541err: /* Now put the master-metadata page back. */ 542 if (mmeta != NULL && (t_ret = __memp_fput(mpf, 543 ip, mmeta, dbc->priority)) != 0 && ret == 0) 544 ret = t_ret; 545 if ((t_ret = __LPUT(dbc, mmlock)) != 0 && ret == 0) 546 ret = t_ret; 547 if (meta != NULL && (t_ret = __memp_fput(mpf, 548 ip, meta, dbc->priority)) != 0 && ret == 0) 549 ret = t_ret; 550 if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) 551 ret = t_ret; 552 if (dbc != NULL) 553 if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) 554 ret = t_ret; 555 return (ret); 556} 557