1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 2000,2008 Oracle. All rights reserved. 5 * 6 * $Id: db_vrfy.c,v 12.53 2008/03/12 20:34:13 mbrey Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/db_page.h" 13#include "dbinc/db_swap.h" 14#include "dbinc/db_verify.h" 15#include "dbinc/btree.h" 16#include "dbinc/hash.h" 17#include "dbinc/lock.h" 18#include "dbinc/mp.h" 19#include "dbinc/qam.h" 20#include "dbinc/txn.h" 21 22/* 23 * This is the code for DB->verify, the DB database consistency checker. 24 * For now, it checks all subdatabases in a database, and verifies 25 * everything it knows how to (i.e. it's all-or-nothing, and one can't 26 * check only for a subset of possible problems). 27 */ 28 29static u_int __db_guesspgsize __P((ENV *, DB_FH *)); 30static int __db_is_valid_magicno __P((u_int32_t, DBTYPE *)); 31static int __db_meta2pgset 32 __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, DB *)); 33static int __db_salvage_subdbpg __P((DB *, VRFY_DBINFO *, 34 PAGE *, void *, int (*)(void *, const void *), u_int32_t)); 35static int __db_salvage_subdbs __P((DB *, VRFY_DBINFO *, void *, 36 int(*)(void *, const void *), u_int32_t, int *)); 37static int __db_salvage_unknowns __P((DB *, VRFY_DBINFO *, void *, 38 int (*)(void *, const void *), u_int32_t)); 39static int __db_verify __P((DB *, DB_THREAD_INFO *, const char *, 40 const char *, void *, int (*)(void *, const void *), 41 u_int32_t)); 42static int __db_verify_arg __P((DB *, const char *, void *, u_int32_t)); 43static int __db_vrfy_freelist 44 __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t)); 45static int __db_vrfy_invalid 46 __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); 47static int __db_vrfy_orderchkonly __P((DB *, 48 VRFY_DBINFO *, const char *, const char *, u_int32_t)); 49static int __db_vrfy_pagezero __P((DB *, VRFY_DBINFO *, DB_FH *, u_int32_t)); 50static int __db_vrfy_subdbs 51 __P((DB *, VRFY_DBINFO *, const char *, u_int32_t)); 52static int __db_vrfy_structure 53 __P((DB *, VRFY_DBINFO *, const char *, db_pgno_t, u_int32_t)); 54static int __db_vrfy_walkpages __P((DB *, VRFY_DBINFO *, 55 void *, int (*)(void *, const void *), u_int32_t)); 56 57#define VERIFY_FLAGS \ 58 (DB_AGGRESSIVE | \ 59 DB_NOORDERCHK | DB_ORDERCHKONLY | DB_PRINTABLE | DB_SALVAGE | DB_UNREF) 60 61/* 62 * __db_verify_pp -- 63 * DB->verify public interface. 64 * 65 * PUBLIC: int __db_verify_pp 66 * PUBLIC: __P((DB *, const char *, const char *, FILE *, u_int32_t)); 67 */ 68int 69__db_verify_pp(dbp, file, database, outfile, flags) 70 DB *dbp; 71 const char *file, *database; 72 FILE *outfile; 73 u_int32_t flags; 74{ 75 /* 76 * __db_verify_pp is a wrapper to __db_verify_internal, which lets 77 * us pass appropriate equivalents to FILE * in from the non-C APIs. 78 * That's why the usual ENV_ENTER macros are in __db_verify_internal, 79 * not here. 80 */ 81 return (__db_verify_internal(dbp, 82 file, database, outfile, __db_pr_callback, flags)); 83} 84 85/* 86 * __db_verify_internal -- 87 * 88 * PUBLIC: int __db_verify_internal __P((DB *, const char *, 89 * PUBLIC: const char *, void *, int (*)(void *, const void *), u_int32_t)); 90 */ 91int 92__db_verify_internal(dbp, fname, dname, handle, callback, flags) 93 DB *dbp; 94 const char *fname, *dname; 95 void *handle; 96 int (*callback) __P((void *, const void *)); 97 u_int32_t flags; 98{ 99 DB_THREAD_INFO *ip; 100 ENV *env; 101 int ret, t_ret; 102 103 env = dbp->env; 104 105 DB_ILLEGAL_AFTER_OPEN(dbp, "DB->verify"); 106 107 if (!LF_ISSET(DB_SALVAGE)) 108 LF_SET(DB_UNREF); 109 110 ENV_ENTER(env, ip); 111 112 if ((ret = __db_verify_arg(dbp, dname, handle, flags)) == 0) 113 ret = __db_verify(dbp, ip, 114 fname, dname, handle, callback, flags); 115 116 /* Db.verify is a DB handle destructor. */ 117 if ((t_ret = __db_close(dbp, NULL, 0)) != 0 && ret == 0) 118 ret = t_ret; 119 120 ENV_LEAVE(env, ip); 121 return (ret); 122} 123 124/* 125 * __db_verify_arg -- 126 * Check DB->verify arguments. 127 */ 128static int 129__db_verify_arg(dbp, dname, handle, flags) 130 DB *dbp; 131 const char *dname; 132 void *handle; 133 u_int32_t flags; 134{ 135 ENV *env; 136 int ret; 137 138 env = dbp->env; 139 140 if ((ret = __db_fchk(env, "DB->verify", flags, VERIFY_FLAGS)) != 0) 141 return (ret); 142 143 /* 144 * DB_SALVAGE is mutually exclusive with the other flags except 145 * DB_AGGRESSIVE, DB_PRINTABLE. 146 * 147 * DB_AGGRESSIVE and DB_PRINTABLE are only meaningful when salvaging. 148 * 149 * DB_SALVAGE requires an output stream. 150 */ 151 if (LF_ISSET(DB_SALVAGE)) { 152 if (LF_ISSET(~(DB_AGGRESSIVE | DB_PRINTABLE | DB_SALVAGE))) 153 return (__db_ferr(env, "DB->verify", 1)); 154 if (handle == NULL) { 155 __db_errx(env, 156 "DB_SALVAGE requires a an output handle"); 157 return (EINVAL); 158 } 159 } else 160 if (LF_ISSET(DB_AGGRESSIVE | DB_PRINTABLE)) 161 return (__db_ferr(env, "DB->verify", 1)); 162 163 /* 164 * DB_ORDERCHKONLY is mutually exclusive with DB_SALVAGE and 165 * DB_NOORDERCHK, and requires a database name. 166 */ 167 if ((ret = __db_fcchk(env, "DB->verify", flags, 168 DB_ORDERCHKONLY, DB_SALVAGE | DB_NOORDERCHK)) != 0) 169 return (ret); 170 if (LF_ISSET(DB_ORDERCHKONLY) && dname == NULL) { 171 __db_errx(env, "DB_ORDERCHKONLY requires a database name"); 172 return (EINVAL); 173 } 174 return (0); 175} 176 177/* 178 * __db_verify -- 179 * Walk the entire file page-by-page, either verifying with or without 180 * dumping in db_dump -d format, or DB_SALVAGE-ing whatever key/data 181 * pairs can be found and dumping them in standard (db_load-ready) 182 * dump format. 183 * 184 * (Salvaging isn't really a verification operation, but we put it 185 * here anyway because it requires essentially identical top-level 186 * code.) 187 * 188 * flags may be 0, DB_NOORDERCHK, DB_ORDERCHKONLY, or DB_SALVAGE 189 * (and optionally DB_AGGRESSIVE). 190 */ 191static int 192__db_verify(dbp, ip, name, subdb, handle, callback, flags) 193 DB *dbp; 194 DB_THREAD_INFO *ip; 195 const char *name, *subdb; 196 void *handle; 197 int (*callback) __P((void *, const void *)); 198 u_int32_t flags; 199{ 200 DB_FH *fhp; 201 ENV *env; 202 VRFY_DBINFO *vdp; 203 int has_subdbs, isbad, ret, t_ret; 204 char *real_name; 205 206 env = dbp->env; 207 fhp = NULL; 208 vdp = NULL; 209 real_name = NULL; 210 has_subdbs = isbad = ret = 0; 211 212 F_SET(dbp, DB_AM_VERIFYING); 213 214 /* Initialize any feedback function. */ 215 if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL) 216 dbp->db_feedback(dbp, DB_VERIFY, 0); 217 218 /* 219 * We don't know how large the cache is, and if the database 220 * in question uses a small page size--which we don't know 221 * yet!--it may be uncomfortably small for the default page 222 * size [#2143]. However, the things we need temporary 223 * databases for in dbinfo are largely tiny, so using a 224 * 1024-byte pagesize is probably not going to be a big hit, 225 * and will make us fit better into small spaces. 226 */ 227 if ((ret = __db_vrfy_dbinfo_create(env, ip, 1024, &vdp)) != 0) 228 goto err; 229 230 /* 231 * Note whether the user has requested that we use printable 232 * chars where possible. We won't get here with this flag if 233 * we're not salvaging. 234 */ 235 if (LF_ISSET(DB_PRINTABLE)) 236 F_SET(vdp, SALVAGE_PRINTABLE); 237 238 /* Find the real name of the file. */ 239 if ((ret = __db_appname(env, 240 DB_APP_DATA, name, 0, NULL, &real_name)) != 0) 241 goto err; 242 243 /* 244 * Our first order of business is to verify page 0, which is 245 * the metadata page for the master database of subdatabases 246 * or of the only database in the file. We want to do this by hand 247 * rather than just calling __db_open in case it's corrupt--various 248 * things in __db_open might act funny. 249 * 250 * Once we know the metadata page is healthy, I believe that it's 251 * safe to open the database normally and then use the page swapping 252 * code, which makes life easier. 253 */ 254 if ((ret = __os_open(env, real_name, 0, DB_OSO_RDONLY, 0, &fhp)) != 0) 255 goto err; 256 257 /* Verify the metadata page 0; set pagesize and type. */ 258 if ((ret = __db_vrfy_pagezero(dbp, vdp, fhp, flags)) != 0) { 259 if (ret == DB_VERIFY_BAD) 260 isbad = 1; 261 else 262 goto err; 263 } 264 265 /* 266 * We can assume at this point that dbp->pagesize and dbp->type are 267 * set correctly, or at least as well as they can be, and that 268 * locking, logging, and txns are not in use. Thus we can trust 269 * the memp code not to look at the page, and thus to be safe 270 * enough to use. 271 * 272 * The dbp is not open, but the file is open in the fhp, and we 273 * cannot assume that __db_open is safe. Call __env_setup, 274 * the [safe] part of __db_open that initializes the environment-- 275 * and the mpool--manually. 276 */ 277 if ((ret = __env_setup(dbp, NULL, 278 name, subdb, TXN_INVALID, DB_ODDFILESIZE | DB_RDONLY)) != 0) 279 goto err; 280 281 /* 282 * Set our name in the Queue subsystem; we may need it later 283 * to deal with extents. 284 */ 285 if (dbp->type == DB_QUEUE && 286 (ret = __qam_set_ext_data(dbp, name)) != 0) 287 goto err; 288 289 /* Mark the dbp as opened, so that we correctly handle its close. */ 290 F_SET(dbp, DB_AM_OPEN_CALLED); 291 292 /* Find out the page number of the last page in the database. */ 293 if ((ret = __memp_get_last_pgno(dbp->mpf, &vdp->last_pgno)) != 0) 294 goto err; 295 296 /* 297 * DB_ORDERCHKONLY is a special case; our file consists of 298 * several subdatabases, which use different hash, bt_compare, 299 * and/or dup_compare functions. Consequently, we couldn't verify 300 * sorting and hashing simply by calling DB->verify() on the file. 301 * DB_ORDERCHKONLY allows us to come back and check those things; it 302 * requires a subdatabase, and assumes that everything but that 303 * database's sorting/hashing is correct. 304 */ 305 if (LF_ISSET(DB_ORDERCHKONLY)) { 306 ret = __db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags); 307 goto done; 308 } 309 310 /* 311 * When salvaging, we use a db to keep track of whether we've seen a 312 * given overflow or dup page in the course of traversing normal data. 313 * If in the end we have not, we assume its key got lost and print it 314 * with key "UNKNOWN". 315 */ 316 if (LF_ISSET(DB_SALVAGE)) { 317 if ((ret = __db_salvage_init(vdp)) != 0) 318 goto err; 319 320 /* 321 * If we're not being aggressive, attempt to crack subdatabases. 322 * "has_subdbs" will indicate whether the attempt has succeeded 323 * (even in part), meaning that we have some semblance of 324 * subdatabases; on the walkpages pass, we print out whichever 325 * data pages we have not seen. 326 */ 327 if (!LF_ISSET(DB_AGGRESSIVE) && __db_salvage_subdbs( 328 dbp, vdp, handle, callback, flags, &has_subdbs) != 0) 329 isbad = 1; 330 331 /* 332 * If we have subdatabases, flag if any keys are found that 333 * don't belong to a subdatabase -- they'll need to have an 334 * "__OTHER__" subdatabase header printed first. 335 */ 336 if (has_subdbs) 337 F_SET(vdp, SALVAGE_PRINTHEADER); 338 } 339 340 if ((ret = 341 __db_vrfy_walkpages(dbp, vdp, handle, callback, flags)) != 0) { 342 if (ret == DB_VERIFY_BAD) 343 isbad = 1; 344 else 345 goto err; 346 } 347 348 /* If we're verifying, verify inter-page structure. */ 349 if (!LF_ISSET(DB_SALVAGE) && isbad == 0) 350 if ((ret = 351 __db_vrfy_structure(dbp, vdp, name, 0, flags)) != 0) { 352 if (ret == DB_VERIFY_BAD) 353 isbad = 1; 354 else 355 goto err; 356 } 357 358 /* 359 * If we're salvaging, output with key UNKNOWN any overflow or dup pages 360 * we haven't been able to put in context. Then destroy the salvager's 361 * state-saving database. 362 */ 363 if (LF_ISSET(DB_SALVAGE)) { 364 if ((ret = __db_salvage_unknowns(dbp, 365 vdp, handle, callback, flags)) != 0) 366 isbad = 1; 367 /* No return value, since there's little we can do. */ 368 __db_salvage_destroy(vdp); 369 } 370 371 /* Don't display a footer for a database holding other databases. */ 372 if (LF_ISSET(DB_SALVAGE) && 373 (!has_subdbs || F_ISSET(vdp, SALVAGE_PRINTFOOTER))) 374 (void)__db_prfooter(handle, callback); 375 376done: err: 377 /* Send feedback that we're done. */ 378 if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL) 379 dbp->db_feedback(dbp, DB_VERIFY, 100); 380 381 if (fhp != NULL && 382 (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0) 383 ret = t_ret; 384 if (vdp != NULL && 385 (t_ret = __db_vrfy_dbinfo_destroy(env, vdp)) != 0 && ret == 0) 386 ret = t_ret; 387 if (real_name != NULL) 388 __os_free(env, real_name); 389 390 /* 391 * DB_VERIFY_FATAL is a private error, translate to a public one. 392 * 393 * If we didn't find a page, it's probably a page number was corrupted. 394 * Return the standard corruption error. 395 * 396 * Otherwise, if we found corruption along the way, set the return. 397 */ 398 if (ret == DB_VERIFY_FATAL || 399 ret == DB_PAGE_NOTFOUND || (ret == 0 && isbad == 1)) 400 ret = DB_VERIFY_BAD; 401 402 /* Make sure there's a public complaint if we found corruption. */ 403 if (ret != 0) 404 __db_err(env, ret, "%s", name); 405 406 return (ret); 407} 408 409/* 410 * __db_vrfy_pagezero -- 411 * Verify the master metadata page. Use seek, read, and a local buffer 412 * rather than the DB paging code, for safety. 413 * 414 * Must correctly (or best-guess) set dbp->type and dbp->pagesize. 415 */ 416static int 417__db_vrfy_pagezero(dbp, vdp, fhp, flags) 418 DB *dbp; 419 VRFY_DBINFO *vdp; 420 DB_FH *fhp; 421 u_int32_t flags; 422{ 423 DBMETA *meta; 424 ENV *env; 425 VRFY_PAGEINFO *pip; 426 db_pgno_t freelist; 427 size_t nr; 428 int isbad, ret, swapped; 429 u_int8_t mbuf[DBMETASIZE]; 430 431 isbad = ret = swapped = 0; 432 freelist = 0; 433 env = dbp->env; 434 meta = (DBMETA *)mbuf; 435 dbp->type = DB_UNKNOWN; 436 437 if ((ret = __db_vrfy_getpageinfo(vdp, PGNO_BASE_MD, &pip)) != 0) 438 return (ret); 439 440 /* 441 * Seek to the metadata page. 442 * Note that if we're just starting a verification, dbp->pgsize 443 * may be zero; this is okay, as we want page zero anyway and 444 * 0*0 == 0. 445 */ 446 if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0 || 447 (ret = __os_read(env, fhp, mbuf, DBMETASIZE, &nr)) != 0) { 448 __db_err(env, ret, 449 "Metadata page %lu cannot be read", (u_long)PGNO_BASE_MD); 450 return (ret); 451 } 452 453 if (nr != DBMETASIZE) { 454 EPRINT((env, 455 "Page %lu: Incomplete metadata page", 456 (u_long)PGNO_BASE_MD)); 457 return (DB_VERIFY_FATAL); 458 } 459 460 if ((ret = __db_chk_meta(env, dbp, meta, 1)) != 0) { 461 EPRINT((env, 462 "Page %lu: metadata page corrupted", (u_long)PGNO_BASE_MD)); 463 isbad = 1; 464 if (ret != -1) { 465 EPRINT((env, 466 "Page %lu: could not check metadata page", 467 (u_long)PGNO_BASE_MD)); 468 return (DB_VERIFY_FATAL); 469 } 470 } 471 472 /* 473 * Check all of the fields that we can. 474 * 475 * 08-11: Current page number. Must == pgno. 476 * Note that endianness doesn't matter--it's zero. 477 */ 478 if (meta->pgno != PGNO_BASE_MD) { 479 isbad = 1; 480 EPRINT((env, "Page %lu: pgno incorrectly set to %lu", 481 (u_long)PGNO_BASE_MD, (u_long)meta->pgno)); 482 } 483 484 /* 12-15: Magic number. Must be one of valid set. */ 485 if (__db_is_valid_magicno(meta->magic, &dbp->type)) 486 swapped = 0; 487 else { 488 M_32_SWAP(meta->magic); 489 if (__db_is_valid_magicno(meta->magic, 490 &dbp->type)) 491 swapped = 1; 492 else { 493 isbad = 1; 494 EPRINT((env, 495 "Page %lu: bad magic number %lu", 496 (u_long)PGNO_BASE_MD, (u_long)meta->magic)); 497 } 498 } 499 500 /* 501 * 16-19: Version. Must be current; for now, we 502 * don't support verification of old versions. 503 */ 504 if (swapped) 505 M_32_SWAP(meta->version); 506 if ((dbp->type == DB_BTREE && 507 (meta->version > DB_BTREEVERSION || 508 meta->version < DB_BTREEOLDVER)) || 509 (dbp->type == DB_HASH && 510 (meta->version > DB_HASHVERSION || 511 meta->version < DB_HASHOLDVER)) || 512 (dbp->type == DB_QUEUE && 513 (meta->version > DB_QAMVERSION || 514 meta->version < DB_QAMOLDVER))) { 515 isbad = 1; 516 EPRINT((env, 517 "Page %lu: unsupported DB version %lu; extraneous errors may result", 518 (u_long)PGNO_BASE_MD, (u_long)meta->version)); 519 } 520 521 /* 522 * 20-23: Pagesize. Must be power of two, 523 * greater than 512, and less than 64K. 524 */ 525 if (swapped) 526 M_32_SWAP(meta->pagesize); 527 if (IS_VALID_PAGESIZE(meta->pagesize)) 528 dbp->pgsize = meta->pagesize; 529 else { 530 isbad = 1; 531 EPRINT((env, "Page %lu: bad page size %lu", 532 (u_long)PGNO_BASE_MD, (u_long)meta->pagesize)); 533 534 /* 535 * Now try to settle on a pagesize to use. 536 * If the user-supplied one is reasonable, 537 * use it; else, guess. 538 */ 539 if (!IS_VALID_PAGESIZE(dbp->pgsize)) 540 dbp->pgsize = __db_guesspgsize(env, fhp); 541 } 542 543 /* 544 * 25: Page type. Must be correct for dbp->type, 545 * which is by now set as well as it can be. 546 */ 547 /* Needs no swapping--only one byte! */ 548 if ((dbp->type == DB_BTREE && meta->type != P_BTREEMETA) || 549 (dbp->type == DB_HASH && meta->type != P_HASHMETA) || 550 (dbp->type == DB_QUEUE && meta->type != P_QAMMETA)) { 551 isbad = 1; 552 EPRINT((env, "Page %lu: bad page type %lu", 553 (u_long)PGNO_BASE_MD, (u_long)meta->type)); 554 } 555 556 /* 557 * 26: Meta-flags. 558 */ 559 if (meta->metaflags != 0) { 560 if (meta->metaflags == DBMETA_CHKSUM) 561 F_SET(pip, VRFY_HAS_CHKSUM); 562 else { 563 isbad = 1; 564 EPRINT((env, 565 "Page %lu: bad meta-data flags value %#lx", 566 (u_long)PGNO_BASE_MD, (u_long)meta->metaflags)); 567 } 568 } 569 570 /* 571 * 28-31: Free list page number. 572 * 32-35: Last page in database file. 573 * We'll verify its sensibility when we do inter-page 574 * verification later; for now, just store it. 575 */ 576 if (swapped) 577 M_32_SWAP(meta->free); 578 freelist = meta->free; 579 if (swapped) 580 M_32_SWAP(meta->last_pgno); 581 vdp->meta_last_pgno = meta->last_pgno; 582 583 /* 584 * Initialize vdp->pages to fit a single pageinfo structure for 585 * this one page. We'll realloc later when we know how many 586 * pages there are. 587 */ 588 pip->pgno = PGNO_BASE_MD; 589 pip->type = meta->type; 590 591 /* 592 * Signal that we still have to check the info specific to 593 * a given type of meta page. 594 */ 595 F_SET(pip, VRFY_INCOMPLETE); 596 597 pip->free = freelist; 598 599 if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) 600 return (ret); 601 602 /* Set up the dbp's fileid. We don't use the regular open path. */ 603 memcpy(dbp->fileid, meta->uid, DB_FILE_ID_LEN); 604 605 if (swapped == 1) 606 F_SET(dbp, DB_AM_SWAP); 607 608 return (isbad ? DB_VERIFY_BAD : 0); 609} 610 611/* 612 * __db_vrfy_walkpages -- 613 * Main loop of the verifier/salvager. Walks through, 614 * page by page, and verifies all pages and/or prints all data pages. 615 */ 616static int 617__db_vrfy_walkpages(dbp, vdp, handle, callback, flags) 618 DB *dbp; 619 VRFY_DBINFO *vdp; 620 void *handle; 621 int (*callback) __P((void *, const void *)); 622 u_int32_t flags; 623{ 624 DB_MPOOLFILE *mpf; 625 ENV *env; 626 PAGE *h; 627 VRFY_PAGEINFO *pip; 628 db_pgno_t i; 629 int ret, t_ret, isbad; 630 631 env = dbp->env; 632 mpf = dbp->mpf; 633 h = NULL; 634 ret = isbad = t_ret = 0; 635 636 for (i = 0; i <= vdp->last_pgno; i++) { 637 /* 638 * If DB_SALVAGE is set, we inspect our database of completed 639 * pages, and skip any we've already printed in the subdb pass. 640 */ 641 if (LF_ISSET(DB_SALVAGE) && (__db_salvage_isdone(vdp, i) != 0)) 642 continue; 643 644 /* 645 * An individual page get can fail if: 646 * * This is a hash database, it is expected to find 647 * empty buckets, which don't have allocated pages. Create 648 * a dummy page so the verification can proceed. 649 * * We are salvaging, flag the error and continue. 650 */ 651 if ((t_ret = __memp_fget(mpf, &i, 652 vdp->thread_info, NULL, 0, &h)) != 0) { 653 if (dbp->type == DB_HASH) { 654 if ((t_ret = 655 __db_vrfy_getpageinfo(vdp, i, &pip)) != 0) 656 goto err1; 657 pip->type = P_INVALID; 658 pip->pgno = i; 659 F_CLR(pip, VRFY_IS_ALLZEROES); 660 if ((t_ret = __db_vrfy_putpageinfo( 661 env, vdp, pip)) != 0) 662 goto err1; 663 continue; 664 } 665err1: if (ret == 0) 666 ret = t_ret; 667 if (LF_ISSET(DB_SALVAGE)) 668 continue; 669 return (ret); 670 } 671 672 if (LF_ISSET(DB_SALVAGE)) { 673 /* 674 * We pretty much don't want to quit unless a 675 * bomb hits. May as well return that something 676 * was screwy, however. 677 */ 678 if ((t_ret = __db_salvage(dbp, 679 vdp, i, h, handle, callback, flags)) != 0) { 680 if (ret == 0) 681 ret = t_ret; 682 isbad = 1; 683 } 684 } else { 685 /* 686 * If we are not salvaging, and we get any error 687 * other than DB_VERIFY_BAD, return immediately; 688 * it may not be safe to proceed. If we get 689 * DB_VERIFY_BAD, keep going; listing more errors 690 * may make it easier to diagnose problems and 691 * determine the magnitude of the corruption. 692 * 693 * Verify info common to all page types. 694 */ 695 if (i != PGNO_BASE_MD) { 696 ret = __db_vrfy_common(dbp, vdp, h, i, flags); 697 if (ret == DB_VERIFY_BAD) 698 isbad = 1; 699 else if (ret != 0) 700 goto err; 701 } 702 703 switch (TYPE(h)) { 704 case P_INVALID: 705 ret = __db_vrfy_invalid(dbp, vdp, h, i, flags); 706 break; 707 case __P_DUPLICATE: 708 isbad = 1; 709 EPRINT((env, 710 "Page %lu: old-style duplicate page", 711 (u_long)i)); 712 break; 713 case P_HASH_UNSORTED: 714 case P_HASH: 715 ret = __ham_vrfy(dbp, vdp, h, i, flags); 716 break; 717 case P_IBTREE: 718 case P_IRECNO: 719 case P_LBTREE: 720 case P_LDUP: 721 ret = __bam_vrfy(dbp, vdp, h, i, flags); 722 break; 723 case P_LRECNO: 724 ret = __ram_vrfy_leaf(dbp, vdp, h, i, flags); 725 break; 726 case P_OVERFLOW: 727 ret = __db_vrfy_overflow(dbp, vdp, h, i, flags); 728 break; 729 case P_HASHMETA: 730 ret = __ham_vrfy_meta(dbp, 731 vdp, (HMETA *)h, i, flags); 732 break; 733 case P_BTREEMETA: 734 ret = __bam_vrfy_meta(dbp, 735 vdp, (BTMETA *)h, i, flags); 736 break; 737 case P_QAMMETA: 738 ret = __qam_vrfy_meta(dbp, 739 vdp, (QMETA *)h, i, flags); 740 break; 741 case P_QAMDATA: 742 ret = __qam_vrfy_data(dbp, 743 vdp, (QPAGE *)h, i, flags); 744 break; 745 default: 746 EPRINT((env, 747 "Page %lu: unknown page type %lu", 748 (u_long)i, (u_long)TYPE(h))); 749 isbad = 1; 750 break; 751 } 752 753 /* 754 * Set up error return. 755 */ 756 if (ret == DB_VERIFY_BAD) 757 isbad = 1; 758 else if (ret != 0) 759 goto err; 760 761 /* 762 * Provide feedback to the application about our 763 * progress. The range 0-50% comes from the fact 764 * that this is the first of two passes through the 765 * database (front-to-back, then top-to-bottom). 766 */ 767 if (dbp->db_feedback != NULL) 768 dbp->db_feedback(dbp, DB_VERIFY, 769 (int)((i + 1) * 50 / (vdp->last_pgno + 1))); 770 } 771 772 /* 773 * Just as with the page get, bail if and only if we're 774 * not salvaging. 775 */ 776 if ((t_ret = __memp_fput(mpf, 777 vdp->thread_info, h, dbp->priority)) != 0) { 778 if (ret == 0) 779 ret = t_ret; 780 if (!LF_ISSET(DB_SALVAGE)) 781 return (ret); 782 } 783 } 784 785 /* 786 * If we've seen a Queue metadata page, we may need to walk Queue 787 * extent pages that won't show up between 0 and vdp->last_pgno. 788 */ 789 if (F_ISSET(vdp, VRFY_QMETA_SET) && (t_ret = 790 __qam_vrfy_walkqueue(dbp, vdp, handle, callback, flags)) != 0) { 791 if (ret == 0) 792 ret = t_ret; 793 if (t_ret == DB_VERIFY_BAD) 794 isbad = 1; 795 else if (!LF_ISSET(DB_SALVAGE)) 796 return (ret); 797 } 798 799 if (0) { 800err: if (h != NULL && (t_ret = __memp_fput(mpf, 801 vdp->thread_info, h, dbp->priority)) != 0) 802 return (ret == 0 ? t_ret : ret); 803 } 804 805 return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret); 806} 807 808/* 809 * __db_vrfy_structure-- 810 * After a beginning-to-end walk through the database has been 811 * completed, put together the information that has been collected 812 * to verify the overall database structure. 813 * 814 * Should only be called if we want to do a database verification, 815 * i.e. if DB_SALVAGE is not set. 816 */ 817static int 818__db_vrfy_structure(dbp, vdp, dbname, meta_pgno, flags) 819 DB *dbp; 820 VRFY_DBINFO *vdp; 821 const char *dbname; 822 db_pgno_t meta_pgno; 823 u_int32_t flags; 824{ 825 DB *pgset; 826 ENV *env; 827 VRFY_PAGEINFO *pip; 828 db_pgno_t i; 829 int ret, isbad, hassubs, p; 830 831 isbad = 0; 832 pip = NULL; 833 env = dbp->env; 834 pgset = vdp->pgset; 835 836 /* 837 * Providing feedback here is tricky; in most situations, 838 * we fetch each page one more time, but we do so in a top-down 839 * order that depends on the access method. Worse, we do this 840 * recursively in btree, such that on any call where we're traversing 841 * a subtree we don't know where that subtree is in the whole database; 842 * worse still, any given database may be one of several subdbs. 843 * 844 * The solution is to decrement a counter vdp->pgs_remaining each time 845 * we verify (and call feedback on) a page. We may over- or 846 * under-count, but the structure feedback function will ensure that we 847 * never give a percentage under 50 or over 100. (The first pass 848 * covered the range 0-50%.) 849 */ 850 if (dbp->db_feedback != NULL) 851 vdp->pgs_remaining = vdp->last_pgno + 1; 852 853 /* 854 * Call the appropriate function to downwards-traverse the db type. 855 */ 856 switch (dbp->type) { 857 case DB_BTREE: 858 case DB_RECNO: 859 if ((ret = __bam_vrfy_structure(dbp, vdp, 0, flags)) != 0) { 860 if (ret == DB_VERIFY_BAD) 861 isbad = 1; 862 else 863 goto err; 864 } 865 866 /* 867 * If we have subdatabases and we know that the database is, 868 * thus far, sound, it's safe to walk the tree of subdatabases. 869 * Do so, and verify the structure of the databases within. 870 */ 871 if ((ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) != 0) 872 goto err; 873 hassubs = F_ISSET(pip, VRFY_HAS_SUBDBS) ? 1 : 0; 874 if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) 875 goto err; 876 pip = NULL; 877 878 if (isbad == 0 && hassubs) 879 if ((ret = 880 __db_vrfy_subdbs(dbp, vdp, dbname, flags)) != 0) { 881 if (ret == DB_VERIFY_BAD) 882 isbad = 1; 883 else 884 goto err; 885 } 886 break; 887 case DB_HASH: 888 if ((ret = __ham_vrfy_structure(dbp, vdp, 0, flags)) != 0) { 889 if (ret == DB_VERIFY_BAD) 890 isbad = 1; 891 else 892 goto err; 893 } 894 break; 895 case DB_QUEUE: 896 if ((ret = __qam_vrfy_structure(dbp, vdp, flags)) != 0) { 897 if (ret == DB_VERIFY_BAD) 898 isbad = 1; 899 } 900 901 /* 902 * Queue pages may be unreferenced and totally zeroed, if 903 * they're empty; queue doesn't have much structure, so 904 * this is unlikely to be wrong in any troublesome sense. 905 * Skip to "err". 906 */ 907 goto err; 908 case DB_UNKNOWN: 909 default: 910 ret = __db_unknown_path(env, "__db_vrfy_structure"); 911 goto err; 912 } 913 914 /* Walk free list. */ 915 if ((ret = 916 __db_vrfy_freelist(dbp, vdp, meta_pgno, flags)) == DB_VERIFY_BAD) 917 isbad = 1; 918 919 /* 920 * If structure checks up until now have failed, it's likely that 921 * checking what pages have been missed will result in oodles of 922 * extraneous error messages being EPRINTed. Skip to the end 923 * if this is the case; we're going to be printing at least one 924 * error anyway, and probably all the more salient ones. 925 */ 926 if (ret != 0 || isbad == 1) 927 goto err; 928 929 /* 930 * Make sure no page has been missed and that no page is still marked 931 * "all zeroes" (only certain hash pages can be, and they're unmarked 932 * in __ham_vrfy_structure). 933 */ 934 for (i = 0; i < vdp->last_pgno + 1; i++) { 935 if ((ret = __db_vrfy_getpageinfo(vdp, i, &pip)) != 0) 936 goto err; 937 if ((ret = __db_vrfy_pgset_get(pgset, 938 vdp->thread_info, i, &p)) != 0) 939 goto err; 940 if (pip->type == P_OVERFLOW) { 941 if ((u_int32_t)p != pip->refcount) { 942 EPRINT((env, 943 "Page %lu: overflow refcount %lu, referenced %lu times", 944 (u_long)i, 945 (u_long)pip->refcount, (u_long)p)); 946 isbad = 1; 947 } 948 } else if (p == 0 && 949#ifndef HAVE_FTRUNCATE 950 !(i > vdp->meta_last_pgno && 951 (F_ISSET(pip, VRFY_IS_ALLZEROES) || pip->type == P_HASH)) && 952#endif 953 !(dbp->type == DB_HASH && pip->type == P_INVALID)) { 954 /* 955 * It is OK for unreferenced hash buckets to be 956 * marked invalid and unreferenced. 957 */ 958 EPRINT((env, 959 "Page %lu: unreferenced page", (u_long)i)); 960 isbad = 1; 961 } 962 963 if (F_ISSET(pip, VRFY_IS_ALLZEROES) 964#ifndef HAVE_FTRUNCATE 965 && i <= vdp->meta_last_pgno 966#endif 967 ) { 968 EPRINT((env, 969 "Page %lu: totally zeroed page", (u_long)i)); 970 isbad = 1; 971 } 972 if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) 973 goto err; 974 pip = NULL; 975 } 976 977err: if (pip != NULL) 978 (void)__db_vrfy_putpageinfo(env, vdp, pip); 979 980 return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret); 981} 982 983/* 984 * __db_is_valid_magicno 985 */ 986static int 987__db_is_valid_magicno(magic, typep) 988 u_int32_t magic; 989 DBTYPE *typep; 990{ 991 switch (magic) { 992 case DB_BTREEMAGIC: 993 *typep = DB_BTREE; 994 return (1); 995 case DB_HASHMAGIC: 996 *typep = DB_HASH; 997 return (1); 998 case DB_QAMMAGIC: 999 *typep = DB_QUEUE; 1000 return (1); 1001 default: 1002 break; 1003 } 1004 *typep = DB_UNKNOWN; 1005 return (0); 1006} 1007 1008/* 1009 * __db_vrfy_common -- 1010 * Verify info common to all page types. 1011 * 1012 * PUBLIC: int __db_vrfy_common 1013 * PUBLIC: __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); 1014 */ 1015int 1016__db_vrfy_common(dbp, vdp, h, pgno, flags) 1017 DB *dbp; 1018 VRFY_DBINFO *vdp; 1019 PAGE *h; 1020 db_pgno_t pgno; 1021 u_int32_t flags; 1022{ 1023 ENV *env; 1024 VRFY_PAGEINFO *pip; 1025 int ret, t_ret; 1026 u_int8_t *p; 1027 1028 env = dbp->env; 1029 1030 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) 1031 return (ret); 1032 1033 pip->pgno = pgno; 1034 F_CLR(pip, VRFY_IS_ALLZEROES); 1035 1036 /* 1037 * Hash expands the table by leaving some pages between the 1038 * old last and the new last totally zeroed. These pages may 1039 * not be all zero if they were used, freed and then reallocated. 1040 * 1041 * Queue will create sparse files if sparse record numbers are used. 1042 */ 1043 if (pgno != 0 && PGNO(h) == 0) { 1044 F_SET(pip, VRFY_IS_ALLZEROES); 1045 for (p = (u_int8_t *)h; p < (u_int8_t *)h + dbp->pgsize; p++) 1046 if (*p != 0) { 1047 F_CLR(pip, VRFY_IS_ALLZEROES); 1048 break; 1049 } 1050 /* 1051 * Mark it as a hash, and we'll 1052 * check that that makes sense structurally later. 1053 * (The queue verification doesn't care, since queues 1054 * don't really have much in the way of structure.) 1055 */ 1056 pip->type = P_HASH; 1057 ret = 0; 1058 goto err; /* well, not really an err. */ 1059 } 1060 1061 if (PGNO(h) != pgno) { 1062 EPRINT((env, "Page %lu: bad page number %lu", 1063 (u_long)pgno, (u_long)h->pgno)); 1064 ret = DB_VERIFY_BAD; 1065 } 1066 1067 switch (h->type) { 1068 case P_INVALID: /* Order matches ordinal value. */ 1069 case P_HASH_UNSORTED: 1070 case P_IBTREE: 1071 case P_IRECNO: 1072 case P_LBTREE: 1073 case P_LRECNO: 1074 case P_OVERFLOW: 1075 case P_HASHMETA: 1076 case P_BTREEMETA: 1077 case P_QAMMETA: 1078 case P_QAMDATA: 1079 case P_LDUP: 1080 case P_HASH: 1081 break; 1082 default: 1083 EPRINT((env, "Page %lu: bad page type %lu", 1084 (u_long)pgno, (u_long)h->type)); 1085 ret = DB_VERIFY_BAD; 1086 } 1087 pip->type = h->type; 1088 1089err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) 1090 ret = t_ret; 1091 1092 return (ret); 1093} 1094 1095/* 1096 * __db_vrfy_invalid -- 1097 * Verify P_INVALID page. 1098 * (Yes, there's not much to do here.) 1099 */ 1100static int 1101__db_vrfy_invalid(dbp, vdp, h, pgno, flags) 1102 DB *dbp; 1103 VRFY_DBINFO *vdp; 1104 PAGE *h; 1105 db_pgno_t pgno; 1106 u_int32_t flags; 1107{ 1108 ENV *env; 1109 VRFY_PAGEINFO *pip; 1110 int ret, t_ret; 1111 1112 env = dbp->env; 1113 1114 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) 1115 return (ret); 1116 pip->next_pgno = pip->prev_pgno = 0; 1117 1118 if (!IS_VALID_PGNO(NEXT_PGNO(h))) { 1119 EPRINT((env, "Page %lu: invalid next_pgno %lu", 1120 (u_long)pgno, (u_long)NEXT_PGNO(h))); 1121 ret = DB_VERIFY_BAD; 1122 } else 1123 pip->next_pgno = NEXT_PGNO(h); 1124 1125 if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) 1126 ret = t_ret; 1127 return (ret); 1128} 1129 1130/* 1131 * __db_vrfy_datapage -- 1132 * Verify elements common to data pages (P_HASH, P_LBTREE, 1133 * P_IBTREE, P_IRECNO, P_LRECNO, P_OVERFLOW, P_DUPLICATE)--i.e., 1134 * those defined in the PAGE structure. 1135 * 1136 * Called from each of the per-page routines, after the 1137 * all-page-type-common elements of pip have been verified and filled 1138 * in. 1139 * 1140 * PUBLIC: int __db_vrfy_datapage 1141 * PUBLIC: __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); 1142 */ 1143int 1144__db_vrfy_datapage(dbp, vdp, h, pgno, flags) 1145 DB *dbp; 1146 VRFY_DBINFO *vdp; 1147 PAGE *h; 1148 db_pgno_t pgno; 1149 u_int32_t flags; 1150{ 1151 ENV *env; 1152 VRFY_PAGEINFO *pip; 1153 u_int32_t smallest_entry; 1154 int isbad, ret, t_ret; 1155 1156 env = dbp->env; 1157 1158 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) 1159 return (ret); 1160 isbad = 0; 1161 1162 /* 1163 * prev_pgno and next_pgno: store for inter-page checks, 1164 * verify that they point to actual pages and not to self. 1165 * 1166 * !!! 1167 * Internal btree pages do not maintain these fields (indeed, 1168 * they overload them). Skip. 1169 */ 1170 if (TYPE(h) != P_IBTREE && TYPE(h) != P_IRECNO) { 1171 if (!IS_VALID_PGNO(PREV_PGNO(h)) || PREV_PGNO(h) == pip->pgno) { 1172 isbad = 1; 1173 EPRINT((env, "Page %lu: invalid prev_pgno %lu", 1174 (u_long)pip->pgno, (u_long)PREV_PGNO(h))); 1175 } 1176 if (!IS_VALID_PGNO(NEXT_PGNO(h)) || NEXT_PGNO(h) == pip->pgno) { 1177 isbad = 1; 1178 EPRINT((env, "Page %lu: invalid next_pgno %lu", 1179 (u_long)pip->pgno, (u_long)NEXT_PGNO(h))); 1180 } 1181 pip->prev_pgno = PREV_PGNO(h); 1182 pip->next_pgno = NEXT_PGNO(h); 1183 } 1184 1185 /* 1186 * Verify the number of entries on the page: there's no good way to 1187 * determine if this is accurate. The best we can do is verify that 1188 * it's not more than can, in theory, fit on the page. Then, we make 1189 * sure there are at least this many valid elements in inp[], and 1190 * hope the test catches most cases. 1191 */ 1192 switch (TYPE(h)) { 1193 case P_HASH_UNSORTED: 1194 case P_HASH: 1195 smallest_entry = HKEYDATA_PSIZE(0); 1196 break; 1197 case P_IBTREE: 1198 smallest_entry = BINTERNAL_PSIZE(0); 1199 break; 1200 case P_IRECNO: 1201 smallest_entry = RINTERNAL_PSIZE; 1202 break; 1203 case P_LBTREE: 1204 case P_LDUP: 1205 case P_LRECNO: 1206 smallest_entry = BKEYDATA_PSIZE(0); 1207 break; 1208 default: 1209 smallest_entry = 0; 1210 break; 1211 } 1212 if (smallest_entry * NUM_ENT(h) / 2 > dbp->pgsize) { 1213 isbad = 1; 1214 EPRINT((env, "Page %lu: too many entries: %lu", 1215 (u_long)pgno, (u_long)NUM_ENT(h))); 1216 } 1217 1218 if (TYPE(h) != P_OVERFLOW) 1219 pip->entries = NUM_ENT(h); 1220 1221 /* 1222 * btree level. Should be zero unless we're a btree; 1223 * if we are a btree, should be between LEAFLEVEL and MAXBTREELEVEL, 1224 * and we need to save it off. 1225 */ 1226 switch (TYPE(h)) { 1227 case P_IBTREE: 1228 case P_IRECNO: 1229 if (LEVEL(h) < LEAFLEVEL + 1) { 1230 isbad = 1; 1231 EPRINT((env, "Page %lu: bad btree level %lu", 1232 (u_long)pgno, (u_long)LEVEL(h))); 1233 } 1234 pip->bt_level = LEVEL(h); 1235 break; 1236 case P_LBTREE: 1237 case P_LDUP: 1238 case P_LRECNO: 1239 if (LEVEL(h) != LEAFLEVEL) { 1240 isbad = 1; 1241 EPRINT((env, 1242 "Page %lu: btree leaf page has incorrect level %lu", 1243 (u_long)pgno, (u_long)LEVEL(h))); 1244 } 1245 break; 1246 default: 1247 if (LEVEL(h) != 0) { 1248 isbad = 1; 1249 EPRINT((env, 1250 "Page %lu: nonzero level %lu in non-btree database", 1251 (u_long)pgno, (u_long)LEVEL(h))); 1252 } 1253 break; 1254 } 1255 1256 /* 1257 * Even though inp[] occurs in all PAGEs, we look at it in the 1258 * access-method-specific code, since btree and hash treat 1259 * item lengths very differently, and one of the most important 1260 * things we want to verify is that the data--as specified 1261 * by offset and length--cover the right part of the page 1262 * without overlaps, gaps, or violations of the page boundary. 1263 */ 1264 if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) 1265 ret = t_ret; 1266 1267 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); 1268} 1269 1270/* 1271 * __db_vrfy_meta-- 1272 * Verify the access-method common parts of a meta page, using 1273 * normal mpool routines. 1274 * 1275 * PUBLIC: int __db_vrfy_meta 1276 * PUBLIC: __P((DB *, VRFY_DBINFO *, DBMETA *, db_pgno_t, u_int32_t)); 1277 */ 1278int 1279__db_vrfy_meta(dbp, vdp, meta, pgno, flags) 1280 DB *dbp; 1281 VRFY_DBINFO *vdp; 1282 DBMETA *meta; 1283 db_pgno_t pgno; 1284 u_int32_t flags; 1285{ 1286 DBTYPE dbtype, magtype; 1287 ENV *env; 1288 VRFY_PAGEINFO *pip; 1289 int isbad, ret, t_ret; 1290 1291 isbad = 0; 1292 env = dbp->env; 1293 1294 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) 1295 return (ret); 1296 1297 /* type plausible for a meta page */ 1298 switch (meta->type) { 1299 case P_BTREEMETA: 1300 dbtype = DB_BTREE; 1301 break; 1302 case P_HASHMETA: 1303 dbtype = DB_HASH; 1304 break; 1305 case P_QAMMETA: 1306 dbtype = DB_QUEUE; 1307 break; 1308 default: 1309 ret = __db_unknown_path(env, "__db_vrfy_meta"); 1310 goto err; 1311 } 1312 1313 /* magic number valid */ 1314 if (!__db_is_valid_magicno(meta->magic, &magtype)) { 1315 isbad = 1; 1316 EPRINT((env, 1317 "Page %lu: invalid magic number", (u_long)pgno)); 1318 } 1319 if (magtype != dbtype) { 1320 isbad = 1; 1321 EPRINT((env, 1322 "Page %lu: magic number does not match database type", 1323 (u_long)pgno)); 1324 } 1325 1326 /* version */ 1327 if ((dbtype == DB_BTREE && 1328 (meta->version > DB_BTREEVERSION || 1329 meta->version < DB_BTREEOLDVER)) || 1330 (dbtype == DB_HASH && 1331 (meta->version > DB_HASHVERSION || 1332 meta->version < DB_HASHOLDVER)) || 1333 (dbtype == DB_QUEUE && 1334 (meta->version > DB_QAMVERSION || 1335 meta->version < DB_QAMOLDVER))) { 1336 isbad = 1; 1337 EPRINT((env, 1338 "Page %lu: unsupported database version %lu; extraneous errors may result", 1339 (u_long)pgno, (u_long)meta->version)); 1340 } 1341 1342 /* pagesize */ 1343 if (meta->pagesize != dbp->pgsize) { 1344 isbad = 1; 1345 EPRINT((env, "Page %lu: invalid pagesize %lu", 1346 (u_long)pgno, (u_long)meta->pagesize)); 1347 } 1348 1349 /* Flags */ 1350 if (meta->metaflags != 0) { 1351 if (meta->metaflags == DBMETA_CHKSUM) 1352 F_SET(pip, VRFY_HAS_CHKSUM); 1353 else { 1354 isbad = 1; 1355 EPRINT((env, 1356 "Page %lu: bad meta-data flags value %#lx", 1357 (u_long)PGNO_BASE_MD, (u_long)meta->metaflags)); 1358 } 1359 } 1360 1361 /* 1362 * Free list. 1363 * 1364 * If this is not the main, master-database meta page, it 1365 * should not have a free list. 1366 */ 1367 if (pgno != PGNO_BASE_MD && meta->free != PGNO_INVALID) { 1368 isbad = 1; 1369 EPRINT((env, 1370 "Page %lu: nonempty free list on subdatabase metadata page", 1371 (u_long)pgno)); 1372 } 1373 1374 /* Can correctly be PGNO_INVALID--that's just the end of the list. */ 1375 if (meta->free != PGNO_INVALID && IS_VALID_PGNO(meta->free)) 1376 pip->free = meta->free; 1377 else if (!IS_VALID_PGNO(meta->free)) { 1378 isbad = 1; 1379 EPRINT((env, 1380 "Page %lu: nonsensical free list pgno %lu", 1381 (u_long)pgno, (u_long)meta->free)); 1382 } 1383 1384 /* 1385 * Check that the meta page agrees with what we got from mpool. 1386 * If we don't have FTRUNCATE then mpool could include some 1387 * zeroed pages at the end of the file, we assume the meta page 1388 * is correct. 1389 */ 1390 if (pgno == PGNO_BASE_MD && meta->last_pgno != vdp->last_pgno) { 1391#ifdef HAVE_FTRUNCATE 1392 isbad = 1; 1393 EPRINT((env, 1394 "Page %lu: last_pgno is not correct: %lu != %lu", 1395 (u_long)pgno, 1396 (u_long)meta->last_pgno, (u_long)vdp->last_pgno)); 1397#endif 1398 vdp->meta_last_pgno = meta->last_pgno; 1399 } 1400 1401 /* 1402 * We have now verified the common fields of the metadata page. 1403 * Clear the flag that told us they had been incompletely checked. 1404 */ 1405 F_CLR(pip, VRFY_INCOMPLETE); 1406 1407err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) 1408 ret = t_ret; 1409 1410 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); 1411} 1412 1413/* 1414 * __db_vrfy_freelist -- 1415 * Walk free list, checking off pages and verifying absence of 1416 * loops. 1417 */ 1418static int 1419__db_vrfy_freelist(dbp, vdp, meta, flags) 1420 DB *dbp; 1421 VRFY_DBINFO *vdp; 1422 db_pgno_t meta; 1423 u_int32_t flags; 1424{ 1425 DB *pgset; 1426 ENV *env; 1427 VRFY_PAGEINFO *pip; 1428 db_pgno_t cur_pgno, next_pgno; 1429 int p, ret, t_ret; 1430 1431 env = dbp->env; 1432 pgset = vdp->pgset; 1433 DB_ASSERT(env, pgset != NULL); 1434 1435 if ((ret = __db_vrfy_getpageinfo(vdp, meta, &pip)) != 0) 1436 return (ret); 1437 for (next_pgno = pip->free; 1438 next_pgno != PGNO_INVALID; next_pgno = pip->next_pgno) { 1439 cur_pgno = pip->pgno; 1440 if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) 1441 return (ret); 1442 1443 /* This shouldn't happen, but just in case. */ 1444 if (!IS_VALID_PGNO(next_pgno)) { 1445 EPRINT((env, 1446 "Page %lu: invalid next_pgno %lu on free list page", 1447 (u_long)cur_pgno, (u_long)next_pgno)); 1448 return (DB_VERIFY_BAD); 1449 } 1450 1451 /* Detect cycles. */ 1452 if ((ret = __db_vrfy_pgset_get(pgset, 1453 vdp->thread_info, next_pgno, &p)) != 0) 1454 return (ret); 1455 if (p != 0) { 1456 EPRINT((env, 1457 "Page %lu: page %lu encountered a second time on free list", 1458 (u_long)cur_pgno, (u_long)next_pgno)); 1459 return (DB_VERIFY_BAD); 1460 } 1461 if ((ret = __db_vrfy_pgset_inc(pgset, 1462 vdp->thread_info, next_pgno)) != 0) 1463 return (ret); 1464 1465 if ((ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0) 1466 return (ret); 1467 1468 if (pip->type != P_INVALID) { 1469 EPRINT((env, 1470 "Page %lu: non-invalid page %lu on free list", 1471 (u_long)cur_pgno, (u_long)next_pgno)); 1472 ret = DB_VERIFY_BAD; /* unsafe to continue */ 1473 break; 1474 } 1475 } 1476 1477 if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) 1478 ret = t_ret; 1479 return (ret); 1480} 1481 1482/* 1483 * __db_vrfy_subdbs -- 1484 * Walk the known-safe master database of subdbs with a cursor, 1485 * verifying the structure of each subdatabase we encounter. 1486 */ 1487static int 1488__db_vrfy_subdbs(dbp, vdp, dbname, flags) 1489 DB *dbp; 1490 VRFY_DBINFO *vdp; 1491 const char *dbname; 1492 u_int32_t flags; 1493{ 1494 DB *mdbp; 1495 DBC *dbc; 1496 DBT key, data; 1497 ENV *env; 1498 VRFY_PAGEINFO *pip; 1499 db_pgno_t meta_pgno; 1500 int ret, t_ret, isbad; 1501 u_int8_t type; 1502 1503 isbad = 0; 1504 dbc = NULL; 1505 env = dbp->env; 1506 1507 if ((ret = __db_master_open(dbp, 1508 vdp->thread_info, NULL, dbname, DB_RDONLY, 0, &mdbp)) != 0) 1509 return (ret); 1510 1511 if ((ret = __db_cursor_int(mdbp, NULL, 1512 NULL, DB_BTREE, PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0) 1513 goto err; 1514 1515 memset(&key, 0, sizeof(key)); 1516 memset(&data, 0, sizeof(data)); 1517 while ((ret = __dbc_get(dbc, &key, &data, DB_NEXT)) == 0) { 1518 if (data.size != sizeof(db_pgno_t)) { 1519 EPRINT((env, 1520 "Subdatabase entry not page-number size")); 1521 isbad = 1; 1522 goto err; 1523 } 1524 memcpy(&meta_pgno, data.data, data.size); 1525 /* 1526 * Subdatabase meta pgnos are stored in network byte 1527 * order for cross-endian compatibility. Swap if appropriate. 1528 */ 1529 DB_NTOHL_SWAP(env, &meta_pgno); 1530 if (meta_pgno == PGNO_INVALID || meta_pgno > vdp->last_pgno) { 1531 EPRINT((env, 1532 "Subdatabase entry references invalid page %lu", 1533 (u_long)meta_pgno)); 1534 isbad = 1; 1535 goto err; 1536 } 1537 if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &pip)) != 0) 1538 goto err; 1539 type = pip->type; 1540 if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) 1541 goto err; 1542 switch (type) { 1543 case P_BTREEMETA: 1544 if ((ret = __bam_vrfy_structure( 1545 dbp, vdp, meta_pgno, flags)) != 0) { 1546 if (ret == DB_VERIFY_BAD) 1547 isbad = 1; 1548 else 1549 goto err; 1550 } 1551 break; 1552 case P_HASHMETA: 1553 if ((ret = __ham_vrfy_structure( 1554 dbp, vdp, meta_pgno, flags)) != 0) { 1555 if (ret == DB_VERIFY_BAD) 1556 isbad = 1; 1557 else 1558 goto err; 1559 } 1560 break; 1561 case P_QAMMETA: 1562 default: 1563 EPRINT((env, 1564 "Subdatabase entry references page %lu of invalid type %lu", 1565 (u_long)meta_pgno, (u_long)type)); 1566 ret = DB_VERIFY_BAD; 1567 goto err; 1568 } 1569 } 1570 1571 if (ret == DB_NOTFOUND) 1572 ret = 0; 1573 1574err: if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0) 1575 ret = t_ret; 1576 1577 if ((t_ret = __db_close(mdbp, NULL, 0)) != 0 && ret == 0) 1578 ret = t_ret; 1579 1580 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); 1581} 1582 1583/* 1584 * __db_vrfy_struct_feedback -- 1585 * Provide feedback during top-down database structure traversal. 1586 * (See comment at the beginning of __db_vrfy_structure.) 1587 * 1588 * PUBLIC: void __db_vrfy_struct_feedback __P((DB *, VRFY_DBINFO *)); 1589 */ 1590void 1591__db_vrfy_struct_feedback(dbp, vdp) 1592 DB *dbp; 1593 VRFY_DBINFO *vdp; 1594{ 1595 int progress; 1596 1597 if (dbp->db_feedback == NULL) 1598 return; 1599 1600 if (vdp->pgs_remaining > 0) 1601 vdp->pgs_remaining--; 1602 1603 /* Don't allow a feedback call of 100 until we're really done. */ 1604 progress = 100 - (int)(vdp->pgs_remaining * 50 / (vdp->last_pgno + 1)); 1605 dbp->db_feedback(dbp, DB_VERIFY, progress == 100 ? 99 : progress); 1606} 1607 1608/* 1609 * __db_vrfy_orderchkonly -- 1610 * Do an sort-order/hashing check on a known-otherwise-good subdb. 1611 */ 1612static int 1613__db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags) 1614 DB *dbp; 1615 VRFY_DBINFO *vdp; 1616 const char *name, *subdb; 1617 u_int32_t flags; 1618{ 1619 BTMETA *btmeta; 1620 DB *mdbp, *pgset; 1621 DBC *pgsc; 1622 DBT key, data; 1623 DB_MPOOLFILE *mpf; 1624 ENV *env; 1625 HASH *h_internal; 1626 HMETA *hmeta; 1627 PAGE *h, *currpg; 1628 db_pgno_t meta_pgno, p, pgno; 1629 u_int32_t bucket; 1630 int t_ret, ret; 1631 1632 pgset = NULL; 1633 pgsc = NULL; 1634 env = dbp->env; 1635 mpf = dbp->mpf; 1636 currpg = h = NULL; 1637 1638 LF_CLR(DB_NOORDERCHK); 1639 1640 /* Open the master database and get the meta_pgno for the subdb. */ 1641 if ((ret = __db_master_open(dbp, 1642 vdp->thread_info, NULL, name, DB_RDONLY, 0, &mdbp)) != 0) 1643 goto err; 1644 1645 DB_INIT_DBT(key, subdb, strlen(subdb)); 1646 memset(&data, 0, sizeof(data)); 1647 if ((ret = __db_get(mdbp, 1648 vdp->thread_info, NULL, &key, &data, 0)) != 0) { 1649 if (ret == DB_NOTFOUND) 1650 ret = ENOENT; 1651 goto err; 1652 } 1653 1654 if (data.size != sizeof(db_pgno_t)) { 1655 EPRINT((env, "Subdatabase entry of invalid size")); 1656 ret = DB_VERIFY_BAD; 1657 goto err; 1658 } 1659 1660 memcpy(&meta_pgno, data.data, data.size); 1661 1662 /* 1663 * Subdatabase meta pgnos are stored in network byte 1664 * order for cross-endian compatibility. Swap if appropriate. 1665 */ 1666 DB_NTOHL_SWAP(env, &meta_pgno); 1667 1668 if ((ret = __memp_fget(mpf, 1669 &meta_pgno, vdp->thread_info, NULL, 0, &h)) != 0) 1670 goto err; 1671 1672 if ((ret = __db_vrfy_pgset(env, 1673 vdp->thread_info, dbp->pgsize, &pgset)) != 0) 1674 goto err; 1675 1676 switch (TYPE(h)) { 1677 case P_BTREEMETA: 1678 btmeta = (BTMETA *)h; 1679 if (F_ISSET(&btmeta->dbmeta, BTM_RECNO)) { 1680 /* Recnos have no order to check. */ 1681 ret = 0; 1682 goto err; 1683 } 1684 if ((ret = 1685 __db_meta2pgset(dbp, vdp, meta_pgno, flags, pgset)) != 0) 1686 goto err; 1687 if ((ret = __db_cursor_int(pgset, NULL, NULL, dbp->type, 1688 PGNO_INVALID, 0, DB_LOCK_INVALIDID, &pgsc)) != 0) 1689 goto err; 1690 while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) { 1691 if ((ret = __memp_fget(mpf, &p, 1692 vdp->thread_info, NULL, 0, &currpg)) != 0) 1693 goto err; 1694 if ((ret = __bam_vrfy_itemorder(dbp, NULL, 1695 vdp->thread_info, currpg, p, NUM_ENT(currpg), 1, 1696 F_ISSET(&btmeta->dbmeta, BTM_DUP), flags)) != 0) 1697 goto err; 1698 if ((ret = __memp_fput(mpf, 1699 vdp->thread_info, currpg, dbp->priority)) != 0) 1700 goto err; 1701 currpg = NULL; 1702 } 1703 1704 /* 1705 * The normal exit condition for the loop above is DB_NOTFOUND. 1706 * If we see that, zero it and continue on to cleanup. 1707 * Otherwise, it's a real error and will be returned. 1708 */ 1709 if (ret == DB_NOTFOUND) 1710 ret = 0; 1711 break; 1712 case P_HASHMETA: 1713 hmeta = (HMETA *)h; 1714 h_internal = (HASH *)dbp->h_internal; 1715 /* 1716 * Make sure h_charkey is right. 1717 */ 1718 if (h_internal == NULL) { 1719 EPRINT((env, 1720 "Page %lu: DB->h_internal field is NULL", 1721 (u_long)meta_pgno)); 1722 ret = DB_VERIFY_BAD; 1723 goto err; 1724 } 1725 if (h_internal->h_hash == NULL) 1726 h_internal->h_hash = hmeta->dbmeta.version < 5 1727 ? __ham_func4 : __ham_func5; 1728 if (hmeta->h_charkey != 1729 h_internal->h_hash(dbp, CHARKEY, sizeof(CHARKEY))) { 1730 EPRINT((env, 1731 "Page %lu: incorrect hash function for database", 1732 (u_long)meta_pgno)); 1733 ret = DB_VERIFY_BAD; 1734 goto err; 1735 } 1736 1737 /* 1738 * Foreach bucket, verify hashing on each page in the 1739 * corresponding chain of pages. 1740 */ 1741 for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) { 1742 pgno = BS_TO_PAGE(bucket, hmeta->spares); 1743 while (pgno != PGNO_INVALID) { 1744 if ((ret = __memp_fget(mpf, &pgno, 1745 vdp->thread_info, NULL, 0, &currpg)) != 0) 1746 goto err; 1747 if ((ret = __ham_vrfy_hashing(dbp, 1748 NUM_ENT(currpg), hmeta, bucket, pgno, 1749 flags, h_internal->h_hash)) != 0) 1750 goto err; 1751 pgno = NEXT_PGNO(currpg); 1752 if ((ret = __memp_fput(mpf, vdp->thread_info, 1753 currpg, dbp->priority)) != 0) 1754 goto err; 1755 currpg = NULL; 1756 } 1757 } 1758 break; 1759 default: 1760 EPRINT((env, "Page %lu: database metapage of bad type %lu", 1761 (u_long)meta_pgno, (u_long)TYPE(h))); 1762 ret = DB_VERIFY_BAD; 1763 break; 1764 } 1765 1766err: if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0) 1767 ret = t_ret; 1768 if (pgset != NULL && 1769 (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret == 0) 1770 ret = t_ret; 1771 if (h != NULL && (t_ret = __memp_fput(mpf, 1772 vdp->thread_info, h, dbp->priority)) != 0) 1773 ret = t_ret; 1774 if (currpg != NULL && 1775 (t_ret = __memp_fput(mpf, 1776 vdp->thread_info, currpg, dbp->priority)) != 0) 1777 ret = t_ret; 1778 if ((t_ret = __db_close(mdbp, NULL, 0)) != 0) 1779 ret = t_ret; 1780 return (ret); 1781} 1782 1783/* 1784 * __db_salvage -- 1785 * Walk through a page, salvaging all likely or plausible (w/ 1786 * DB_AGGRESSIVE) key/data pairs and marking seen pages in vdp. 1787 * 1788 * PUBLIC: int __db_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, 1789 * PUBLIC: PAGE *, void *, int (*)(void *, const void *), u_int32_t)); 1790 */ 1791int 1792__db_salvage(dbp, vdp, pgno, h, handle, callback, flags) 1793 DB *dbp; 1794 VRFY_DBINFO *vdp; 1795 db_pgno_t pgno; 1796 PAGE *h; 1797 void *handle; 1798 int (*callback) __P((void *, const void *)); 1799 u_int32_t flags; 1800{ 1801 ENV *env; 1802 VRFY_PAGEINFO *pip; 1803 int keyflag, ret, t_ret; 1804 1805 env = dbp->env; 1806 DB_ASSERT(env, LF_ISSET(DB_SALVAGE)); 1807 1808 /* 1809 * !!! 1810 * We dump record numbers when salvaging Queue databases, but not for 1811 * immutable Recno databases. The problem is we can't figure out the 1812 * record number from the database page in the Recno case, while the 1813 * offset in the file is sufficient for Queue. 1814 */ 1815 keyflag = 0; 1816 1817 /* If we got this page in the subdb pass, we can safely skip it. */ 1818 if (__db_salvage_isdone(vdp, pgno)) 1819 return (0); 1820 1821 switch (TYPE(h)) { 1822 case P_HASHMETA: 1823 ret = __ham_vrfy_meta(dbp, vdp, (HMETA *)h, pgno, flags); 1824 break; 1825 case P_BTREEMETA: 1826 ret = __bam_vrfy_meta(dbp, vdp, (BTMETA *)h, pgno, flags); 1827 break; 1828 case P_QAMMETA: 1829 keyflag = 1; 1830 ret = __qam_vrfy_meta(dbp, vdp, (QMETA *)h, pgno, flags); 1831 break; 1832 case P_HASH_UNSORTED: 1833 case P_HASH: 1834 return (__ham_salvage(dbp, vdp, 1835 pgno, h, handle, callback, flags)); 1836 case P_LBTREE: 1837 return (__bam_salvage(dbp, vdp, 1838 pgno, P_LBTREE, h, handle, callback, NULL, flags)); 1839 case P_LDUP: 1840 return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LDUP)); 1841 case P_OVERFLOW: 1842 return (__db_salvage_markneeded(vdp, pgno, SALVAGE_OVERFLOW)); 1843 case P_LRECNO: 1844 /* 1845 * Recnos are tricky -- they may represent dup pages, or 1846 * they may be subdatabase/regular database pages in their 1847 * own right. If the former, they need to be printed with a 1848 * key, preferably when we hit the corresponding datum in 1849 * a btree/hash page. If the latter, there is no key. 1850 * 1851 * If a database is sufficiently frotzed, we're not going 1852 * to be able to get this right, so we best-guess: just 1853 * mark it needed now, and if we're really a normal recno 1854 * database page, the "unknowns" pass will pick us up. 1855 */ 1856 return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LRECNO)); 1857 case P_QAMDATA: 1858 return (__qam_salvage(dbp, vdp, 1859 pgno, h, handle, callback, flags)); 1860 case P_IBTREE: 1861 /* 1862 * We need to mark any overflow keys on internal pages as seen, 1863 * so we don't print them out in __db_salvage_unknowns. But if 1864 * we're an upgraded database, a P_LBTREE page may very well 1865 * have a reference to the same overflow pages (this practice 1866 * stopped somewhere around db4.5). To give P_LBTREEs a chance 1867 * to print out any keys on shared pages, mark the page now and 1868 * deal with it at the end. 1869 */ 1870 return (__db_salvage_markneeded(vdp, pgno, SALVAGE_IBTREE)); 1871 case P_INVALID: 1872 case P_IRECNO: 1873 case __P_DUPLICATE: 1874 default: 1875 /* 1876 * There's no need to display an error, the page type was 1877 * already checked and reported on. 1878 */ 1879 return (0); 1880 } 1881 if (ret != 0) 1882 return (ret); 1883 1884 /* 1885 * We have to display the dump header if it's a metadata page. It's 1886 * our last chance as the page was marked "seen" in the vrfy routine, 1887 * and we won't see the page again. We don't display headers for 1888 * the first database in a multi-database file, that database simply 1889 * contains a list of subdatabases. 1890 */ 1891 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) 1892 return (ret); 1893 if (!F_ISSET(pip, VRFY_HAS_SUBDBS)) 1894 ret = __db_prheader( 1895 dbp, NULL, 0, keyflag, handle, callback, vdp, pgno); 1896 if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) 1897 ret = t_ret; 1898 return (ret); 1899} 1900 1901/* 1902 * __db_salvage_unknowns -- 1903 * Walk through the salvager database, printing with key "UNKNOWN" 1904 * any pages we haven't dealt with. 1905 */ 1906static int 1907__db_salvage_unknowns(dbp, vdp, handle, callback, flags) 1908 DB *dbp; 1909 VRFY_DBINFO *vdp; 1910 void *handle; 1911 int (*callback) __P((void *, const void *)); 1912 u_int32_t flags; 1913{ 1914 DBC *dbc; 1915 DBT unkdbt, key, *dbt; 1916 DB_MPOOLFILE *mpf; 1917 ENV *env; 1918 PAGE *h; 1919 db_pgno_t pgno; 1920 u_int32_t pgtype; 1921 int ret, t_ret; 1922 void *ovflbuf; 1923 1924 dbc = NULL; 1925 env = dbp->env; 1926 mpf = dbp->mpf; 1927 1928 DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1); 1929 1930 if ((ret = __os_malloc(env, dbp->pgsize, &ovflbuf)) != 0) 1931 return (ret); 1932 1933 /* 1934 * We make two passes -- in the first pass, skip SALVAGE_OVERFLOW 1935 * pages, because they may be referenced by the standard database 1936 * pages that we're resolving. 1937 */ 1938 while ((t_ret = 1939 __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 1)) == 0) { 1940 if ((t_ret = __memp_fget(mpf, 1941 &pgno, vdp->thread_info, NULL, 0, &h)) != 0) { 1942 if (ret == 0) 1943 ret = t_ret; 1944 continue; 1945 } 1946 1947 dbt = NULL; 1948 switch (pgtype) { 1949 case SALVAGE_LDUP: 1950 case SALVAGE_LRECNODUP: 1951 dbt = &unkdbt; 1952 /* FALLTHROUGH */ 1953 case SALVAGE_IBTREE: 1954 case SALVAGE_LBTREE: 1955 case SALVAGE_LRECNO: 1956 if ((t_ret = __bam_salvage(dbp, vdp, pgno, pgtype, 1957 h, handle, callback, dbt, flags)) != 0 && ret == 0) 1958 ret = t_ret; 1959 break; 1960 case SALVAGE_OVERFLOW: 1961 DB_ASSERT(env, 0); /* Shouldn't ever happen. */ 1962 break; 1963 case SALVAGE_HASH: 1964 if ((t_ret = __ham_salvage(dbp, vdp, 1965 pgno, h, handle, callback, flags)) != 0 && ret == 0) 1966 ret = t_ret; 1967 break; 1968 case SALVAGE_INVALID: 1969 case SALVAGE_IGNORE: 1970 default: 1971 /* 1972 * Shouldn't happen, but if it does, just do what the 1973 * nice man says. 1974 */ 1975 DB_ASSERT(env, 0); 1976 break; 1977 } 1978 if ((t_ret = __memp_fput(mpf, 1979 vdp->thread_info, h, dbp->priority)) != 0 && ret == 0) 1980 ret = t_ret; 1981 } 1982 1983 /* We should have reached the end of the database. */ 1984 if (t_ret == DB_NOTFOUND) 1985 t_ret = 0; 1986 if (t_ret != 0 && ret == 0) 1987 ret = t_ret; 1988 1989 /* Re-open the cursor so we traverse the database again. */ 1990 if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) 1991 ret = t_ret; 1992 dbc = NULL; 1993 1994 /* Now, deal with any remaining overflow pages. */ 1995 while ((t_ret = 1996 __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 0)) == 0) { 1997 if ((t_ret = __memp_fget(mpf, 1998 &pgno, vdp->thread_info, NULL, 0, &h)) != 0) { 1999 if (ret == 0) 2000 ret = t_ret; 2001 continue; 2002 } 2003 2004 switch (pgtype) { 2005 case SALVAGE_OVERFLOW: 2006 /* 2007 * XXX: 2008 * This may generate multiple "UNKNOWN" keys in 2009 * a database with no dups. What to do? 2010 */ 2011 if ((t_ret = __db_safe_goff(dbp, 2012 vdp, pgno, &key, &ovflbuf, flags)) != 0 || 2013 ((vdp->type == DB_BTREE || vdp->type == DB_HASH) && 2014 (t_ret = __db_vrfy_prdbt(&unkdbt, 2015 0, " ", handle, callback, 0, vdp)) != 0) || 2016 (t_ret = __db_vrfy_prdbt( 2017 &key, 0, " ", handle, callback, 0, vdp)) != 0) 2018 if (ret == 0) 2019 ret = t_ret; 2020 break; 2021 default: 2022 DB_ASSERT(env, 0); /* Shouldn't ever happen. */ 2023 break; 2024 } 2025 if ((t_ret = __memp_fput(mpf, 2026 vdp->thread_info, h, dbp->priority)) != 0 && ret == 0) 2027 ret = t_ret; 2028 } 2029 2030 /* We should have reached the end of the database. */ 2031 if (t_ret == DB_NOTFOUND) 2032 t_ret = 0; 2033 if (t_ret != 0 && ret == 0) 2034 ret = t_ret; 2035 2036 if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) 2037 ret = t_ret; 2038 2039 __os_free(env, ovflbuf); 2040 2041 return (ret); 2042} 2043 2044/* 2045 * Offset of the ith inp array entry, which we can compare to the offset 2046 * the entry stores. 2047 */ 2048#define INP_OFFSET(dbp, h, i) \ 2049 ((db_indx_t)((u_int8_t *)((P_INP(dbp,(h))) + (i)) - (u_int8_t *)(h))) 2050 2051/* 2052 * __db_vrfy_inpitem -- 2053 * Verify that a single entry in the inp array is sane, and update 2054 * the high water mark and current item offset. (The former of these is 2055 * used for state information between calls, and is required; it must 2056 * be initialized to the pagesize before the first call.) 2057 * 2058 * Returns DB_VERIFY_FATAL if inp has collided with the data, 2059 * since verification can't continue from there; returns DB_VERIFY_BAD 2060 * if anything else is wrong. 2061 * 2062 * PUBLIC: int __db_vrfy_inpitem __P((DB *, PAGE *, 2063 * PUBLIC: db_pgno_t, u_int32_t, int, u_int32_t, u_int32_t *, u_int32_t *)); 2064 */ 2065int 2066__db_vrfy_inpitem(dbp, h, pgno, i, is_btree, flags, himarkp, offsetp) 2067 DB *dbp; 2068 PAGE *h; 2069 db_pgno_t pgno; 2070 u_int32_t i; 2071 int is_btree; 2072 u_int32_t flags, *himarkp, *offsetp; 2073{ 2074 BKEYDATA *bk; 2075 ENV *env; 2076 db_indx_t *inp, offset, len; 2077 2078 env = dbp->env; 2079 2080 DB_ASSERT(env, himarkp != NULL); 2081 inp = P_INP(dbp, h); 2082 2083 /* 2084 * Check that the inp array, which grows from the beginning of the 2085 * page forward, has not collided with the data, which grow from the 2086 * end of the page backward. 2087 */ 2088 if (inp + i >= (db_indx_t *)((u_int8_t *)h + *himarkp)) { 2089 /* We've collided with the data. We need to bail. */ 2090 EPRINT((env, "Page %lu: entries listing %lu overlaps data", 2091 (u_long)pgno, (u_long)i)); 2092 return (DB_VERIFY_FATAL); 2093 } 2094 2095 offset = inp[i]; 2096 2097 /* 2098 * Check that the item offset is reasonable: it points somewhere 2099 * after the inp array and before the end of the page. 2100 */ 2101 if (offset <= INP_OFFSET(dbp, h, i) || offset > dbp->pgsize) { 2102 EPRINT((env, "Page %lu: bad offset %lu at page index %lu", 2103 (u_long)pgno, (u_long)offset, (u_long)i)); 2104 return (DB_VERIFY_BAD); 2105 } 2106 2107 /* Update the high-water mark (what HOFFSET should be) */ 2108 if (offset < *himarkp) 2109 *himarkp = offset; 2110 2111 if (is_btree) { 2112 /* 2113 * Check alignment; if it's unaligned, it's unsafe to 2114 * manipulate this item. 2115 */ 2116 if (offset != DB_ALIGN(offset, sizeof(u_int32_t))) { 2117 EPRINT((env, 2118 "Page %lu: unaligned offset %lu at page index %lu", 2119 (u_long)pgno, (u_long)offset, (u_long)i)); 2120 return (DB_VERIFY_BAD); 2121 } 2122 2123 /* 2124 * Check that the item length remains on-page. 2125 */ 2126 bk = GET_BKEYDATA(dbp, h, i); 2127 2128 /* 2129 * We need to verify the type of the item here; 2130 * we can't simply assume that it will be one of the 2131 * expected three. If it's not a recognizable type, 2132 * it can't be considered to have a verifiable 2133 * length, so it's not possible to certify it as safe. 2134 */ 2135 switch (B_TYPE(bk->type)) { 2136 case B_KEYDATA: 2137 len = bk->len; 2138 break; 2139 case B_DUPLICATE: 2140 case B_OVERFLOW: 2141 len = BOVERFLOW_SIZE; 2142 break; 2143 default: 2144 EPRINT((env, 2145 "Page %lu: item %lu of unrecognizable type", 2146 (u_long)pgno, (u_long)i)); 2147 return (DB_VERIFY_BAD); 2148 } 2149 2150 if ((size_t)(offset + len) > dbp->pgsize) { 2151 EPRINT((env, 2152 "Page %lu: item %lu extends past page boundary", 2153 (u_long)pgno, (u_long)i)); 2154 return (DB_VERIFY_BAD); 2155 } 2156 } 2157 2158 if (offsetp != NULL) 2159 *offsetp = offset; 2160 return (0); 2161} 2162 2163/* 2164 * __db_vrfy_duptype-- 2165 * Given a page number and a set of flags to __bam_vrfy_subtree, 2166 * verify that the dup tree type is correct--i.e., it's a recno 2167 * if DUPSORT is not set and a btree if it is. 2168 * 2169 * PUBLIC: int __db_vrfy_duptype 2170 * PUBLIC: __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t)); 2171 */ 2172int 2173__db_vrfy_duptype(dbp, vdp, pgno, flags) 2174 DB *dbp; 2175 VRFY_DBINFO *vdp; 2176 db_pgno_t pgno; 2177 u_int32_t flags; 2178{ 2179 ENV *env; 2180 VRFY_PAGEINFO *pip; 2181 int ret, isbad; 2182 2183 env = dbp->env; 2184 isbad = 0; 2185 2186 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) 2187 return (ret); 2188 2189 switch (pip->type) { 2190 case P_IBTREE: 2191 case P_LDUP: 2192 if (!LF_ISSET(DB_ST_DUPSORT)) { 2193 EPRINT((env, 2194 "Page %lu: sorted duplicate set in unsorted-dup database", 2195 (u_long)pgno)); 2196 isbad = 1; 2197 } 2198 break; 2199 case P_IRECNO: 2200 case P_LRECNO: 2201 if (LF_ISSET(DB_ST_DUPSORT)) { 2202 EPRINT((env, 2203 "Page %lu: unsorted duplicate set in sorted-dup database", 2204 (u_long)pgno)); 2205 isbad = 1; 2206 } 2207 break; 2208 default: 2209 /* 2210 * If the page is entirely zeroed, its pip->type will be a lie 2211 * (we assumed it was a hash page, as they're allowed to be 2212 * zeroed); handle this case specially. 2213 */ 2214 if (F_ISSET(pip, VRFY_IS_ALLZEROES)) 2215 ZEROPG_ERR_PRINT(env, pgno, "duplicate page"); 2216 else 2217 EPRINT((env, 2218 "Page %lu: duplicate page of inappropriate type %lu", 2219 (u_long)pgno, (u_long)pip->type)); 2220 isbad = 1; 2221 break; 2222 } 2223 2224 if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) 2225 return (ret); 2226 return (isbad == 1 ? DB_VERIFY_BAD : 0); 2227} 2228 2229/* 2230 * __db_salvage_duptree -- 2231 * Attempt to salvage a given duplicate tree, given its alleged root. 2232 * 2233 * The key that corresponds to this dup set has been passed to us 2234 * in DBT *key. Because data items follow keys, though, it has been 2235 * printed once already. 2236 * 2237 * The basic idea here is that pgno ought to be a P_LDUP, a P_LRECNO, a 2238 * P_IBTREE, or a P_IRECNO. If it's an internal page, use the verifier 2239 * functions to make sure it's safe; if it's not, we simply bail and the 2240 * data will have to be printed with no key later on. if it is safe, 2241 * recurse on each of its children. 2242 * 2243 * Whether or not it's safe, if it's a leaf page, __bam_salvage it. 2244 * 2245 * At all times, use the DB hanging off vdp to mark and check what we've 2246 * done, so each page gets printed exactly once and we don't get caught 2247 * in any cycles. 2248 * 2249 * PUBLIC: int __db_salvage_duptree __P((DB *, VRFY_DBINFO *, db_pgno_t, 2250 * PUBLIC: DBT *, void *, int (*)(void *, const void *), u_int32_t)); 2251 */ 2252int 2253__db_salvage_duptree(dbp, vdp, pgno, key, handle, callback, flags) 2254 DB *dbp; 2255 VRFY_DBINFO *vdp; 2256 db_pgno_t pgno; 2257 DBT *key; 2258 void *handle; 2259 int (*callback) __P((void *, const void *)); 2260 u_int32_t flags; 2261{ 2262 DB_MPOOLFILE *mpf; 2263 PAGE *h; 2264 int ret, t_ret; 2265 2266 mpf = dbp->mpf; 2267 2268 if (pgno == PGNO_INVALID || !IS_VALID_PGNO(pgno)) 2269 return (DB_VERIFY_BAD); 2270 2271 /* We have a plausible page. Try it. */ 2272 if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0) 2273 return (ret); 2274 2275 switch (TYPE(h)) { 2276 case P_IBTREE: 2277 case P_IRECNO: 2278 if ((ret = __db_vrfy_common(dbp, vdp, h, pgno, flags)) != 0) 2279 goto err; 2280 if ((ret = __bam_vrfy(dbp, 2281 vdp, h, pgno, flags | DB_NOORDERCHK)) != 0 || 2282 (ret = __db_salvage_markdone(vdp, pgno)) != 0) 2283 goto err; 2284 /* 2285 * We have a known-healthy internal page. Walk it. 2286 */ 2287 if ((ret = __bam_salvage_walkdupint(dbp, vdp, h, key, 2288 handle, callback, flags)) != 0) 2289 goto err; 2290 break; 2291 case P_LRECNO: 2292 case P_LDUP: 2293 if ((ret = __bam_salvage(dbp, 2294 vdp, pgno, TYPE(h), h, handle, callback, key, flags)) != 0) 2295 goto err; 2296 break; 2297 default: 2298 ret = DB_VERIFY_BAD; 2299 goto err; 2300 } 2301 2302err: if ((t_ret = __memp_fput(mpf, 2303 vdp->thread_info, h, dbp->priority)) != 0 && ret == 0) 2304 ret = t_ret; 2305 return (ret); 2306} 2307 2308/* 2309 * __db_salvage_subdbs -- 2310 * Check and see if this database has subdbs; if so, try to salvage 2311 * them independently. 2312 */ 2313static int 2314__db_salvage_subdbs(dbp, vdp, handle, callback, flags, hassubsp) 2315 DB *dbp; 2316 VRFY_DBINFO *vdp; 2317 void *handle; 2318 int (*callback) __P((void *, const void *)); 2319 u_int32_t flags; 2320 int *hassubsp; 2321{ 2322 DB *pgset; 2323 DBC *pgsc; 2324 DB_MPOOLFILE *mpf; 2325 ENV *env; 2326 PAGE *h; 2327 VRFY_PAGEINFO *pip; 2328 db_pgno_t p, meta_pgno; 2329 int ret, t_ret; 2330 2331 *hassubsp = 0; 2332 2333 env = dbp->env; 2334 pgset = NULL; 2335 pgsc = NULL; 2336 mpf = dbp->mpf; 2337 h = NULL; 2338 pip = NULL; 2339 ret = 0; 2340 2341 /* 2342 * Check to make sure the page is OK and find out if it contains 2343 * subdatabases. 2344 */ 2345 meta_pgno = PGNO_BASE_MD; 2346 if ((t_ret = __memp_fget(mpf, 2347 &meta_pgno, vdp->thread_info, NULL, 0, &h)) == 0 && 2348 (t_ret = __db_vrfy_common(dbp, vdp, h, PGNO_BASE_MD, flags)) == 0 && 2349 (t_ret = __db_salvage( 2350 dbp, vdp, PGNO_BASE_MD, h, handle, callback, flags)) == 0 && 2351 (t_ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) == 0) 2352 if (F_ISSET(pip, VRFY_HAS_SUBDBS)) 2353 *hassubsp = 1; 2354 if (pip != NULL && 2355 (t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) 2356 ret = t_ret; 2357 if (h != NULL) { 2358 if ((t_ret = __memp_fput(mpf, 2359 vdp->thread_info, h, dbp->priority)) != 0 && ret == 0) 2360 ret = t_ret; 2361 h = NULL; 2362 } 2363 if (ret != 0 || *hassubsp == 0) 2364 return (ret); 2365 2366 /* 2367 * We have subdbs. Try to crack them. 2368 * 2369 * To do so, get a set of leaf pages in the master database, and then 2370 * walk each of the valid ones, salvaging subdbs as we go. If any 2371 * prove invalid, just drop them; we'll pick them up on a later pass. 2372 */ 2373 if ((ret = __db_vrfy_pgset(env, 2374 vdp->thread_info, dbp->pgsize, &pgset)) != 0) 2375 goto err; 2376 if ((ret = __db_meta2pgset(dbp, vdp, PGNO_BASE_MD, flags, pgset)) != 0) 2377 goto err; 2378 if ((ret = __db_cursor(pgset, vdp->thread_info, NULL, &pgsc, 0)) != 0) 2379 goto err; 2380 while ((t_ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) { 2381 if ((t_ret = __memp_fget(mpf, 2382 &p, vdp->thread_info, NULL, 0, &h)) == 0 && 2383 (t_ret = __db_vrfy_common(dbp, vdp, h, p, flags)) == 0 && 2384 (t_ret = 2385 __bam_vrfy(dbp, vdp, h, p, flags | DB_NOORDERCHK)) == 0) 2386 t_ret = __db_salvage_subdbpg( 2387 dbp, vdp, h, handle, callback, flags); 2388 if (t_ret != 0 && ret == 0) 2389 ret = t_ret; 2390 if (h != NULL) { 2391 if ((t_ret = __memp_fput(mpf, vdp->thread_info, 2392 h, dbp->priority)) != 0 && ret == 0) 2393 ret = t_ret; 2394 h = NULL; 2395 } 2396 } 2397 2398 if (t_ret != DB_NOTFOUND && ret == 0) 2399 ret = t_ret; 2400 2401err: if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0) 2402 ret = t_ret; 2403 if (pgset != NULL && 2404 (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret ==0) 2405 ret = t_ret; 2406 if (h != NULL && 2407 (t_ret = __memp_fput(mpf, 2408 vdp->thread_info, h, dbp->priority)) != 0 && ret == 0) 2409 ret = t_ret; 2410 return (ret); 2411} 2412 2413/* 2414 * __db_salvage_subdbpg -- 2415 * Given a known-good leaf page in the master database, salvage all 2416 * leaf pages corresponding to each subdb. 2417 */ 2418static int 2419__db_salvage_subdbpg(dbp, vdp, master, handle, callback, flags) 2420 DB *dbp; 2421 VRFY_DBINFO *vdp; 2422 PAGE *master; 2423 void *handle; 2424 int (*callback) __P((void *, const void *)); 2425 u_int32_t flags; 2426{ 2427 BKEYDATA *bkkey, *bkdata; 2428 BOVERFLOW *bo; 2429 DB *pgset; 2430 DBC *pgsc; 2431 DBT key; 2432 DB_MPOOLFILE *mpf; 2433 ENV *env; 2434 PAGE *subpg; 2435 db_indx_t i; 2436 db_pgno_t meta_pgno, p; 2437 int ret, err_ret, t_ret; 2438 char *subdbname; 2439 2440 env = dbp->env; 2441 mpf = dbp->mpf; 2442 ret = err_ret = 0; 2443 subdbname = NULL; 2444 2445 if ((ret = __db_vrfy_pgset(env, 2446 vdp->thread_info, dbp->pgsize, &pgset)) != 0) 2447 return (ret); 2448 2449 /* 2450 * For each entry, get and salvage the set of pages 2451 * corresponding to that entry. 2452 */ 2453 for (i = 0; i < NUM_ENT(master); i += P_INDX) { 2454 bkkey = GET_BKEYDATA(dbp, master, i); 2455 bkdata = GET_BKEYDATA(dbp, master, i + O_INDX); 2456 2457 /* Get the subdatabase name. */ 2458 if (B_TYPE(bkkey->type) == B_OVERFLOW) { 2459 /* 2460 * We can, in principle anyway, have a subdb 2461 * name so long it overflows. Ick. 2462 */ 2463 bo = (BOVERFLOW *)bkkey; 2464 if ((ret = __db_safe_goff(dbp, vdp, 2465 bo->pgno, &key, &subdbname, flags)) != 0) { 2466 err_ret = DB_VERIFY_BAD; 2467 continue; 2468 } 2469 2470 /* Nul-terminate it. */ 2471 if ((ret = __os_realloc(env, 2472 key.size + 1, &subdbname)) != 0) 2473 goto err; 2474 subdbname[key.size] = '\0'; 2475 } else if (B_TYPE(bkkey->type) == B_KEYDATA) { 2476 if ((ret = __os_realloc(env, 2477 bkkey->len + 1, &subdbname)) != 0) 2478 goto err; 2479 memcpy(subdbname, bkkey->data, bkkey->len); 2480 subdbname[bkkey->len] = '\0'; 2481 } 2482 2483 /* Get the corresponding pgno. */ 2484 if (bkdata->len != sizeof(db_pgno_t)) { 2485 err_ret = DB_VERIFY_BAD; 2486 continue; 2487 } 2488 memcpy(&meta_pgno, 2489 (db_pgno_t *)bkdata->data, sizeof(db_pgno_t)); 2490 2491 /* 2492 * Subdatabase meta pgnos are stored in network byte 2493 * order for cross-endian compatibility. Swap if appropriate. 2494 */ 2495 DB_NTOHL_SWAP(env, &meta_pgno); 2496 2497 /* If we can't get the subdb meta page, just skip the subdb. */ 2498 if (!IS_VALID_PGNO(meta_pgno) || (ret = __memp_fget(mpf, 2499 &meta_pgno, vdp->thread_info, NULL, 0, &subpg)) != 0) { 2500 err_ret = ret; 2501 continue; 2502 } 2503 2504 /* 2505 * Verify the subdatabase meta page. This has two functions. 2506 * First, if it's bad, we have no choice but to skip the subdb 2507 * and let the pages just get printed on a later pass. Second, 2508 * the access-method-specific meta verification routines record 2509 * the various state info (such as the presence of dups) 2510 * that we need for __db_prheader(). 2511 */ 2512 if ((ret = 2513 __db_vrfy_common(dbp, vdp, subpg, meta_pgno, flags)) != 0) { 2514 err_ret = ret; 2515 (void)__memp_fput(mpf, 2516 vdp->thread_info, subpg, dbp->priority); 2517 continue; 2518 } 2519 switch (TYPE(subpg)) { 2520 case P_BTREEMETA: 2521 if ((ret = __bam_vrfy_meta(dbp, 2522 vdp, (BTMETA *)subpg, meta_pgno, flags)) != 0) { 2523 err_ret = ret; 2524 (void)__memp_fput(mpf, 2525 vdp->thread_info, subpg, dbp->priority); 2526 continue; 2527 } 2528 break; 2529 case P_HASHMETA: 2530 if ((ret = __ham_vrfy_meta(dbp, 2531 vdp, (HMETA *)subpg, meta_pgno, flags)) != 0) { 2532 err_ret = ret; 2533 (void)__memp_fput(mpf, 2534 vdp->thread_info, subpg, dbp->priority); 2535 continue; 2536 } 2537 break; 2538 default: 2539 /* This isn't an appropriate page; skip this subdb. */ 2540 err_ret = DB_VERIFY_BAD; 2541 continue; 2542 } 2543 2544 if ((ret = __memp_fput(mpf, 2545 vdp->thread_info, subpg, dbp->priority)) != 0) { 2546 err_ret = ret; 2547 continue; 2548 } 2549 2550 /* Print a subdatabase header. */ 2551 if ((ret = __db_prheader(dbp, 2552 subdbname, 0, 0, handle, callback, vdp, meta_pgno)) != 0) 2553 goto err; 2554 2555 if ((ret = __db_meta2pgset(dbp, vdp, meta_pgno, 2556 flags, pgset)) != 0) { 2557 err_ret = ret; 2558 continue; 2559 } 2560 2561 if ((ret = __db_cursor(pgset, 2562 vdp->thread_info, NULL, &pgsc, 0)) != 0) 2563 goto err; 2564 while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) { 2565 if ((ret = __memp_fget(mpf, 2566 &p, vdp->thread_info, NULL, 0, &subpg)) != 0) { 2567 err_ret = ret; 2568 continue; 2569 } 2570 if ((ret = __db_salvage(dbp, vdp, p, subpg, 2571 handle, callback, flags)) != 0) 2572 err_ret = ret; 2573 if ((ret = __memp_fput(mpf, 2574 vdp->thread_info, subpg, dbp->priority)) != 0) 2575 err_ret = ret; 2576 } 2577 2578 if (ret != DB_NOTFOUND) 2579 goto err; 2580 2581 if ((ret = __dbc_close(pgsc)) != 0) 2582 goto err; 2583 if ((ret = __db_prfooter(handle, callback)) != 0) 2584 goto err; 2585 } 2586err: if (subdbname) 2587 __os_free(env, subdbname); 2588 2589 if ((t_ret = __db_close(pgset, NULL, 0)) != 0) 2590 ret = t_ret; 2591 2592 if ((t_ret = __db_salvage_markdone(vdp, PGNO(master))) != 0) 2593 return (t_ret); 2594 2595 return ((err_ret != 0) ? err_ret : ret); 2596} 2597 2598/* 2599 * __db_meta2pgset -- 2600 * Given a known-safe meta page number, return the set of pages 2601 * corresponding to the database it represents. Return DB_VERIFY_BAD if 2602 * it's not a suitable meta page or is invalid. 2603 */ 2604static int 2605__db_meta2pgset(dbp, vdp, pgno, flags, pgset) 2606 DB *dbp; 2607 VRFY_DBINFO *vdp; 2608 db_pgno_t pgno; 2609 u_int32_t flags; 2610 DB *pgset; 2611{ 2612 DB_MPOOLFILE *mpf; 2613 PAGE *h; 2614 int ret, t_ret; 2615 2616 mpf = dbp->mpf; 2617 2618 if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0) 2619 return (ret); 2620 2621 switch (TYPE(h)) { 2622 case P_BTREEMETA: 2623 ret = __bam_meta2pgset(dbp, vdp, (BTMETA *)h, flags, pgset); 2624 break; 2625 case P_HASHMETA: 2626 ret = __ham_meta2pgset(dbp, vdp, (HMETA *)h, flags, pgset); 2627 break; 2628 default: 2629 ret = DB_VERIFY_BAD; 2630 break; 2631 } 2632 2633 if ((t_ret = __memp_fput(mpf, vdp->thread_info, h, dbp->priority)) != 0) 2634 return (t_ret); 2635 return (ret); 2636} 2637 2638/* 2639 * __db_guesspgsize -- 2640 * Try to guess what the pagesize is if the one on the meta page 2641 * and the one in the db are invalid. 2642 */ 2643static u_int 2644__db_guesspgsize(env, fhp) 2645 ENV *env; 2646 DB_FH *fhp; 2647{ 2648 db_pgno_t i; 2649 size_t nr; 2650 u_int32_t guess; 2651 u_int8_t type; 2652 2653 for (guess = DB_MAX_PGSIZE; guess >= DB_MIN_PGSIZE; guess >>= 1) { 2654 /* 2655 * We try to read three pages ahead after the first one 2656 * and make sure we have plausible types for all of them. 2657 * If the seeks fail, continue with a smaller size; 2658 * we're probably just looking past the end of the database. 2659 * If they succeed and the types are reasonable, also continue 2660 * with a size smaller; we may be looking at pages N, 2661 * 2N, and 3N for some N > 1. 2662 * 2663 * As soon as we hit an invalid type, we stop and return 2664 * our previous guess; that last one was probably the page size. 2665 */ 2666 for (i = 1; i <= 3; i++) { 2667 if (__os_seek( 2668 env, fhp, i, guess, SSZ(DBMETA, type)) != 0) 2669 break; 2670 if (__os_read(env, 2671 fhp, &type, 1, &nr) != 0 || nr == 0) 2672 break; 2673 if (type == P_INVALID || type >= P_PAGETYPE_MAX) 2674 return (guess << 1); 2675 } 2676 } 2677 2678 /* 2679 * If we're just totally confused--the corruption takes up most of the 2680 * beginning pages of the database--go with the default size. 2681 */ 2682 return (DB_DEF_IOSIZE); 2683} 2684