1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1999,2008 Oracle. All rights reserved. 5 * 6 * $Id: hash_verify.c,v 12.33 2008/03/12 22:33:03 mbrey Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/db_page.h" 13#include "dbinc/db_verify.h" 14#include "dbinc/btree.h" 15#include "dbinc/hash.h" 16#include "dbinc/mp.h" 17 18static int __ham_dups_unsorted __P((DB *, u_int8_t *, u_int32_t)); 19static int __ham_vrfy_bucket __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, 20 u_int32_t)); 21static int __ham_vrfy_item __P((DB *, 22 VRFY_DBINFO *, db_pgno_t, PAGE *, u_int32_t, u_int32_t)); 23 24/* 25 * __ham_vrfy_meta -- 26 * Verify the hash-specific part of a metadata page. 27 * 28 * Note that unlike btree, we don't save things off, because we 29 * will need most everything again to verify each page and the 30 * amount of state here is significant. 31 * 32 * PUBLIC: int __ham_vrfy_meta __P((DB *, VRFY_DBINFO *, HMETA *, 33 * PUBLIC: db_pgno_t, u_int32_t)); 34 */ 35int 36__ham_vrfy_meta(dbp, vdp, m, pgno, flags) 37 DB *dbp; 38 VRFY_DBINFO *vdp; 39 HMETA *m; 40 db_pgno_t pgno; 41 u_int32_t flags; 42{ 43 ENV *env; 44 HASH *hashp; 45 VRFY_PAGEINFO *pip; 46 int i, ret, t_ret, isbad; 47 u_int32_t pwr, mbucket; 48 u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); 49 50 env = dbp->env; 51 isbad = 0; 52 53 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) 54 return (ret); 55 56 hashp = dbp->h_internal; 57 58 if (hashp != NULL && hashp->h_hash != NULL) 59 hfunc = hashp->h_hash; 60 else 61 hfunc = __ham_func5; 62 63 /* 64 * If we haven't already checked the common fields in pagezero, 65 * check them. 
66 */ 67 if (!F_ISSET(pip, VRFY_INCOMPLETE) && 68 (ret = __db_vrfy_meta(dbp, vdp, &m->dbmeta, pgno, flags)) != 0) { 69 if (ret == DB_VERIFY_BAD) 70 isbad = 1; 71 else 72 goto err; 73 } 74 75 /* h_charkey */ 76 if (!LF_ISSET(DB_NOORDERCHK)) 77 if (m->h_charkey != hfunc(dbp, CHARKEY, sizeof(CHARKEY))) { 78 EPRINT((env, 79"Page %lu: database has custom hash function; reverify with DB_NOORDERCHK set", 80 (u_long)pgno)); 81 /* 82 * Return immediately; this is probably a sign of user 83 * error rather than database corruption, so we want to 84 * avoid extraneous errors. 85 */ 86 isbad = 1; 87 goto err; 88 } 89 90 /* max_bucket must be less than the last pgno. */ 91 if (m->max_bucket > vdp->last_pgno) { 92 EPRINT((env, 93 "Page %lu: Impossible max_bucket %lu on meta page", 94 (u_long)pgno, (u_long)m->max_bucket)); 95 /* 96 * Most other fields depend somehow on max_bucket, so 97 * we just return--there will be lots of extraneous 98 * errors. 99 */ 100 isbad = 1; 101 goto err; 102 } 103 104 /* 105 * max_bucket, high_mask and low_mask: high_mask must be one 106 * less than the next power of two above max_bucket, and 107 * low_mask must be one less than the power of two below it. 108 */ 109 pwr = (m->max_bucket == 0) ? 1 : 1 << __db_log2(m->max_bucket + 1); 110 if (m->high_mask != pwr - 1) { 111 EPRINT((env, 112 "Page %lu: incorrect high_mask %lu, should be %lu", 113 (u_long)pgno, (u_long)m->high_mask, (u_long)pwr - 1)); 114 isbad = 1; 115 } 116 pwr >>= 1; 117 if (m->low_mask != pwr - 1) { 118 EPRINT((env, 119 "Page %lu: incorrect low_mask %lu, should be %lu", 120 (u_long)pgno, (u_long)m->low_mask, (u_long)pwr - 1)); 121 isbad = 1; 122 } 123 124 /* ffactor: no check possible. */ 125 pip->h_ffactor = m->ffactor; 126 127 /* 128 * nelem: just make sure it's not astronomical for now. This is the 129 * same check that hash_upgrade does, since there was a bug in 2.X 130 * which could make nelem go "negative". 
131 */ 132 if (m->nelem > 0x80000000) { 133 EPRINT((env, 134 "Page %lu: suspiciously high nelem of %lu", 135 (u_long)pgno, (u_long)m->nelem)); 136 isbad = 1; 137 pip->h_nelem = 0; 138 } else 139 pip->h_nelem = m->nelem; 140 141 /* flags */ 142 if (F_ISSET(&m->dbmeta, DB_HASH_DUP)) 143 F_SET(pip, VRFY_HAS_DUPS); 144 if (F_ISSET(&m->dbmeta, DB_HASH_DUPSORT)) 145 F_SET(pip, VRFY_HAS_DUPSORT); 146 /* XXX: Why is the DB_HASH_SUBDB flag necessary? */ 147 148 /* spares array */ 149 for (i = 0; m->spares[i] != 0 && i < NCACHED; i++) { 150 /* 151 * We set mbucket to the maximum bucket that would use a given 152 * spares entry; we want to ensure that it's always less 153 * than last_pgno. 154 */ 155 mbucket = (1 << i) - 1; 156 if (BS_TO_PAGE(mbucket, m->spares) > vdp->last_pgno) { 157 EPRINT((env, 158 "Page %lu: spares array entry %d is invalid", 159 (u_long)pgno, i)); 160 isbad = 1; 161 } 162 } 163 164err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) 165 ret = t_ret; 166 if (LF_ISSET(DB_SALVAGE) && 167 (t_ret = __db_salvage_markdone(vdp, pgno)) != 0 && ret == 0) 168 ret = t_ret; 169 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); 170} 171 172/* 173 * __ham_vrfy -- 174 * Verify hash page. 175 * 176 * PUBLIC: int __ham_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, 177 * PUBLIC: u_int32_t)); 178 */ 179int 180__ham_vrfy(dbp, vdp, h, pgno, flags) 181 DB *dbp; 182 VRFY_DBINFO *vdp; 183 PAGE *h; 184 db_pgno_t pgno; 185 u_int32_t flags; 186{ 187 ENV *env; 188 VRFY_PAGEINFO *pip; 189 u_int32_t ent, himark, inpend; 190 db_indx_t *inp; 191 int isbad, ret, t_ret; 192 193 env = dbp->env; 194 isbad = 0; 195 196 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) 197 return (ret); 198 199 if (TYPE(h) != P_HASH && TYPE(h) != P_HASH_UNSORTED) { 200 ret = __db_unknown_path(env, "__ham_vrfy"); 201 goto err; 202 } 203 204 /* Verify and save off fields common to all PAGEs. 
*/ 205 if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { 206 if (ret == DB_VERIFY_BAD) 207 isbad = 1; 208 else 209 goto err; 210 } 211 212 /* 213 * Verify inp[]. Each offset from 0 to NUM_ENT(h) must be lower 214 * than the previous one, higher than the current end of the inp array, 215 * and lower than the page size. 216 * 217 * In any case, we return immediately if things are bad, as it would 218 * be unsafe to proceed. 219 */ 220 inp = P_INP(dbp, h); 221 for (ent = 0, himark = dbp->pgsize, 222 inpend = (u_int32_t)((u_int8_t *)inp - (u_int8_t *)h); 223 ent < NUM_ENT(h); ent++) 224 if (inp[ent] >= himark) { 225 EPRINT((env, 226 "Page %lu: item %lu is out of order or nonsensical", 227 (u_long)pgno, (u_long)ent)); 228 isbad = 1; 229 goto err; 230 } else if (inpend >= himark) { 231 EPRINT((env, 232 "Page %lu: entries array collided with data", 233 (u_long)pgno)); 234 isbad = 1; 235 goto err; 236 237 } else { 238 himark = inp[ent]; 239 inpend += sizeof(db_indx_t); 240 if ((ret = __ham_vrfy_item( 241 dbp, vdp, pgno, h, ent, flags)) != 0) 242 goto err; 243 } 244 245 if (!LF_ISSET(DB_NOORDERCHK) && TYPE(h) == P_HASH && (ret = 246 __ham_verify_sorted_page(dbp, vdp->thread_info, NULL, h)) != 0) 247 isbad = 1; 248 249err: if ((t_ret = 250 __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) 251 ret = t_ret; 252 return (ret == 0 && isbad == 1 ? DB_VERIFY_BAD : ret); 253} 254 255/* 256 * __ham_vrfy_item -- 257 * Given a hash page and an offset, sanity-check the item itself, 258 * and save off any overflow items or off-page dup children as necessary. 
 */
static int
__ham_vrfy_item(dbp, vdp, pgno, h, i, flags)
	DB *dbp;
	VRFY_DBINFO *vdp;
	db_pgno_t pgno;
	PAGE *h;
	u_int32_t i, flags;
{
	HOFFDUP hod;
	HOFFPAGE hop;
	VRFY_CHILDINFO child;
	VRFY_PAGEINFO *pip;
	db_indx_t offset, len, dlen, elen;
	int ret, t_ret;
	u_int8_t *databuf;

	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
		return (ret);

	switch (HPAGE_TYPE(dbp, h, i)) {
	case H_KEYDATA:
		/* Nothing to do here--everything but the type field is data */
		break;
	case H_DUPLICATE:
		/*
		 * Are we a datum or a key?  Better be the former.
		 *
		 * NOTE(review): a key stored as a duplicate item is only
		 * reported here; neither ret nor any "bad" flag is set, so
		 * verification continues past it -- confirm this leniency
		 * is intentional.
		 */
		if (i % 2 == 0) {
			EPRINT((dbp->env,
			    "Page %lu: hash key stored as duplicate item %lu",
			    (u_long)pip->pgno, (u_long)i));
		}
		/*
		 * Dups are encoded as a series within a single HKEYDATA,
		 * in which each dup is surrounded by a copy of its length
		 * on either side (so that the series can be walked in either
		 * direction).  We loop through this series and make sure
		 * each dup is reasonable.
		 *
		 * Note that at this point, we've verified item i-1, so
		 * it's safe to use LEN_HKEYDATA (which looks at inp[i-1]).
		 */
		len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i);
		databuf = HKEYDATA_DATA(P_ENTRY(dbp, h, i));
		for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) {
			/* Leading copy of the dup's length. */
			memcpy(&dlen, databuf + offset, sizeof(db_indx_t));

			/* Make sure the length is plausible. */
			if (offset + DUP_SIZE(dlen) > len) {
				EPRINT((dbp->env,
			    "Page %lu: duplicate item %lu has bad length",
				    (u_long)pip->pgno, (u_long)i));
				ret = DB_VERIFY_BAD;
				goto err;
			}

			/*
			 * Make sure the second copy of the length is the
			 * same as the first.
			 */
			memcpy(&elen,
			    databuf + offset + dlen + sizeof(db_indx_t),
			    sizeof(db_indx_t));
			if (elen != dlen) {
				EPRINT((dbp->env,
		"Page %lu: duplicate item %lu has two different lengths",
				    (u_long)pip->pgno, (u_long)i));
				ret = DB_VERIFY_BAD;
				goto err;
			}
		}
		F_SET(pip, VRFY_HAS_DUPS);
		if (!LF_ISSET(DB_NOORDERCHK) &&
		    __ham_dups_unsorted(dbp, databuf, len))
			F_SET(pip, VRFY_DUPS_UNSORTED);
		break;
	case H_OFFPAGE:
		/* Offpage item.  Make sure pgno is sane, save off. */
		memcpy(&hop, P_ENTRY(dbp, h, i), HOFFPAGE_SIZE);
		if (!IS_VALID_PGNO(hop.pgno) || hop.pgno == pip->pgno ||
		    hop.pgno == PGNO_INVALID) {
			EPRINT((dbp->env,
			    "Page %lu: offpage item %lu has bad pgno %lu",
			    (u_long)pip->pgno, (u_long)i, (u_long)hop.pgno));
			ret = DB_VERIFY_BAD;
			goto err;
		}
		memset(&child, 0, sizeof(VRFY_CHILDINFO));
		child.pgno = hop.pgno;
		child.type = V_OVERFLOW;
		child.tlen = hop.tlen; /* This will get checked later. */
		if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0)
			goto err;
		break;
	case H_OFFDUP:
		/* Offpage duplicate item.  Same drill. */
		memcpy(&hod, P_ENTRY(dbp, h, i), HOFFDUP_SIZE);
		if (!IS_VALID_PGNO(hod.pgno) || hod.pgno == pip->pgno ||
		    hod.pgno == PGNO_INVALID) {
			EPRINT((dbp->env,
			    "Page %lu: offpage item %lu has bad page number",
			    (u_long)pip->pgno, (u_long)i));
			ret = DB_VERIFY_BAD;
			goto err;
		}
		memset(&child, 0, sizeof(VRFY_CHILDINFO));
		child.pgno = hod.pgno;
		child.type = V_DUPLICATE;
		if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0)
			goto err;
		F_SET(pip, VRFY_HAS_DUPS);
		break;
	default:
		EPRINT((dbp->env,
		    "Page %lu: item %lu has bad type",
		    (u_long)pip->pgno, (u_long)i));
		ret = DB_VERIFY_BAD;
		break;
	}

err:	if ((t_ret =
	    __db_vrfy_putpageinfo(dbp->env, vdp, pip)) != 0 && ret == 0)
		ret = t_ret;
	return (ret);
}

/*
 * __ham_vrfy_structure --
 *	Verify the structure of a hash database.
 *
 * PUBLIC: int __ham_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t,
 * PUBLIC:     u_int32_t));
 */
int
__ham_vrfy_structure(dbp, vdp, meta_pgno, flags)
	DB *dbp;
	VRFY_DBINFO *vdp;
	db_pgno_t meta_pgno;
	u_int32_t flags;
{
	DB *pgset;
	DB_MPOOLFILE *mpf;
	HMETA *m;
	PAGE *h;
	VRFY_PAGEINFO *pip;
	int isbad, p, ret, t_ret;
	db_pgno_t pgno;
	u_int32_t bucket, spares_entry;

	mpf = dbp->mpf;
	pgset = vdp->pgset;
	/*
	 * h is never assigned in this function; the put at err: is purely
	 * defensive and the NULL initialization keeps it a no-op.
	 */
	h = NULL;
	ret = isbad = 0;

	/* The meta page may be referenced by at most one structure. */
	if ((ret = __db_vrfy_pgset_get(pgset,
	    vdp->thread_info, meta_pgno, &p)) != 0)
		return (ret);
	if (p != 0) {
		EPRINT((dbp->env,
		    "Page %lu: Hash meta page referenced twice",
		    (u_long)meta_pgno));
		return (DB_VERIFY_BAD);
	}
	if ((ret = __db_vrfy_pgset_inc(pgset,
	    vdp->thread_info, meta_pgno)) != 0)
		return (ret);

	/* Get the meta page; we'll need it frequently. */
	if ((ret = __memp_fget(mpf,
	    &meta_pgno, vdp->thread_info, NULL, 0, &m)) != 0)
		return (ret);

	/* Loop through bucket by bucket. */
	for (bucket = 0; bucket <= m->max_bucket; bucket++)
		if ((ret =
		    __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)) != 0) {
			if (ret == DB_VERIFY_BAD)
				isbad = 1;
			else
				goto err;
		}

	/*
	 * There may be unused hash pages corresponding to buckets
	 * that have been allocated but not yet used.  These may be
	 * part of the current doubling above max_bucket, or they may
	 * correspond to buckets that were used in a transaction
	 * that then aborted.
	 *
	 * Loop through them, as far as the spares array defines them,
	 * and make sure they're all empty.
	 *
	 * Note that this should be safe, since we've already verified
	 * that the spares array is sane.
	 */
	for (bucket = m->max_bucket + 1; spares_entry = __db_log2(bucket + 1),
	    spares_entry < NCACHED && m->spares[spares_entry] != 0; bucket++) {
		pgno = BS_TO_PAGE(bucket, m->spares);
		if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
			goto err;

		/* It's okay if these pages are totally zeroed; unmark it. */
		F_CLR(pip, VRFY_IS_ALLZEROES);

		/* It's also OK if this page is simply invalid. */
		if (pip->type == P_INVALID) {
			if ((ret = __db_vrfy_putpageinfo(dbp->env,
			    vdp, pip)) != 0)
				goto err;
			continue;
		}

		if (pip->type != P_HASH && pip->type != P_HASH_UNSORTED) {
			EPRINT((dbp->env,
			    "Page %lu: hash bucket %lu maps to non-hash page",
			    (u_long)pgno, (u_long)bucket));
			isbad = 1;
		} else if (pip->entries != 0) {
			EPRINT((dbp->env,
		    "Page %lu: non-empty page in unused hash bucket %lu",
			    (u_long)pgno, (u_long)bucket));
			isbad = 1;
		} else {
			if ((ret = __db_vrfy_pgset_get(pgset,
			    vdp->thread_info, pgno, &p)) != 0)
				goto err;
			if (p != 0) {
				EPRINT((dbp->env,
				    "Page %lu: above max_bucket referenced",
				    (u_long)pgno));
				isbad = 1;
			} else {
				if ((ret = __db_vrfy_pgset_inc(pgset,
				    vdp->thread_info, pgno)) != 0)
					goto err;
				if ((ret = __db_vrfy_putpageinfo(dbp->env,
				    vdp, pip)) != 0)
					goto err;
				continue;
			}
		}

		/* If we got here, it's an error. */
		(void)__db_vrfy_putpageinfo(dbp->env, vdp, pip);
		goto err;
	}

err:	if ((t_ret = __memp_fput(mpf, vdp->thread_info, m, dbp->priority)) != 0)
		return (t_ret);
	if (h != NULL &&
	    (t_ret = __memp_fput(mpf, vdp->thread_info, h, dbp->priority)) != 0)
		return (t_ret);
	return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD: ret);
}

/*
 * __ham_vrfy_bucket --
 *	Verify a given bucket.
516 */ 517static int 518__ham_vrfy_bucket(dbp, vdp, m, bucket, flags) 519 DB *dbp; 520 VRFY_DBINFO *vdp; 521 HMETA *m; 522 u_int32_t bucket, flags; 523{ 524 ENV *env; 525 HASH *hashp; 526 VRFY_CHILDINFO *child; 527 VRFY_PAGEINFO *mip, *pip; 528 int ret, t_ret, isbad, p; 529 db_pgno_t pgno, next_pgno; 530 DBC *cc; 531 u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); 532 533 env = dbp->env; 534 isbad = 0; 535 pip = NULL; 536 cc = NULL; 537 538 hashp = dbp->h_internal; 539 if (hashp != NULL && hashp->h_hash != NULL) 540 hfunc = hashp->h_hash; 541 else 542 hfunc = __ham_func5; 543 544 if ((ret = __db_vrfy_getpageinfo(vdp, PGNO(m), &mip)) != 0) 545 return (ret); 546 547 /* Calculate the first pgno for this bucket. */ 548 pgno = BS_TO_PAGE(bucket, m->spares); 549 550 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) 551 goto err; 552 553 /* Make sure we got a plausible page number. */ 554 if (pgno > vdp->last_pgno || 555 (pip->type != P_HASH && pip->type != P_HASH_UNSORTED)) { 556 EPRINT((env, 557 "Page %lu: impossible first page in bucket %lu", 558 (u_long)pgno, (u_long)bucket)); 559 /* Unsafe to continue. */ 560 isbad = 1; 561 goto err; 562 } 563 564 if (pip->prev_pgno != PGNO_INVALID) { 565 EPRINT((env, 566 "Page %lu: first page in hash bucket %lu has a prev_pgno", 567 (u_long)pgno, (u_long)bucket)); 568 isbad = 1; 569 } 570 571 /* 572 * Set flags for dups and sorted dups. 573 */ 574 flags |= F_ISSET(mip, VRFY_HAS_DUPS) ? DB_ST_DUPOK : 0; 575 flags |= F_ISSET(mip, VRFY_HAS_DUPSORT) ? DB_ST_DUPSORT : 0; 576 577 /* Loop until we find a fatal bug, or until we run out of pages. */ 578 for (;;) { 579 /* Provide feedback on our progress to the application. 
*/ 580 if (!LF_ISSET(DB_SALVAGE)) 581 __db_vrfy_struct_feedback(dbp, vdp); 582 583 if ((ret = __db_vrfy_pgset_get(vdp->pgset, 584 vdp->thread_info, pgno, &p)) != 0) 585 goto err; 586 if (p != 0) { 587 EPRINT((env, 588 "Page %lu: hash page referenced twice", 589 (u_long)pgno)); 590 isbad = 1; 591 /* Unsafe to continue. */ 592 goto err; 593 } else if ((ret = __db_vrfy_pgset_inc(vdp->pgset, 594 vdp->thread_info, pgno)) != 0) 595 goto err; 596 597 /* 598 * Hash pages that nothing has ever hashed to may never 599 * have actually come into existence, and may appear to be 600 * entirely zeroed. This is acceptable, and since there's 601 * no real way for us to know whether this has actually 602 * occurred, we clear the "wholly zeroed" flag on every 603 * hash page. A wholly zeroed page, by nature, will appear 604 * to have no flags set and zero entries, so should 605 * otherwise verify correctly. 606 */ 607 F_CLR(pip, VRFY_IS_ALLZEROES); 608 609 /* If we have dups, our meta page had better know about it. */ 610 if (F_ISSET(pip, VRFY_HAS_DUPS) && 611 !F_ISSET(mip, VRFY_HAS_DUPS)) { 612 EPRINT((env, 613 "Page %lu: duplicates present in non-duplicate database", 614 (u_long)pgno)); 615 isbad = 1; 616 } 617 618 /* 619 * If the database has sorted dups, this page had better 620 * not have unsorted ones. 621 */ 622 if (F_ISSET(mip, VRFY_HAS_DUPSORT) && 623 F_ISSET(pip, VRFY_DUPS_UNSORTED)) { 624 EPRINT((env, 625 "Page %lu: unsorted dups in sorted-dup database", 626 (u_long)pgno)); 627 isbad = 1; 628 } 629 630 /* Walk overflow chains and offpage dup trees. 
*/ 631 if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0) 632 goto err; 633 for (ret = __db_vrfy_ccset(cc, pip->pgno, &child); ret == 0; 634 ret = __db_vrfy_ccnext(cc, &child)) 635 if (child->type == V_OVERFLOW) { 636 if ((ret = __db_vrfy_ovfl_structure(dbp, vdp, 637 child->pgno, child->tlen, 638 flags | DB_ST_OVFL_LEAF)) != 0) { 639 if (ret == DB_VERIFY_BAD) 640 isbad = 1; 641 else 642 goto err; 643 } 644 } else if (child->type == V_DUPLICATE) { 645 if ((ret = __db_vrfy_duptype(dbp, 646 vdp, child->pgno, flags)) != 0) { 647 isbad = 1; 648 continue; 649 } 650 if ((ret = __bam_vrfy_subtree(dbp, vdp, 651 child->pgno, NULL, NULL, 652 flags | DB_ST_RECNUM | DB_ST_DUPSET | DB_ST_TOPLEVEL, 653 NULL, NULL, NULL)) != 0) { 654 if (ret == DB_VERIFY_BAD) 655 isbad = 1; 656 else 657 goto err; 658 } 659 } 660 if ((ret = __db_vrfy_ccclose(cc)) != 0) 661 goto err; 662 cc = NULL; 663 664 /* If it's safe to check that things hash properly, do so. */ 665 if (isbad == 0 && !LF_ISSET(DB_NOORDERCHK) && 666 (ret = __ham_vrfy_hashing(dbp, pip->entries, 667 m, bucket, pgno, flags, hfunc)) != 0) { 668 if (ret == DB_VERIFY_BAD) 669 isbad = 1; 670 else 671 goto err; 672 } 673 674 next_pgno = pip->next_pgno; 675 ret = __db_vrfy_putpageinfo(env, vdp, pip); 676 677 pip = NULL; 678 if (ret != 0) 679 goto err; 680 681 if (next_pgno == PGNO_INVALID) 682 break; /* End of the bucket. */ 683 684 /* We already checked this, but just in case... 
*/ 685 if (!IS_VALID_PGNO(next_pgno)) { 686 EPRINT((env, 687 "Page %lu: hash page has bad next_pgno", 688 (u_long)pgno)); 689 isbad = 1; 690 goto err; 691 } 692 693 if ((ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0) 694 goto err; 695 696 if (pip->prev_pgno != pgno) { 697 EPRINT((env, 698 "Page %lu: hash page has bad prev_pgno", 699 (u_long)next_pgno)); 700 isbad = 1; 701 } 702 pgno = next_pgno; 703 } 704 705err: if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0) 706 ret = t_ret; 707 if (mip != NULL && ((t_ret = 708 __db_vrfy_putpageinfo(env, vdp, mip)) != 0) && ret == 0) 709 ret = t_ret; 710 if (pip != NULL && ((t_ret = 711 __db_vrfy_putpageinfo(env, vdp, pip)) != 0) && ret == 0) 712 ret = t_ret; 713 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); 714} 715 716/* 717 * __ham_vrfy_hashing -- 718 * Verify that all items on a given hash page hash correctly. 719 * 720 * PUBLIC: int __ham_vrfy_hashing __P((DB *, 721 * PUBLIC: u_int32_t, HMETA *, u_int32_t, db_pgno_t, u_int32_t, 722 * PUBLIC: u_int32_t (*) __P((DB *, const void *, u_int32_t)))); 723 */ 724int 725__ham_vrfy_hashing(dbp, nentries, m, thisbucket, pgno, flags, hfunc) 726 DB *dbp; 727 u_int32_t nentries; 728 HMETA *m; 729 u_int32_t thisbucket; 730 db_pgno_t pgno; 731 u_int32_t flags; 732 u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); 733{ 734 DBT dbt; 735 DB_MPOOLFILE *mpf; 736 DB_THREAD_INFO *ip; 737 PAGE *h; 738 db_indx_t i; 739 int ret, t_ret, isbad; 740 u_int32_t hval, bucket; 741 742 mpf = dbp->mpf; 743 ret = isbad = 0; 744 745 memset(&dbt, 0, sizeof(DBT)); 746 F_SET(&dbt, DB_DBT_REALLOC); 747 ENV_GET_THREAD_INFO(dbp->env, ip); 748 749 if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &h)) != 0) 750 return (ret); 751 752 for (i = 0; i < nentries; i += 2) { 753 /* 754 * We've already verified the page integrity and that of any 755 * overflow chains linked off it; it is therefore safe to use 756 * __db_ret. 
It's also not all that much slower, since we have 757 * to copy every hash item to deal with alignment anyway; we 758 * can tweak this a bit if this proves to be a bottleneck, 759 * but for now, take the easy route. 760 */ 761 if ((ret = __db_ret(dbp, ip, 762 NULL, h, i, &dbt, NULL, NULL)) != 0) 763 goto err; 764 hval = hfunc(dbp, dbt.data, dbt.size); 765 766 bucket = hval & m->high_mask; 767 if (bucket > m->max_bucket) 768 bucket = bucket & m->low_mask; 769 770 if (bucket != thisbucket) { 771 EPRINT((dbp->env, 772 "Page %lu: item %lu hashes incorrectly", 773 (u_long)pgno, (u_long)i)); 774 isbad = 1; 775 } 776 } 777 778err: if (dbt.data != NULL) 779 __os_ufree(dbp->env, dbt.data); 780 if ((t_ret = __memp_fput(mpf, ip, h, dbp->priority)) != 0) 781 return (t_ret); 782 783 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); 784} 785 786/* 787 * __ham_salvage -- 788 * Safely dump out anything that looks like a key on an alleged 789 * hash page. 790 * 791 * PUBLIC: int __ham_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, 792 * PUBLIC: void *, int (*)(void *, const void *), u_int32_t)); 793 */ 794int 795__ham_salvage(dbp, vdp, pgno, h, handle, callback, flags) 796 DB *dbp; 797 VRFY_DBINFO *vdp; 798 db_pgno_t pgno; 799 PAGE *h; 800 void *handle; 801 int (*callback) __P((void *, const void *)); 802 u_int32_t flags; 803{ 804 DBT dbt, key_dbt, unkdbt; 805 db_pgno_t dpgno; 806 int ret, err_ret, t_ret; 807 u_int32_t himark, i; 808 u_int8_t *hk, *p; 809 void *buf, *key_buf; 810 db_indx_t dlen, len, tlen; 811 812 memset(&dbt, 0, sizeof(DBT)); 813 dbt.flags = DB_DBT_REALLOC; 814 815 DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1); 816 817 err_ret = 0; 818 819 /* 820 * Allocate a buffer for overflow items. Start at one page; 821 * __db_safe_goff will realloc as needed. 
822 */ 823 if ((ret = __os_malloc(dbp->env, dbp->pgsize, &buf)) != 0) 824 return (ret); 825 826 himark = dbp->pgsize; 827 for (i = 0;; i++) { 828 /* If we're not aggressive, break when we hit NUM_ENT(h). */ 829 if (!LF_ISSET(DB_AGGRESSIVE) && i >= NUM_ENT(h)) 830 break; 831 832 /* Verify the current item. */ 833 ret = __db_vrfy_inpitem(dbp, 834 h, pgno, i, 0, flags, &himark, NULL); 835 /* If this returned a fatality, it's time to break. */ 836 if (ret == DB_VERIFY_FATAL) 837 break; 838 839 if (ret == 0) { 840 /* Set len to total entry length. */ 841 len = LEN_HITEM(dbp, h, dbp->pgsize, i); 842 hk = P_ENTRY(dbp, h, i); 843 if (len == 0 || len > dbp->pgsize || 844 (u_int32_t)(hk + len - (u_int8_t *)h) > 845 dbp->pgsize) { 846 /* Item is unsafely large; skip it. */ 847 err_ret = DB_VERIFY_BAD; 848 continue; 849 } 850 switch (HPAGE_PTYPE(hk)) { 851 case H_KEYDATA: 852 /* Update len to size of item. */ 853 len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); 854keydata: memcpy(buf, HKEYDATA_DATA(hk), len); 855 dbt.size = len; 856 dbt.data = buf; 857 if ((ret = __db_vrfy_prdbt(&dbt, 858 0, " ", handle, callback, 0, vdp)) != 0) 859 err_ret = ret; 860 break; 861 case H_OFFPAGE: 862 if (len < HOFFPAGE_SIZE) { 863 err_ret = DB_VERIFY_BAD; 864 continue; 865 } 866 memcpy(&dpgno, 867 HOFFPAGE_PGNO(hk), sizeof(dpgno)); 868 if ((ret = __db_safe_goff(dbp, vdp, 869 dpgno, &dbt, &buf, flags)) != 0) { 870 err_ret = ret; 871 (void)__db_vrfy_prdbt(&unkdbt, 0, " ", 872 handle, callback, 0, vdp); 873 /* fallthrough to end of case */ 874 } else if ((ret = __db_vrfy_prdbt(&dbt, 875 0, " ", handle, callback, 0, vdp)) != 0) 876 err_ret = ret; 877 break; 878 case H_OFFDUP: 879 if (len < HOFFDUP_SIZE) { 880 err_ret = DB_VERIFY_BAD; 881 continue; 882 } 883 memcpy(&dpgno, 884 HOFFDUP_PGNO(hk), sizeof(dpgno)); 885 /* UNKNOWN iff pgno is bad or we're a key. 
*/ 886 if (!IS_VALID_PGNO(dpgno) || (i % 2 == 0)) { 887 if ((ret = 888 __db_vrfy_prdbt(&unkdbt, 0, " ", 889 handle, callback, 0, vdp)) != 0) 890 err_ret = ret; 891 } else if ((ret = __db_salvage_duptree(dbp, 892 vdp, dpgno, &dbt, handle, callback, 893 flags | DB_SA_SKIPFIRSTKEY)) != 0) 894 err_ret = ret; 895 break; 896 case H_DUPLICATE: 897 /* 898 * This is an on-page duplicate item, iterate 899 * over the duplicate set, printing out 900 * key/data pairs. 901 */ 902 len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); 903 /* 904 * If this item is at an even index it must be 905 * a key item and it should never be of type 906 * H_DUPLICATE. If we are in aggressive mode, 907 * print the item out as a normal key, and let 908 * the user resolve the discrepancy. 909 */ 910 if (i % 2 == 0) { 911 err_ret = ret; 912 if (LF_ISSET(DB_AGGRESSIVE)) 913 goto keydata; 914 break; 915 } 916 917 /* 918 * Check to ensure that the item size is 919 * greater than the smallest possible on page 920 * duplicate. 921 */ 922 if (len < 923 HKEYDATA_SIZE(2 * sizeof(db_indx_t))) { 924 err_ret = DB_VERIFY_BAD; 925 continue; 926 } 927 928 /* 929 * Copy out the key from the dbt, it is still 930 * present from the previous pass. 931 */ 932 memset(&key_dbt, 0, sizeof(key_dbt)); 933 if ((ret = __os_malloc( 934 dbp->env, dbt.size, &key_buf)) != 0) 935 return (ret); 936 memcpy(key_buf, buf, dbt.size); 937 key_dbt.data = key_buf; 938 key_dbt.size = dbt.size; 939 key_dbt.flags = DB_DBT_USERMEM; 940 941 /* Loop until we hit the total length. */ 942 for (tlen = 0; tlen + sizeof(db_indx_t) < len; 943 tlen += dlen + 2 * sizeof(db_indx_t)) { 944 /* 945 * Print the key for every duplicate 946 * item. Except the first dup, since 947 * the key was already output once by 948 * the previous iteration. 
949 */ 950 if (tlen != 0) { 951 if ((ret = __db_vrfy_prdbt( 952 &key_dbt, 0, " ", handle, 953 callback, 0, vdp)) != 0) 954 err_ret = ret; 955 } 956 p = HKEYDATA_DATA(hk) + tlen; 957 memcpy(&dlen, p, sizeof(db_indx_t)); 958 p += sizeof(db_indx_t); 959 /* 960 * If dlen is too long, print all the 961 * rest of the dup set in a chunk. 962 */ 963 if (dlen + tlen + sizeof(db_indx_t) > 964 len) { 965 dlen = len - 966 (tlen + sizeof(db_indx_t)); 967 err_ret = DB_VERIFY_BAD; 968 } 969 memcpy(buf, p, dlen); 970 dbt.size = dlen; 971 dbt.data = buf; 972 if ((ret = __db_vrfy_prdbt(&dbt, 0, " ", 973 handle, callback, 0, vdp)) != 0) 974 err_ret = ret; 975 } 976 __os_free(dbp->env, key_buf); 977 break; 978 default: 979 if (!LF_ISSET(DB_AGGRESSIVE)) 980 break; 981 err_ret = DB_VERIFY_BAD; 982 break; 983 } 984 } 985 } 986 987 __os_free(dbp->env, buf); 988 if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0) 989 return (t_ret); 990 return ((ret == 0 && err_ret != 0) ? err_ret : ret); 991} 992 993/* 994 * __ham_meta2pgset -- 995 * Return the set of hash pages corresponding to the given 996 * known-good meta page. 997 * 998 * PUBLIC: int __ham_meta2pgset __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, 999 * PUBLIC: DB *)); 1000 */ 1001int 1002__ham_meta2pgset(dbp, vdp, hmeta, flags, pgset) 1003 DB *dbp; 1004 VRFY_DBINFO *vdp; 1005 HMETA *hmeta; 1006 u_int32_t flags; 1007 DB *pgset; 1008{ 1009 DB_MPOOLFILE *mpf; 1010 DB_THREAD_INFO *ip; 1011 PAGE *h; 1012 db_pgno_t pgno; 1013 u_int32_t bucket, totpgs; 1014 int ret, val; 1015 1016 /* 1017 * We don't really need flags, but leave them for consistency with 1018 * __bam_meta2pgset. 1019 */ 1020 COMPQUIET(flags, 0); 1021 ip = vdp->thread_info; 1022 1023 DB_ASSERT(dbp->env, pgset != NULL); 1024 1025 mpf = dbp->mpf; 1026 totpgs = 0; 1027 1028 /* 1029 * Loop through all the buckets, pushing onto pgset the corresponding 1030 * page(s) for each one. 
1031 */ 1032 for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) { 1033 pgno = BS_TO_PAGE(bucket, hmeta->spares); 1034 1035 /* 1036 * We know the initial pgno is safe because the spares array has 1037 * been verified. 1038 * 1039 * Safely walk the list of pages in this bucket. 1040 */ 1041 for (;;) { 1042 if ((ret = 1043 __memp_fget(mpf, &pgno, ip, NULL, 0, &h)) != 0) 1044 return (ret); 1045 if (TYPE(h) == P_HASH || TYPE(h) == P_HASH_UNSORTED) { 1046 1047 /* 1048 * Make sure we don't go past the end of 1049 * pgset. 1050 */ 1051 if (++totpgs > vdp->last_pgno) { 1052 (void)__memp_fput(mpf, 1053 ip, h, dbp->priority); 1054 return (DB_VERIFY_BAD); 1055 } 1056 if ((ret = __db_vrfy_pgset_inc(pgset, 1057 vdp->thread_info, pgno)) != 0) { 1058 (void)__memp_fput(mpf, 1059 ip, h, dbp->priority); 1060 return (ret); 1061 } 1062 1063 pgno = NEXT_PGNO(h); 1064 } else 1065 pgno = PGNO_INVALID; 1066 1067 if ((ret = __memp_fput(mpf, ip, h, dbp->priority)) != 0) 1068 return (ret); 1069 1070 /* If the new pgno is wonky, go onto the next bucket. */ 1071 if (!IS_VALID_PGNO(pgno) || 1072 pgno == PGNO_INVALID) 1073 break; 1074 1075 /* 1076 * If we've touched this page before, we have a cycle; 1077 * go on to the next bucket. 1078 */ 1079 if ((ret = __db_vrfy_pgset_get(pgset, 1080 vdp->thread_info, pgno, &val)) != 0) 1081 return (ret); 1082 if (val != 0) 1083 break; 1084 } 1085 } 1086 return (0); 1087} 1088 1089/* 1090 * __ham_dups_unsorted -- 1091 * Takes a known-safe hash duplicate set and its total length. 1092 * Returns 1 if there are out-of-order duplicates in this set, 1093 * 0 if there are not. 1094 */ 1095static int 1096__ham_dups_unsorted(dbp, buf, len) 1097 DB *dbp; 1098 u_int8_t *buf; 1099 u_int32_t len; 1100{ 1101 DBT a, b; 1102 db_indx_t offset, dlen; 1103 int (*func) __P((DB *, const DBT *, const DBT *)); 1104 1105 memset(&a, 0, sizeof(DBT)); 1106 memset(&b, 0, sizeof(DBT)); 1107 1108 func = (dbp->dup_compare == NULL) ? 
__bam_defcmp : dbp->dup_compare; 1109 1110 /* 1111 * Loop through the dup set until we hit the end or we find 1112 * a pair of dups that's out of order. b is always the current 1113 * dup, a the one before it. 1114 */ 1115 for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) { 1116 memcpy(&dlen, buf + offset, sizeof(db_indx_t)); 1117 b.data = buf + offset + sizeof(db_indx_t); 1118 b.size = dlen; 1119 1120 if (a.data != NULL && func(dbp, &a, &b) > 0) 1121 return (1); 1122 1123 a.data = b.data; 1124 a.size = b.size; 1125 } 1126 1127 return (0); 1128} 1129