1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1999-2009 Oracle. All rights reserved. 5 * 6 * $Id$ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/db_page.h" 13#include "dbinc/db_verify.h" 14#include "dbinc/btree.h" 15#include "dbinc/hash.h" 16#include "dbinc/lock.h" 17#include "dbinc/mp.h" 18 19static int __ham_dups_unsorted __P((DB *, u_int8_t *, u_int32_t)); 20static int __ham_vrfy_bucket __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, 21 u_int32_t)); 22static int __ham_vrfy_item __P((DB *, 23 VRFY_DBINFO *, db_pgno_t, PAGE *, u_int32_t, u_int32_t)); 24 25/* 26 * __ham_vrfy_meta -- 27 * Verify the hash-specific part of a metadata page. 28 * 29 * Note that unlike btree, we don't save things off, because we 30 * will need most everything again to verify each page and the 31 * amount of state here is significant. 32 * 33 * PUBLIC: int __ham_vrfy_meta __P((DB *, VRFY_DBINFO *, HMETA *, 34 * PUBLIC: db_pgno_t, u_int32_t)); 35 */ 36int 37__ham_vrfy_meta(dbp, vdp, m, pgno, flags) 38 DB *dbp; 39 VRFY_DBINFO *vdp; 40 HMETA *m; 41 db_pgno_t pgno; 42 u_int32_t flags; 43{ 44 ENV *env; 45 HASH *hashp; 46 VRFY_PAGEINFO *pip; 47 int i, ret, t_ret, isbad; 48 u_int32_t pwr, mbucket; 49 u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); 50 51 env = dbp->env; 52 isbad = 0; 53 54 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) 55 return (ret); 56 57 hashp = dbp->h_internal; 58 59 if (hashp != NULL && hashp->h_hash != NULL) 60 hfunc = hashp->h_hash; 61 else 62 hfunc = __ham_func5; 63 64 /* 65 * If we haven't already checked the common fields in pagezero, 66 * check them. 
67 */ 68 if (!F_ISSET(pip, VRFY_INCOMPLETE) && 69 (ret = __db_vrfy_meta(dbp, vdp, &m->dbmeta, pgno, flags)) != 0) { 70 if (ret == DB_VERIFY_BAD) 71 isbad = 1; 72 else 73 goto err; 74 } 75 76 /* h_charkey */ 77 if (!LF_ISSET(DB_NOORDERCHK)) 78 if (m->h_charkey != hfunc(dbp, CHARKEY, sizeof(CHARKEY))) { 79 EPRINT((env, 80"Page %lu: database has custom hash function; reverify with DB_NOORDERCHK set", 81 (u_long)pgno)); 82 /* 83 * Return immediately; this is probably a sign of user 84 * error rather than database corruption, so we want to 85 * avoid extraneous errors. 86 */ 87 isbad = 1; 88 goto err; 89 } 90 91 /* max_bucket must be less than the last pgno. */ 92 if (m->max_bucket > vdp->last_pgno) { 93 EPRINT((env, 94 "Page %lu: Impossible max_bucket %lu on meta page", 95 (u_long)pgno, (u_long)m->max_bucket)); 96 /* 97 * Most other fields depend somehow on max_bucket, so 98 * we just return--there will be lots of extraneous 99 * errors. 100 */ 101 isbad = 1; 102 goto err; 103 } 104 105 /* 106 * max_bucket, high_mask and low_mask: high_mask must be one 107 * less than the next power of two above max_bucket, and 108 * low_mask must be one less than the power of two below it. 109 */ 110 pwr = (m->max_bucket == 0) ? 1 : 1 << __db_log2(m->max_bucket + 1); 111 if (m->high_mask != pwr - 1) { 112 EPRINT((env, 113 "Page %lu: incorrect high_mask %lu, should be %lu", 114 (u_long)pgno, (u_long)m->high_mask, (u_long)pwr - 1)); 115 isbad = 1; 116 } 117 pwr >>= 1; 118 if (m->low_mask != pwr - 1) { 119 EPRINT((env, 120 "Page %lu: incorrect low_mask %lu, should be %lu", 121 (u_long)pgno, (u_long)m->low_mask, (u_long)pwr - 1)); 122 isbad = 1; 123 } 124 125 /* ffactor: no check possible. */ 126 pip->h_ffactor = m->ffactor; 127 128 /* 129 * nelem: just make sure it's not astronomical for now. This is the 130 * same check that hash_upgrade does, since there was a bug in 2.X 131 * which could make nelem go "negative". 
132 */ 133 if (m->nelem > 0x80000000) { 134 EPRINT((env, 135 "Page %lu: suspiciously high nelem of %lu", 136 (u_long)pgno, (u_long)m->nelem)); 137 isbad = 1; 138 pip->h_nelem = 0; 139 } else 140 pip->h_nelem = m->nelem; 141 142 /* flags */ 143 if (F_ISSET(&m->dbmeta, DB_HASH_DUP)) 144 F_SET(pip, VRFY_HAS_DUPS); 145 if (F_ISSET(&m->dbmeta, DB_HASH_DUPSORT)) 146 F_SET(pip, VRFY_HAS_DUPSORT); 147 /* XXX: Why is the DB_HASH_SUBDB flag necessary? */ 148 149 /* spares array */ 150 for (i = 0; m->spares[i] != 0 && i < NCACHED; i++) { 151 /* 152 * We set mbucket to the maximum bucket that would use a given 153 * spares entry; we want to ensure that it's always less 154 * than last_pgno. 155 */ 156 mbucket = (1 << i) - 1; 157 if (BS_TO_PAGE(mbucket, m->spares) > vdp->last_pgno) { 158 EPRINT((env, 159 "Page %lu: spares array entry %d is invalid", 160 (u_long)pgno, i)); 161 isbad = 1; 162 } 163 } 164 165err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) 166 ret = t_ret; 167 if (LF_ISSET(DB_SALVAGE) && 168 (t_ret = __db_salvage_markdone(vdp, pgno)) != 0 && ret == 0) 169 ret = t_ret; 170 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); 171} 172 173/* 174 * __ham_vrfy -- 175 * Verify hash page. 176 * 177 * PUBLIC: int __ham_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, 178 * PUBLIC: u_int32_t)); 179 */ 180int 181__ham_vrfy(dbp, vdp, h, pgno, flags) 182 DB *dbp; 183 VRFY_DBINFO *vdp; 184 PAGE *h; 185 db_pgno_t pgno; 186 u_int32_t flags; 187{ 188 DBC *dbc; 189 ENV *env; 190 VRFY_PAGEINFO *pip; 191 u_int32_t ent, himark, inpend; 192 db_indx_t *inp; 193 int isbad, ret, t_ret; 194 195 env = dbp->env; 196 isbad = 0; 197 198 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) 199 return (ret); 200 201 if (TYPE(h) != P_HASH && TYPE(h) != P_HASH_UNSORTED) { 202 ret = __db_unknown_path(env, "__ham_vrfy"); 203 goto err; 204 } 205 206 /* Verify and save off fields common to all PAGEs. 
*/ 207 if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { 208 if (ret == DB_VERIFY_BAD) 209 isbad = 1; 210 else 211 goto err; 212 } 213 214 /* 215 * Verify inp[]. Each offset from 0 to NUM_ENT(h) must be lower 216 * than the previous one, higher than the current end of the inp array, 217 * and lower than the page size. 218 * 219 * In any case, we return immediately if things are bad, as it would 220 * be unsafe to proceed. 221 */ 222 inp = P_INP(dbp, h); 223 for (ent = 0, himark = dbp->pgsize, 224 inpend = (u_int32_t)((u_int8_t *)inp - (u_int8_t *)h); 225 ent < NUM_ENT(h); ent++) 226 if (inp[ent] >= himark) { 227 EPRINT((env, 228 "Page %lu: item %lu is out of order or nonsensical", 229 (u_long)pgno, (u_long)ent)); 230 isbad = 1; 231 goto err; 232 } else if (inpend >= himark) { 233 EPRINT((env, 234 "Page %lu: entries array collided with data", 235 (u_long)pgno)); 236 isbad = 1; 237 goto err; 238 239 } else { 240 himark = inp[ent]; 241 inpend += sizeof(db_indx_t); 242 if ((ret = __ham_vrfy_item( 243 dbp, vdp, pgno, h, ent, flags)) != 0) 244 goto err; 245 } 246 247 if ((ret = __db_cursor_int(dbp, vdp->thread_info, NULL, DB_HASH, 248 PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0) 249 return (ret); 250 if (!LF_ISSET(DB_NOORDERCHK) && TYPE(h) == P_HASH && 251 (ret = __ham_verify_sorted_page(dbc, h)) != 0) 252 isbad = 1; 253 254err: if ((t_ret = 255 __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) 256 ret = t_ret; 257 return (ret == 0 && isbad == 1 ? DB_VERIFY_BAD : ret); 258} 259 260/* 261 * __ham_vrfy_item -- 262 * Given a hash page and an offset, sanity-check the item itself, 263 * and save off any overflow items or off-page dup children as necessary. 
264 */ 265static int 266__ham_vrfy_item(dbp, vdp, pgno, h, i, flags) 267 DB *dbp; 268 VRFY_DBINFO *vdp; 269 db_pgno_t pgno; 270 PAGE *h; 271 u_int32_t i, flags; 272{ 273 HOFFDUP hod; 274 HOFFPAGE hop; 275 VRFY_CHILDINFO child; 276 VRFY_PAGEINFO *pip; 277 db_indx_t offset, len, dlen, elen; 278 int ret, t_ret; 279 u_int8_t *databuf; 280 281 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) 282 return (ret); 283 284 switch (HPAGE_TYPE(dbp, h, i)) { 285 case H_KEYDATA: 286 /* Nothing to do here--everything but the type field is data */ 287 break; 288 case H_DUPLICATE: 289 /* Are we a datum or a key? Better be the former. */ 290 if (i % 2 == 0) { 291 EPRINT((dbp->env, 292 "Page %lu: hash key stored as duplicate item %lu", 293 (u_long)pip->pgno, (u_long)i)); 294 } 295 /* 296 * Dups are encoded as a series within a single HKEYDATA, 297 * in which each dup is surrounded by a copy of its length 298 * on either side (so that the series can be walked in either 299 * direction. We loop through this series and make sure 300 * each dup is reasonable. 301 * 302 * Note that at this point, we've verified item i-1, so 303 * it's safe to use LEN_HKEYDATA (which looks at inp[i-1]). 304 */ 305 len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); 306 databuf = HKEYDATA_DATA(P_ENTRY(dbp, h, i)); 307 for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) { 308 memcpy(&dlen, databuf + offset, sizeof(db_indx_t)); 309 310 /* Make sure the length is plausible. */ 311 if (offset + DUP_SIZE(dlen) > len) { 312 EPRINT((dbp->env, 313 "Page %lu: duplicate item %lu has bad length", 314 (u_long)pip->pgno, (u_long)i)); 315 ret = DB_VERIFY_BAD; 316 goto err; 317 } 318 319 /* 320 * Make sure the second copy of the length is the 321 * same as the first. 
322 */ 323 memcpy(&elen, 324 databuf + offset + dlen + sizeof(db_indx_t), 325 sizeof(db_indx_t)); 326 if (elen != dlen) { 327 EPRINT((dbp->env, 328 "Page %lu: duplicate item %lu has two different lengths", 329 (u_long)pip->pgno, (u_long)i)); 330 ret = DB_VERIFY_BAD; 331 goto err; 332 } 333 } 334 F_SET(pip, VRFY_HAS_DUPS); 335 if (!LF_ISSET(DB_NOORDERCHK) && 336 __ham_dups_unsorted(dbp, databuf, len)) 337 F_SET(pip, VRFY_DUPS_UNSORTED); 338 break; 339 case H_OFFPAGE: 340 /* Offpage item. Make sure pgno is sane, save off. */ 341 memcpy(&hop, P_ENTRY(dbp, h, i), HOFFPAGE_SIZE); 342 if (!IS_VALID_PGNO(hop.pgno) || hop.pgno == pip->pgno || 343 hop.pgno == PGNO_INVALID) { 344 EPRINT((dbp->env, 345 "Page %lu: offpage item %lu has bad pgno %lu", 346 (u_long)pip->pgno, (u_long)i, (u_long)hop.pgno)); 347 ret = DB_VERIFY_BAD; 348 goto err; 349 } 350 memset(&child, 0, sizeof(VRFY_CHILDINFO)); 351 child.pgno = hop.pgno; 352 child.type = V_OVERFLOW; 353 child.tlen = hop.tlen; /* This will get checked later. */ 354 if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0) 355 goto err; 356 break; 357 case H_OFFDUP: 358 /* Offpage duplicate item. Same drill. 
*/ 359 memcpy(&hod, P_ENTRY(dbp, h, i), HOFFDUP_SIZE); 360 if (!IS_VALID_PGNO(hod.pgno) || hod.pgno == pip->pgno || 361 hod.pgno == PGNO_INVALID) { 362 EPRINT((dbp->env, 363 "Page %lu: offpage item %lu has bad page number", 364 (u_long)pip->pgno, (u_long)i)); 365 ret = DB_VERIFY_BAD; 366 goto err; 367 } 368 memset(&child, 0, sizeof(VRFY_CHILDINFO)); 369 child.pgno = hod.pgno; 370 child.type = V_DUPLICATE; 371 if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0) 372 goto err; 373 F_SET(pip, VRFY_HAS_DUPS); 374 break; 375 default: 376 EPRINT((dbp->env, 377 "Page %lu: item %lu has bad type", 378 (u_long)pip->pgno, (u_long)i)); 379 ret = DB_VERIFY_BAD; 380 break; 381 } 382 383err: if ((t_ret = 384 __db_vrfy_putpageinfo(dbp->env, vdp, pip)) != 0 && ret == 0) 385 ret = t_ret; 386 return (ret); 387} 388 389/* 390 * __ham_vrfy_structure -- 391 * Verify the structure of a hash database. 392 * 393 * PUBLIC: int __ham_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, 394 * PUBLIC: u_int32_t)); 395 */ 396int 397__ham_vrfy_structure(dbp, vdp, meta_pgno, flags) 398 DB *dbp; 399 VRFY_DBINFO *vdp; 400 db_pgno_t meta_pgno; 401 u_int32_t flags; 402{ 403 DB *pgset; 404 DB_MPOOLFILE *mpf; 405 HMETA *m; 406 PAGE *h; 407 VRFY_PAGEINFO *pip; 408 int isbad, p, ret, t_ret; 409 db_pgno_t pgno; 410 u_int32_t bucket, spares_entry; 411 412 mpf = dbp->mpf; 413 pgset = vdp->pgset; 414 h = NULL; 415 ret = isbad = 0; 416 417 if ((ret = __db_vrfy_pgset_get(pgset, 418 vdp->thread_info, meta_pgno, &p)) != 0) 419 return (ret); 420 if (p != 0) { 421 EPRINT((dbp->env, 422 "Page %lu: Hash meta page referenced twice", 423 (u_long)meta_pgno)); 424 return (DB_VERIFY_BAD); 425 } 426 if ((ret = __db_vrfy_pgset_inc(pgset, 427 vdp->thread_info, meta_pgno)) != 0) 428 return (ret); 429 430 /* Get the meta page; we'll need it frequently. */ 431 if ((ret = __memp_fget(mpf, 432 &meta_pgno, vdp->thread_info, NULL, 0, &m)) != 0) 433 return (ret); 434 435 /* Loop through bucket by bucket. 
*/ 436 for (bucket = 0; bucket <= m->max_bucket; bucket++) 437 if ((ret = 438 __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)) != 0) { 439 if (ret == DB_VERIFY_BAD) 440 isbad = 1; 441 else 442 goto err; 443 } 444 445 /* 446 * There may be unused hash pages corresponding to buckets 447 * that have been allocated but not yet used. These may be 448 * part of the current doubling above max_bucket, or they may 449 * correspond to buckets that were used in a transaction 450 * that then aborted. 451 * 452 * Loop through them, as far as the spares array defines them, 453 * and make sure they're all empty. 454 * 455 * Note that this should be safe, since we've already verified 456 * that the spares array is sane. 457 */ 458 for (bucket = m->max_bucket + 1; spares_entry = __db_log2(bucket + 1), 459 spares_entry < NCACHED && m->spares[spares_entry] != 0; bucket++) { 460 pgno = BS_TO_PAGE(bucket, m->spares); 461 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) 462 goto err; 463 464 /* It's okay if these pages are totally zeroed; unmark it. */ 465 F_CLR(pip, VRFY_IS_ALLZEROES); 466 467 /* It's also OK if this page is simply invalid. 
*/ 468 if (pip->type == P_INVALID) { 469 if ((ret = __db_vrfy_putpageinfo(dbp->env, 470 vdp, pip)) != 0) 471 goto err; 472 continue; 473 } 474 475 if (pip->type != P_HASH && pip->type != P_HASH_UNSORTED) { 476 EPRINT((dbp->env, 477 "Page %lu: hash bucket %lu maps to non-hash page", 478 (u_long)pgno, (u_long)bucket)); 479 isbad = 1; 480 } else if (pip->entries != 0) { 481 EPRINT((dbp->env, 482 "Page %lu: non-empty page in unused hash bucket %lu", 483 (u_long)pgno, (u_long)bucket)); 484 isbad = 1; 485 } else { 486 if ((ret = __db_vrfy_pgset_get(pgset, 487 vdp->thread_info, pgno, &p)) != 0) 488 goto err; 489 if (p != 0) { 490 EPRINT((dbp->env, 491 "Page %lu: above max_bucket referenced", 492 (u_long)pgno)); 493 isbad = 1; 494 } else { 495 if ((ret = __db_vrfy_pgset_inc(pgset, 496 vdp->thread_info, pgno)) != 0) 497 goto err; 498 if ((ret = __db_vrfy_putpageinfo(dbp->env, 499 vdp, pip)) != 0) 500 goto err; 501 continue; 502 } 503 } 504 505 /* If we got here, it's an error. */ 506 (void)__db_vrfy_putpageinfo(dbp->env, vdp, pip); 507 goto err; 508 } 509 510err: if ((t_ret = __memp_fput(mpf, vdp->thread_info, m, dbp->priority)) != 0) 511 return (t_ret); 512 if (h != NULL && 513 (t_ret = __memp_fput(mpf, vdp->thread_info, h, dbp->priority)) != 0) 514 return (t_ret); 515 return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD: ret); 516} 517 518/* 519 * __ham_vrfy_bucket -- 520 * Verify a given bucket. 
 */
static int
__ham_vrfy_bucket(dbp, vdp, m, bucket, flags)
	DB *dbp;
	VRFY_DBINFO *vdp;
	HMETA *m;
	u_int32_t bucket, flags;
{
	ENV *env;
	HASH *hashp;
	VRFY_CHILDINFO *child;
	VRFY_PAGEINFO *mip, *pip;
	int ret, t_ret, isbad, p;
	db_pgno_t pgno, next_pgno;
	DBC *cc;
	u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t));

	env = dbp->env;
	isbad = 0;
	pip = NULL;
	cc = NULL;

	/* Use the application's hash function if one was configured. */
	hashp = dbp->h_internal;
	if (hashp != NULL && hashp->h_hash != NULL)
		hfunc = hashp->h_hash;
	else
		hfunc = __ham_func5;

	if ((ret = __db_vrfy_getpageinfo(vdp, PGNO(m), &mip)) != 0)
		return (ret);

	/* Calculate the first pgno for this bucket. */
	pgno = BS_TO_PAGE(bucket, m->spares);

	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
		goto err;

	/* Make sure we got a plausible page number. */
	if (pgno > vdp->last_pgno ||
	    (pip->type != P_HASH && pip->type != P_HASH_UNSORTED)) {
		EPRINT((env,
		    "Page %lu: impossible first page in bucket %lu",
		    (u_long)pgno, (u_long)bucket));
		/* Unsafe to continue. */
		isbad = 1;
		goto err;
	}

	/* The head of a bucket chain must not have a predecessor. */
	if (pip->prev_pgno != PGNO_INVALID) {
		EPRINT((env,
		    "Page %lu: first page in hash bucket %lu has a prev_pgno",
		    (u_long)pgno, (u_long)bucket));
		isbad = 1;
	}

	/*
	 * Set flags for dups and sorted dups.
	 */
	flags |= F_ISSET(mip, VRFY_HAS_DUPS) ? DB_ST_DUPOK : 0;
	flags |= F_ISSET(mip, VRFY_HAS_DUPSORT) ? DB_ST_DUPSORT : 0;

	/* Loop until we find a fatal bug, or until we run out of pages. */
	for (;;) {
		/* Provide feedback on our progress to the application. */
		if (!LF_ISSET(DB_SALVAGE))
			__db_vrfy_struct_feedback(dbp, vdp);

		if ((ret = __db_vrfy_pgset_get(vdp->pgset,
		    vdp->thread_info, pgno, &p)) != 0)
			goto err;
		if (p != 0) {
			EPRINT((env,
			    "Page %lu: hash page referenced twice",
			    (u_long)pgno));
			isbad = 1;
			/* Unsafe to continue. */
			goto err;
		} else if ((ret = __db_vrfy_pgset_inc(vdp->pgset,
		    vdp->thread_info, pgno)) != 0)
			goto err;

		/*
		 * Hash pages that nothing has ever hashed to may never
		 * have actually come into existence, and may appear to be
		 * entirely zeroed.  This is acceptable, and since there's
		 * no real way for us to know whether this has actually
		 * occurred, we clear the "wholly zeroed" flag on every
		 * hash page.  A wholly zeroed page, by nature, will appear
		 * to have no flags set and zero entries, so should
		 * otherwise verify correctly.
		 */
		F_CLR(pip, VRFY_IS_ALLZEROES);

		/* If we have dups, our meta page had better know about it. */
		if (F_ISSET(pip, VRFY_HAS_DUPS) &&
		    !F_ISSET(mip, VRFY_HAS_DUPS)) {
			EPRINT((env,
		    "Page %lu: duplicates present in non-duplicate database",
			    (u_long)pgno));
			isbad = 1;
		}

		/*
		 * If the database has sorted dups, this page had better
		 * not have unsorted ones.
		 */
		if (F_ISSET(mip, VRFY_HAS_DUPSORT) &&
		    F_ISSET(pip, VRFY_DUPS_UNSORTED)) {
			EPRINT((env,
			    "Page %lu: unsorted dups in sorted-dup database",
			    (u_long)pgno));
			isbad = 1;
		}

		/* Walk overflow chains and offpage dup trees. */
		if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0)
			goto err;
		for (ret = __db_vrfy_ccset(cc, pip->pgno, &child); ret == 0;
		    ret = __db_vrfy_ccnext(cc, &child))
			if (child->type == V_OVERFLOW) {
				if ((ret = __db_vrfy_ovfl_structure(dbp, vdp,
				    child->pgno, child->tlen,
				    flags | DB_ST_OVFL_LEAF)) != 0) {
					if (ret == DB_VERIFY_BAD)
						isbad = 1;
					else
						goto err;
				}
			} else if (child->type == V_DUPLICATE) {
				if ((ret = __db_vrfy_duptype(dbp,
				    vdp, child->pgno, flags)) != 0) {
					isbad = 1;
					continue;
				}
				if ((ret = __bam_vrfy_subtree(dbp, vdp,
				    child->pgno, NULL, NULL,
				    flags | DB_ST_RECNUM | DB_ST_DUPSET | DB_ST_TOPLEVEL,
				    NULL, NULL, NULL)) != 0) {
					if (ret == DB_VERIFY_BAD)
						isbad = 1;
					else
						goto err;
				}
			}
		/*
		 * Close the cursor on vdp, open one on dbp
		 *
		 * NOTE(review): cc is reused for the DB cursor here, and on
		 * the next loop iteration __db_vrfy_childcursor overwrites
		 * it without the DB cursor having been closed -- this looks
		 * like one leaked cursor per continuation page in the
		 * bucket chain; confirm against __db_cursor_int ownership
		 * rules.
		 */
		if ((ret = __db_vrfy_ccclose(cc)) != 0)
			goto err;
		if ((ret = __db_cursor_int(dbp, vdp->thread_info, NULL,
		    DB_HASH, PGNO_INVALID, 0, DB_LOCK_INVALIDID, &cc)) != 0)
			goto err;
		/* If it's safe to check that things hash properly, do so. */
		if (isbad == 0 && !LF_ISSET(DB_NOORDERCHK) &&
		    (ret = __ham_vrfy_hashing(cc, pip->entries,
		    m, bucket, pgno, flags, hfunc)) != 0) {
			if (ret == DB_VERIFY_BAD)
				isbad = 1;
			else
				goto err;
		}

		next_pgno = pip->next_pgno;
		ret = __db_vrfy_putpageinfo(env, vdp, pip);

		pip = NULL;
		if (ret != 0)
			goto err;

		if (next_pgno == PGNO_INVALID)
			break;	/* End of the bucket. */

		/* We already checked this, but just in case... */
		if (!IS_VALID_PGNO(next_pgno)) {
			EPRINT((env,
			    "Page %lu: hash page has bad next_pgno",
			    (u_long)pgno));
			isbad = 1;
			goto err;
		}

		if ((ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0)
			goto err;

		/* Chain pages must be doubly linked. */
		if (pip->prev_pgno != pgno) {
			EPRINT((env,
			    "Page %lu: hash page has bad prev_pgno",
			    (u_long)next_pgno));
			isbad = 1;
		}
		pgno = next_pgno;
	}

err:	if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0)
		ret = t_ret;
	if (mip != NULL && ((t_ret =
	    __db_vrfy_putpageinfo(env, vdp, mip)) != 0) && ret == 0)
		ret = t_ret;
	if (pip != NULL && ((t_ret =
	    __db_vrfy_putpageinfo(env, vdp, pip)) != 0) && ret == 0)
		ret = t_ret;
	return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
}

/*
 * __ham_vrfy_hashing --
 *	Verify that all items on a given hash page hash correctly.
 *
 * PUBLIC: int __ham_vrfy_hashing __P((DBC *,
 * PUBLIC:     u_int32_t, HMETA *, u_int32_t, db_pgno_t, u_int32_t,
 * PUBLIC:     u_int32_t (*) __P((DB *, const void *, u_int32_t))));
 */
int
__ham_vrfy_hashing(dbc, nentries, m, thisbucket, pgno, flags, hfunc)
	DBC *dbc;
	u_int32_t nentries;
	HMETA *m;
	u_int32_t thisbucket;
	db_pgno_t pgno;
	u_int32_t flags;
	u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t));
{
	DB *dbp;
	DBT dbt;
	DB_MPOOLFILE *mpf;
	DB_THREAD_INFO *ip;
	PAGE *h;
	db_indx_t i;
	int ret, t_ret, isbad;
	u_int32_t hval, bucket;

	dbp = dbc->dbp;
	mpf = dbp->mpf;
	ret = isbad = 0;

	memset(&dbt, 0, sizeof(DBT));
	F_SET(&dbt, DB_DBT_REALLOC);
	ENV_GET_THREAD_INFO(dbp->env, ip);

	if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &h)) != 0)
		return (ret);

	/* Keys live at even indices; step by 2 to visit each key. */
	for (i = 0; i < nentries; i += 2) {
		/*
		 * We've already verified the page integrity and that of any
		 * overflow chains linked off it; it is therefore safe to use
		 * __db_ret.  It's also not all that much slower, since we have
		 * to copy every hash item to deal with alignment anyway;  we
		 * can tweak this a bit if this proves to be a bottleneck,
		 * but for now, take the easy route.
		 */
		if ((ret = __db_ret(dbc, h, i, &dbt, NULL, NULL)) != 0)
			goto err;
		hval = hfunc(dbp, dbt.data, dbt.size);

		/*
		 * Standard linear-hashing bucket selection: mask with
		 * high_mask, fall back to low_mask for buckets not yet split.
		 */
		bucket = hval & m->high_mask;
		if (bucket > m->max_bucket)
			bucket = bucket & m->low_mask;

		if (bucket != thisbucket) {
			EPRINT((dbp->env,
			    "Page %lu: item %lu hashes incorrectly",
			    (u_long)pgno, (u_long)i));
			isbad = 1;
		}
	}

err:	if (dbt.data != NULL)
		__os_ufree(dbp->env, dbt.data);
	/* NOTE(review): a failing fput here discards any earlier ret. */
	if ((t_ret = __memp_fput(mpf, ip, h, dbp->priority)) != 0)
		return (t_ret);

	return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
}

/*
 * __ham_salvage --
 *	Safely dump out anything that looks like a key on an alleged
 *	hash page.
 *
 * PUBLIC: int __ham_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *,
 * PUBLIC:     void *, int (*)(void *, const void *), u_int32_t));
 */
int
__ham_salvage(dbp, vdp, pgno, h, handle, callback, flags)
	DB *dbp;
	VRFY_DBINFO *vdp;
	db_pgno_t pgno;
	PAGE *h;
	void *handle;
	int (*callback) __P((void *, const void *));
	u_int32_t flags;
{
	DBT dbt, key_dbt, unkdbt;
	db_pgno_t dpgno;
	int ret, err_ret, t_ret;
	u_int32_t himark, i, ovfl_bufsz;
	u_int8_t *hk, *p;
	void *buf, *key_buf;
	db_indx_t dlen, len, tlen;

	memset(&dbt, 0, sizeof(DBT));
	dbt.flags = DB_DBT_REALLOC;

	/* Placeholder emitted when an item cannot be recovered. */
	DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1);

	err_ret = 0;

	/*
	 * Allocate a buffer for overflow items.  Start at one page;
	 * __db_safe_goff will realloc as needed.
	 */
	if ((ret = __os_malloc(dbp->env, dbp->pgsize, &buf)) != 0)
		return (ret);
	ovfl_bufsz = dbp->pgsize;

	himark = dbp->pgsize;
	for (i = 0;; i++) {
		/* If we're not aggressive, break when we hit NUM_ENT(h). */
		if (!LF_ISSET(DB_AGGRESSIVE) && i >= NUM_ENT(h))
			break;

		/*
		 * Verify the current item.  If we're beyond NUM_ENT errors are
		 * expected and ignored.
		 */
		ret = __db_vrfy_inpitem(dbp,
		    h, pgno, i, 0, flags, &himark, NULL);
		/* If this returned a fatality, it's time to break. */
		if (ret == DB_VERIFY_FATAL) {
			if (i >= NUM_ENT(h))
				ret = 0;
			break;
		} else if (ret != 0 && i >= NUM_ENT(h)) {
			/* Not a reportable error, but don't salvage the item. */
			ret = 0;
		} else if (ret == 0) {
			/* Set len to total entry length. */
			len = LEN_HITEM(dbp, h, dbp->pgsize, i);
			hk = P_ENTRY(dbp, h, i);
			if (len == 0 || len > dbp->pgsize ||
			    (u_int32_t)(hk + len - (u_int8_t *)h) >
			    dbp->pgsize) {
				/* Item is unsafely large; skip it. */
				err_ret = DB_VERIFY_BAD;
				continue;
			}
			switch (HPAGE_PTYPE(hk)) {
			case H_KEYDATA:
				/* Update len to size of item. */
				len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i);
keydata:			memcpy(buf, HKEYDATA_DATA(hk), len);
				dbt.size = len;
				dbt.data = buf;
				if ((ret = __db_vrfy_prdbt(&dbt,
				    0, " ", handle, callback, 0, vdp)) != 0)
					err_ret = ret;
				break;
			case H_OFFPAGE:
				if (len < HOFFPAGE_SIZE) {
					err_ret = DB_VERIFY_BAD;
					continue;
				}
				memcpy(&dpgno,
				    HOFFPAGE_PGNO(hk), sizeof(dpgno));
				if ((ret = __db_safe_goff(dbp, vdp,
				    dpgno, &dbt, &buf, &ovfl_bufsz, flags)) != 0) {
					err_ret = ret;
					(void)__db_vrfy_prdbt(&unkdbt, 0, " ",
					    handle, callback, 0, vdp);
					/* fallthrough to end of case */
				} else if ((ret = __db_vrfy_prdbt(&dbt,
				    0, " ", handle, callback, 0, vdp)) != 0)
					err_ret = ret;
				break;
			case H_OFFDUP:
				if (len < HOFFDUP_SIZE) {
					err_ret = DB_VERIFY_BAD;
					continue;
				}
				memcpy(&dpgno,
				    HOFFDUP_PGNO(hk), sizeof(dpgno));
				/* UNKNOWN iff pgno is bad or we're a key. */
				if (!IS_VALID_PGNO(dpgno) || (i % 2 == 0)) {
					if ((ret =
					    __db_vrfy_prdbt(&unkdbt, 0, " ",
					    handle, callback, 0, vdp)) != 0)
						err_ret = ret;
				} else if ((ret = __db_salvage_duptree(dbp,
				    vdp, dpgno, &dbt, handle, callback,
				    flags | DB_SA_SKIPFIRSTKEY)) != 0)
					err_ret = ret;
				break;
			case H_DUPLICATE:
				/*
				 * This is an on-page duplicate item, iterate
				 * over the duplicate set, printing out
				 * key/data pairs.
				 */
				len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i);
				/*
				 * If this item is at an even index it must be
				 * a key item and it should never be of type
				 * H_DUPLICATE.  If we are in aggressive mode,
				 * print the item out as a normal key, and let
				 * the user resolve the discrepancy.
				 *
				 * NOTE(review): ret is 0 here, so
				 * "err_ret = ret" clears any previously
				 * recorded err_ret rather than recording an
				 * error -- looks like it was meant to be
				 * DB_VERIFY_BAD; confirm intent.
				 */
				if (i % 2 == 0) {
					err_ret = ret;
					if (LF_ISSET(DB_AGGRESSIVE))
						goto keydata;
					break;
				}

				/*
				 * Check to ensure that the item size is
				 * greater than the smallest possible on page
				 * duplicate.
				 */
				if (len <
				    HKEYDATA_SIZE(2 * sizeof(db_indx_t))) {
					err_ret = DB_VERIFY_BAD;
					continue;
				}

				/*
				 * Copy out the key from the dbt, it is still
				 * present from the previous pass.
				 *
				 * NOTE(review): returning here leaks buf,
				 * which was allocated above -- consider
				 * routing through the common cleanup.
				 */
				memset(&key_dbt, 0, sizeof(key_dbt));
				if ((ret = __os_malloc(
				    dbp->env, dbt.size, &key_buf)) != 0)
					return (ret);
				memcpy(key_buf, buf, dbt.size);
				key_dbt.data = key_buf;
				key_dbt.size = dbt.size;
				key_dbt.flags = DB_DBT_USERMEM;

				/* Loop until we hit the total length. */
				for (tlen = 0; tlen + sizeof(db_indx_t) < len;
				    tlen += dlen + 2 * sizeof(db_indx_t)) {
					/*
					 * Print the key for every duplicate
					 * item.  Except the first dup, since
					 * the key was already output once by
					 * the previous iteration.
					 */
					if (tlen != 0) {
						if ((ret = __db_vrfy_prdbt(
						    &key_dbt, 0, " ", handle,
						    callback, 0, vdp)) != 0)
							err_ret = ret;
					}
					p = HKEYDATA_DATA(hk) + tlen;
					memcpy(&dlen, p, sizeof(db_indx_t));
					p += sizeof(db_indx_t);
					/*
					 * If dlen is too long, print all the
					 * rest of the dup set in a chunk.
					 */
					if (dlen + tlen + sizeof(db_indx_t) >
					    len) {
						dlen = len -
						    (tlen + sizeof(db_indx_t));
						err_ret = DB_VERIFY_BAD;
					}
					memcpy(buf, p, dlen);
					dbt.size = dlen;
					dbt.data = buf;
					if ((ret = __db_vrfy_prdbt(&dbt, 0, " ",
					    handle, callback, 0, vdp)) != 0)
						err_ret = ret;
				}
				__os_free(dbp->env, key_buf);
				break;
			default:
				if (!LF_ISSET(DB_AGGRESSIVE))
					break;
				err_ret = DB_VERIFY_BAD;
				break;
			}
		}
	}

	__os_free(dbp->env, buf);
	if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0)
		return (t_ret);
	return ((ret == 0 && err_ret != 0) ? err_ret : ret);
}

/*
 * __ham_meta2pgset --
 *	Return the set of hash pages corresponding to the given
 *	known-good meta page.
 *
 * PUBLIC: int __ham_meta2pgset __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t,
 * PUBLIC:     DB *));
 */
int
__ham_meta2pgset(dbp, vdp, hmeta, flags, pgset)
	DB *dbp;
	VRFY_DBINFO *vdp;
	HMETA *hmeta;
	u_int32_t flags;
	DB *pgset;
{
	DB_MPOOLFILE *mpf;
	DB_THREAD_INFO *ip;
	PAGE *h;
	db_pgno_t pgno;
	u_int32_t bucket, totpgs;
	int ret, val;

	/*
	 * We don't really need flags, but leave them for consistency with
	 * __bam_meta2pgset.
	 */
	COMPQUIET(flags, 0);
	ip = vdp->thread_info;

	DB_ASSERT(dbp->env, pgset != NULL);

	mpf = dbp->mpf;
	totpgs = 0;

	/*
	 * Loop through all the buckets, pushing onto pgset the corresponding
	 * page(s) for each one.
	 */
	for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) {
		pgno = BS_TO_PAGE(bucket, hmeta->spares);

		/*
		 * We know the initial pgno is safe because the spares array has
		 * been verified.
		 *
		 * Safely walk the list of pages in this bucket.
		 */
		for (;;) {
			if ((ret =
			    __memp_fget(mpf, &pgno, ip, NULL, 0, &h)) != 0)
				return (ret);
			if (TYPE(h) == P_HASH || TYPE(h) == P_HASH_UNSORTED) {

				/*
				 * Make sure we don't go past the end of
				 * pgset.
				 */
				if (++totpgs > vdp->last_pgno) {
					(void)__memp_fput(mpf,
					    ip, h, dbp->priority);
					return (DB_VERIFY_BAD);
				}
				if ((ret = __db_vrfy_pgset_inc(pgset,
				    vdp->thread_info, pgno)) != 0) {
					(void)__memp_fput(mpf,
					    ip, h, dbp->priority);
					return (ret);
				}

				pgno = NEXT_PGNO(h);
			} else
				/* Non-hash page: stop walking this chain. */
				pgno = PGNO_INVALID;

			if ((ret = __memp_fput(mpf, ip, h, dbp->priority)) != 0)
				return (ret);

			/* If the new pgno is wonky, go onto the next bucket. */
			if (!IS_VALID_PGNO(pgno) ||
			    pgno == PGNO_INVALID)
				break;

			/*
			 * If we've touched this page before, we have a cycle;
			 * go on to the next bucket.
			 */
			if ((ret = __db_vrfy_pgset_get(pgset,
			    vdp->thread_info, pgno, &val)) != 0)
				return (ret);
			if (val != 0)
				break;
		}
	}
	return (0);
}

/*
 * __ham_dups_unsorted --
 *	Takes a known-safe hash duplicate set and its total length.
 *	Returns 1 if there are out-of-order duplicates in this set,
 *	0 if there are not.
 */
static int
__ham_dups_unsorted(dbp, buf, len)
	DB *dbp;
	u_int8_t *buf;
	u_int32_t len;
{
	DBT a, b;
	db_indx_t offset, dlen;
	int (*func) __P((DB *, const DBT *, const DBT *));

	memset(&a, 0, sizeof(DBT));
	memset(&b, 0, sizeof(DBT));

	/* Use the application's dup comparator if one was configured. */
	func = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare;

	/*
	 * Loop through the dup set until we hit the end or we find
	 * a pair of dups that's out of order.  b is always the current
	 * dup, a the one before it.
	 */
	for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) {
		/* Each dup is prefixed (and suffixed) by its length. */
		memcpy(&dlen, buf + offset, sizeof(db_indx_t));
		b.data = buf + offset + sizeof(db_indx_t);
		b.size = dlen;

		/* a.data is NULL on the first iteration: nothing to compare. */
		if (a.data != NULL && func(dbp, &a, &b) > 0)
			return (1);

		a.data = b.data;
		a.size = b.size;
	}

	return (0);
}