1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 2000,2008 Oracle.  All rights reserved.
5 *
6 * $Id: db_vrfy.c,v 12.53 2008/03/12 20:34:13 mbrey Exp $
7 */
8
9#include "db_config.h"
10
11#include "db_int.h"
12#include "dbinc/db_page.h"
13#include "dbinc/db_swap.h"
14#include "dbinc/db_verify.h"
15#include "dbinc/btree.h"
16#include "dbinc/hash.h"
17#include "dbinc/lock.h"
18#include "dbinc/mp.h"
19#include "dbinc/qam.h"
20#include "dbinc/txn.h"
21
22/*
23 * This is the code for DB->verify, the DB database consistency checker.
24 * For now, it checks all subdatabases in a database, and verifies
25 * everything it knows how to (i.e. it's all-or-nothing, and one can't
26 * check only for a subset of possible problems).
27 */
28
29static u_int __db_guesspgsize __P((ENV *, DB_FH *));
30static int   __db_is_valid_magicno __P((u_int32_t, DBTYPE *));
31static int   __db_meta2pgset
32		__P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, DB *));
33static int   __db_salvage_subdbpg __P((DB *, VRFY_DBINFO *,
34		PAGE *, void *, int (*)(void *, const void *), u_int32_t));
35static int   __db_salvage_subdbs __P((DB *, VRFY_DBINFO *, void *,
36		int(*)(void *, const void *), u_int32_t, int *));
37static int   __db_salvage_unknowns __P((DB *, VRFY_DBINFO *, void *,
38		int (*)(void *, const void *), u_int32_t));
39static int   __db_verify __P((DB *, DB_THREAD_INFO *, const char *,
40		const char *, void *, int (*)(void *, const void *),
41		u_int32_t));
42static int   __db_verify_arg __P((DB *, const char *, void *, u_int32_t));
43static int   __db_vrfy_freelist
44		__P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t));
45static int   __db_vrfy_invalid
46		__P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
47static int   __db_vrfy_orderchkonly __P((DB *,
48		VRFY_DBINFO *, const char *, const char *, u_int32_t));
49static int   __db_vrfy_pagezero __P((DB *, VRFY_DBINFO *, DB_FH *, u_int32_t));
50static int   __db_vrfy_subdbs
51		__P((DB *, VRFY_DBINFO *, const char *, u_int32_t));
52static int   __db_vrfy_structure
53		__P((DB *, VRFY_DBINFO *, const char *, db_pgno_t, u_int32_t));
54static int   __db_vrfy_walkpages __P((DB *, VRFY_DBINFO *,
55		void *, int (*)(void *, const void *), u_int32_t));
56
/*
 * The complete set of flags DB->verify accepts; __db_verify_arg hands
 * this mask to __db_fchk as the set of permitted bits.
 */
#define	VERIFY_FLAGS							\
    (DB_AGGRESSIVE | DB_NOORDERCHK | DB_ORDERCHKONLY |			\
     DB_PRINTABLE | DB_SALVAGE | DB_UNREF)
60
61/*
62 * __db_verify_pp --
63 *	DB->verify public interface.
64 *
65 * PUBLIC: int __db_verify_pp
66 * PUBLIC:     __P((DB *, const char *, const char *, FILE *, u_int32_t));
67 */
68int
69__db_verify_pp(dbp, file, database, outfile, flags)
70	DB *dbp;
71	const char *file, *database;
72	FILE *outfile;
73	u_int32_t flags;
74{
75	/*
76	 * __db_verify_pp is a wrapper to __db_verify_internal, which lets
77	 * us pass appropriate equivalents to FILE * in from the non-C APIs.
78	 * That's why the usual ENV_ENTER macros are in __db_verify_internal,
79	 * not here.
80	 */
81	return (__db_verify_internal(dbp,
82	    file, database, outfile, __db_pr_callback, flags));
83}
84
85/*
86 * __db_verify_internal --
87 *
88 * PUBLIC: int __db_verify_internal __P((DB *, const char *,
89 * PUBLIC:     const char *, void *, int (*)(void *, const void *), u_int32_t));
90 */
91int
92__db_verify_internal(dbp, fname, dname, handle, callback, flags)
93	DB *dbp;
94	const char *fname, *dname;
95	void *handle;
96	int (*callback) __P((void *, const void *));
97	u_int32_t flags;
98{
99	DB_THREAD_INFO *ip;
100	ENV *env;
101	int ret, t_ret;
102
103	env = dbp->env;
104
105	DB_ILLEGAL_AFTER_OPEN(dbp, "DB->verify");
106
107	if (!LF_ISSET(DB_SALVAGE))
108		LF_SET(DB_UNREF);
109
110	ENV_ENTER(env, ip);
111
112	if ((ret = __db_verify_arg(dbp, dname, handle, flags)) == 0)
113		ret = __db_verify(dbp, ip,
114		     fname, dname, handle, callback, flags);
115
116	/* Db.verify is a DB handle destructor. */
117	if ((t_ret = __db_close(dbp, NULL, 0)) != 0 && ret == 0)
118		ret = t_ret;
119
120	ENV_LEAVE(env, ip);
121	return (ret);
122}
123
124/*
125 * __db_verify_arg --
126 *	Check DB->verify arguments.
127 */
128static int
129__db_verify_arg(dbp, dname, handle, flags)
130	DB *dbp;
131	const char *dname;
132	void *handle;
133	u_int32_t flags;
134{
135	ENV *env;
136	int ret;
137
138	env = dbp->env;
139
140	if ((ret = __db_fchk(env, "DB->verify", flags, VERIFY_FLAGS)) != 0)
141		return (ret);
142
143	/*
144	 * DB_SALVAGE is mutually exclusive with the other flags except
145	 * DB_AGGRESSIVE, DB_PRINTABLE.
146	 *
147	 * DB_AGGRESSIVE and DB_PRINTABLE are only meaningful when salvaging.
148	 *
149	 * DB_SALVAGE requires an output stream.
150	 */
151	if (LF_ISSET(DB_SALVAGE)) {
152		if (LF_ISSET(~(DB_AGGRESSIVE | DB_PRINTABLE | DB_SALVAGE)))
153			return (__db_ferr(env, "DB->verify", 1));
154		if (handle == NULL) {
155			__db_errx(env,
156			    "DB_SALVAGE requires a an output handle");
157			return (EINVAL);
158		}
159	} else
160		if (LF_ISSET(DB_AGGRESSIVE | DB_PRINTABLE))
161			return (__db_ferr(env, "DB->verify", 1));
162
163	/*
164	 * DB_ORDERCHKONLY is mutually exclusive with DB_SALVAGE and
165	 * DB_NOORDERCHK, and requires a database name.
166	 */
167	if ((ret = __db_fcchk(env, "DB->verify", flags,
168	    DB_ORDERCHKONLY, DB_SALVAGE | DB_NOORDERCHK)) != 0)
169		return (ret);
170	if (LF_ISSET(DB_ORDERCHKONLY) && dname == NULL) {
171		__db_errx(env, "DB_ORDERCHKONLY requires a database name");
172		return (EINVAL);
173	}
174	return (0);
175}
176
177/*
178 * __db_verify --
179 *	Walk the entire file page-by-page, either verifying with or without
180 *	dumping in db_dump -d format, or DB_SALVAGE-ing whatever key/data
181 *	pairs can be found and dumping them in standard (db_load-ready)
182 *	dump format.
183 *
184 *	(Salvaging isn't really a verification operation, but we put it
185 *	here anyway because it requires essentially identical top-level
186 *	code.)
187 *
188 *	flags may be 0, DB_NOORDERCHK, DB_ORDERCHKONLY, or DB_SALVAGE
189 *	(and optionally DB_AGGRESSIVE).
190 */
191static int
192__db_verify(dbp, ip, name, subdb, handle, callback, flags)
193	DB *dbp;
194	DB_THREAD_INFO *ip;
195	const char *name, *subdb;
196	void *handle;
197	int (*callback) __P((void *, const void *));
198	u_int32_t flags;
199{
200	DB_FH *fhp;
201	ENV *env;
202	VRFY_DBINFO *vdp;
203	int has_subdbs, isbad, ret, t_ret;
204	char *real_name;
205
206	env = dbp->env;
207	fhp = NULL;
208	vdp = NULL;
209	real_name = NULL;
210	has_subdbs = isbad = ret = 0;
211
212	F_SET(dbp, DB_AM_VERIFYING);
213
214	/* Initialize any feedback function. */
215	if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL)
216		dbp->db_feedback(dbp, DB_VERIFY, 0);
217
218	/*
219	 * We don't know how large the cache is, and if the database
220	 * in question uses a small page size--which we don't know
221	 * yet!--it may be uncomfortably small for the default page
222	 * size [#2143].  However, the things we need temporary
223	 * databases for in dbinfo are largely tiny, so using a
224	 * 1024-byte pagesize is probably not going to be a big hit,
225	 * and will make us fit better into small spaces.
226	 */
227	if ((ret = __db_vrfy_dbinfo_create(env, ip,  1024, &vdp)) != 0)
228		goto err;
229
230	/*
231	 * Note whether the user has requested that we use printable
232	 * chars where possible.  We won't get here with this flag if
233	 * we're not salvaging.
234	 */
235	if (LF_ISSET(DB_PRINTABLE))
236		F_SET(vdp, SALVAGE_PRINTABLE);
237
238	/* Find the real name of the file. */
239	if ((ret = __db_appname(env,
240	    DB_APP_DATA, name, 0, NULL, &real_name)) != 0)
241		goto err;
242
243	/*
244	 * Our first order of business is to verify page 0, which is
245	 * the metadata page for the master database of subdatabases
246	 * or of the only database in the file.  We want to do this by hand
247	 * rather than just calling __db_open in case it's corrupt--various
248	 * things in __db_open might act funny.
249	 *
250	 * Once we know the metadata page is healthy, I believe that it's
251	 * safe to open the database normally and then use the page swapping
252	 * code, which makes life easier.
253	 */
254	if ((ret = __os_open(env, real_name, 0, DB_OSO_RDONLY, 0, &fhp)) != 0)
255		goto err;
256
257	/* Verify the metadata page 0; set pagesize and type. */
258	if ((ret = __db_vrfy_pagezero(dbp, vdp, fhp, flags)) != 0) {
259		if (ret == DB_VERIFY_BAD)
260			isbad = 1;
261		else
262			goto err;
263	}
264
265	/*
266	 * We can assume at this point that dbp->pagesize and dbp->type are
267	 * set correctly, or at least as well as they can be, and that
268	 * locking, logging, and txns are not in use.  Thus we can trust
269	 * the memp code not to look at the page, and thus to be safe
270	 * enough to use.
271	 *
272	 * The dbp is not open, but the file is open in the fhp, and we
273	 * cannot assume that __db_open is safe.  Call __env_setup,
274	 * the [safe] part of __db_open that initializes the environment--
275	 * and the mpool--manually.
276	 */
277	if ((ret = __env_setup(dbp, NULL,
278	    name, subdb, TXN_INVALID, DB_ODDFILESIZE | DB_RDONLY)) != 0)
279		goto err;
280
281	/*
282	 * Set our name in the Queue subsystem;  we may need it later
283	 * to deal with extents.
284	 */
285	if (dbp->type == DB_QUEUE &&
286	    (ret = __qam_set_ext_data(dbp, name)) != 0)
287		goto err;
288
289	/* Mark the dbp as opened, so that we correctly handle its close. */
290	F_SET(dbp, DB_AM_OPEN_CALLED);
291
292	/* Find out the page number of the last page in the database. */
293	if ((ret = __memp_get_last_pgno(dbp->mpf, &vdp->last_pgno)) != 0)
294		goto err;
295
296	/*
297	 * DB_ORDERCHKONLY is a special case;  our file consists of
298	 * several subdatabases, which use different hash, bt_compare,
299	 * and/or dup_compare functions.  Consequently, we couldn't verify
300	 * sorting and hashing simply by calling DB->verify() on the file.
301	 * DB_ORDERCHKONLY allows us to come back and check those things;  it
302	 * requires a subdatabase, and assumes that everything but that
303	 * database's sorting/hashing is correct.
304	 */
305	if (LF_ISSET(DB_ORDERCHKONLY)) {
306		ret = __db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags);
307		goto done;
308	}
309
310	/*
311	 * When salvaging, we use a db to keep track of whether we've seen a
312	 * given overflow or dup page in the course of traversing normal data.
313	 * If in the end we have not, we assume its key got lost and print it
314	 * with key "UNKNOWN".
315	 */
316	if (LF_ISSET(DB_SALVAGE)) {
317		if ((ret = __db_salvage_init(vdp)) != 0)
318			goto err;
319
320		/*
321		 * If we're not being aggressive, attempt to crack subdatabases.
322		 * "has_subdbs" will indicate whether the attempt has succeeded
323		 * (even in part), meaning that we have some semblance of
324		 * subdatabases; on the walkpages pass, we print out whichever
325		 * data pages we have not seen.
326		 */
327		if (!LF_ISSET(DB_AGGRESSIVE) && __db_salvage_subdbs(
328		    dbp, vdp, handle, callback, flags, &has_subdbs) != 0)
329			isbad = 1;
330
331		/*
332		 * If we have subdatabases, flag if any keys are found that
333		 * don't belong to a subdatabase -- they'll need to have an
334		 * "__OTHER__" subdatabase header printed first.
335		 */
336		if (has_subdbs)
337			F_SET(vdp, SALVAGE_PRINTHEADER);
338	}
339
340	if ((ret =
341	    __db_vrfy_walkpages(dbp, vdp, handle, callback, flags)) != 0) {
342		if (ret == DB_VERIFY_BAD)
343			isbad = 1;
344		else
345			goto err;
346	}
347
348	/* If we're verifying, verify inter-page structure. */
349	if (!LF_ISSET(DB_SALVAGE) && isbad == 0)
350		if ((ret =
351		    __db_vrfy_structure(dbp, vdp, name, 0, flags)) != 0) {
352			if (ret == DB_VERIFY_BAD)
353				isbad = 1;
354			else
355				goto err;
356		}
357
358	/*
359	 * If we're salvaging, output with key UNKNOWN any overflow or dup pages
360	 * we haven't been able to put in context.  Then destroy the salvager's
361	 * state-saving database.
362	 */
363	if (LF_ISSET(DB_SALVAGE)) {
364		if ((ret = __db_salvage_unknowns(dbp,
365		    vdp, handle, callback, flags)) != 0)
366			isbad = 1;
367		/* No return value, since there's little we can do. */
368		__db_salvage_destroy(vdp);
369	}
370
371	/* Don't display a footer for a database holding other databases. */
372	if (LF_ISSET(DB_SALVAGE) &&
373	    (!has_subdbs || F_ISSET(vdp, SALVAGE_PRINTFOOTER)))
374		(void)__db_prfooter(handle, callback);
375
376done: err:
377	/* Send feedback that we're done. */
378	if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL)
379		dbp->db_feedback(dbp, DB_VERIFY, 100);
380
381	if (fhp != NULL &&
382	    (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
383		ret = t_ret;
384	if (vdp != NULL &&
385	    (t_ret = __db_vrfy_dbinfo_destroy(env, vdp)) != 0 && ret == 0)
386		ret = t_ret;
387	if (real_name != NULL)
388		__os_free(env, real_name);
389
390	/*
391	 * DB_VERIFY_FATAL is a private error, translate to a public one.
392	 *
393	 * If we didn't find a page, it's probably a page number was corrupted.
394	 * Return the standard corruption error.
395	 *
396	 * Otherwise, if we found corruption along the way, set the return.
397	 */
398	if (ret == DB_VERIFY_FATAL ||
399	    ret == DB_PAGE_NOTFOUND || (ret == 0 && isbad == 1))
400		ret = DB_VERIFY_BAD;
401
402	/* Make sure there's a public complaint if we found corruption. */
403	if (ret != 0)
404		__db_err(env, ret, "%s", name);
405
406	return (ret);
407}
408
409/*
410 * __db_vrfy_pagezero --
411 *	Verify the master metadata page.  Use seek, read, and a local buffer
412 *	rather than the DB paging code, for safety.
413 *
414 *	Must correctly (or best-guess) set dbp->type and dbp->pagesize.
415 */
416static int
417__db_vrfy_pagezero(dbp, vdp, fhp, flags)
418	DB *dbp;
419	VRFY_DBINFO *vdp;
420	DB_FH *fhp;
421	u_int32_t flags;
422{
423	DBMETA *meta;
424	ENV *env;
425	VRFY_PAGEINFO *pip;
426	db_pgno_t freelist;
427	size_t nr;
428	int isbad, ret, swapped;
429	u_int8_t mbuf[DBMETASIZE];
430
431	isbad = ret = swapped = 0;
432	freelist = 0;
433	env = dbp->env;
434	meta = (DBMETA *)mbuf;
435	dbp->type = DB_UNKNOWN;
436
437	if ((ret = __db_vrfy_getpageinfo(vdp, PGNO_BASE_MD, &pip)) != 0)
438		return (ret);
439
440	/*
441	 * Seek to the metadata page.
442	 * Note that if we're just starting a verification, dbp->pgsize
443	 * may be zero;  this is okay, as we want page zero anyway and
444	 * 0*0 == 0.
445	 */
446	if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0 ||
447	    (ret = __os_read(env, fhp, mbuf, DBMETASIZE, &nr)) != 0) {
448		__db_err(env, ret,
449		    "Metadata page %lu cannot be read", (u_long)PGNO_BASE_MD);
450		return (ret);
451	}
452
453	if (nr != DBMETASIZE) {
454		EPRINT((env,
455		    "Page %lu: Incomplete metadata page",
456		    (u_long)PGNO_BASE_MD));
457		return (DB_VERIFY_FATAL);
458	}
459
460	if ((ret = __db_chk_meta(env, dbp, meta, 1)) != 0) {
461		EPRINT((env,
462		    "Page %lu: metadata page corrupted", (u_long)PGNO_BASE_MD));
463		isbad = 1;
464		if (ret != -1) {
465			EPRINT((env,
466			    "Page %lu: could not check metadata page",
467			    (u_long)PGNO_BASE_MD));
468			return (DB_VERIFY_FATAL);
469		}
470	}
471
472	/*
473	 * Check all of the fields that we can.
474	 *
475	 * 08-11: Current page number.  Must == pgno.
476	 * Note that endianness doesn't matter--it's zero.
477	 */
478	if (meta->pgno != PGNO_BASE_MD) {
479		isbad = 1;
480		EPRINT((env, "Page %lu: pgno incorrectly set to %lu",
481		    (u_long)PGNO_BASE_MD, (u_long)meta->pgno));
482	}
483
484	/* 12-15: Magic number.  Must be one of valid set. */
485	if (__db_is_valid_magicno(meta->magic, &dbp->type))
486		swapped = 0;
487	else {
488		M_32_SWAP(meta->magic);
489		if (__db_is_valid_magicno(meta->magic,
490		    &dbp->type))
491			swapped = 1;
492		else {
493			isbad = 1;
494			EPRINT((env,
495			    "Page %lu: bad magic number %lu",
496			    (u_long)PGNO_BASE_MD, (u_long)meta->magic));
497		}
498	}
499
500	/*
501	 * 16-19: Version.  Must be current;  for now, we
502	 * don't support verification of old versions.
503	 */
504	if (swapped)
505		M_32_SWAP(meta->version);
506	if ((dbp->type == DB_BTREE &&
507	    (meta->version > DB_BTREEVERSION ||
508	    meta->version < DB_BTREEOLDVER)) ||
509	    (dbp->type == DB_HASH &&
510	    (meta->version > DB_HASHVERSION ||
511	    meta->version < DB_HASHOLDVER)) ||
512	    (dbp->type == DB_QUEUE &&
513	    (meta->version > DB_QAMVERSION ||
514	    meta->version < DB_QAMOLDVER))) {
515		isbad = 1;
516		EPRINT((env,
517    "Page %lu: unsupported DB version %lu; extraneous errors may result",
518		    (u_long)PGNO_BASE_MD, (u_long)meta->version));
519	}
520
521	/*
522	 * 20-23: Pagesize.  Must be power of two,
523	 * greater than 512, and less than 64K.
524	 */
525	if (swapped)
526		M_32_SWAP(meta->pagesize);
527	if (IS_VALID_PAGESIZE(meta->pagesize))
528		dbp->pgsize = meta->pagesize;
529	else {
530		isbad = 1;
531		EPRINT((env, "Page %lu: bad page size %lu",
532		    (u_long)PGNO_BASE_MD, (u_long)meta->pagesize));
533
534		/*
535		 * Now try to settle on a pagesize to use.
536		 * If the user-supplied one is reasonable,
537		 * use it;  else, guess.
538		 */
539		if (!IS_VALID_PAGESIZE(dbp->pgsize))
540			dbp->pgsize = __db_guesspgsize(env, fhp);
541	}
542
543	/*
544	 * 25: Page type.  Must be correct for dbp->type,
545	 * which is by now set as well as it can be.
546	 */
547	/* Needs no swapping--only one byte! */
548	if ((dbp->type == DB_BTREE && meta->type != P_BTREEMETA) ||
549	    (dbp->type == DB_HASH && meta->type != P_HASHMETA) ||
550	    (dbp->type == DB_QUEUE && meta->type != P_QAMMETA)) {
551		isbad = 1;
552		EPRINT((env, "Page %lu: bad page type %lu",
553		    (u_long)PGNO_BASE_MD, (u_long)meta->type));
554	}
555
556	/*
557	 * 26: Meta-flags.
558	 */
559	if (meta->metaflags != 0) {
560		if (meta->metaflags == DBMETA_CHKSUM)
561			F_SET(pip, VRFY_HAS_CHKSUM);
562		else {
563			isbad = 1;
564			EPRINT((env,
565			    "Page %lu: bad meta-data flags value %#lx",
566			    (u_long)PGNO_BASE_MD, (u_long)meta->metaflags));
567		}
568	}
569
570	/*
571	 * 28-31: Free list page number.
572	 * 32-35: Last page in database file.
573	 * We'll verify its sensibility when we do inter-page
574	 * verification later;  for now, just store it.
575	 */
576	if (swapped)
577	    M_32_SWAP(meta->free);
578	freelist = meta->free;
579	if (swapped)
580	    M_32_SWAP(meta->last_pgno);
581	vdp->meta_last_pgno = meta->last_pgno;
582
583	/*
584	 * Initialize vdp->pages to fit a single pageinfo structure for
585	 * this one page.  We'll realloc later when we know how many
586	 * pages there are.
587	 */
588	pip->pgno = PGNO_BASE_MD;
589	pip->type = meta->type;
590
591	/*
592	 * Signal that we still have to check the info specific to
593	 * a given type of meta page.
594	 */
595	F_SET(pip, VRFY_INCOMPLETE);
596
597	pip->free = freelist;
598
599	if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
600		return (ret);
601
602	/* Set up the dbp's fileid.  We don't use the regular open path. */
603	memcpy(dbp->fileid, meta->uid, DB_FILE_ID_LEN);
604
605	if (swapped == 1)
606		F_SET(dbp, DB_AM_SWAP);
607
608	return (isbad ? DB_VERIFY_BAD : 0);
609}
610
611/*
612 * __db_vrfy_walkpages --
613 *	Main loop of the verifier/salvager.  Walks through,
614 *	page by page, and verifies all pages and/or prints all data pages.
615 */
616static int
617__db_vrfy_walkpages(dbp, vdp, handle, callback, flags)
618	DB *dbp;
619	VRFY_DBINFO *vdp;
620	void *handle;
621	int (*callback) __P((void *, const void *));
622	u_int32_t flags;
623{
624	DB_MPOOLFILE *mpf;
625	ENV *env;
626	PAGE *h;
627	VRFY_PAGEINFO *pip;
628	db_pgno_t i;
629	int ret, t_ret, isbad;
630
631	env = dbp->env;
632	mpf = dbp->mpf;
633	h = NULL;
634	ret = isbad = t_ret = 0;
635
636	for (i = 0; i <= vdp->last_pgno; i++) {
637		/*
638		 * If DB_SALVAGE is set, we inspect our database of completed
639		 * pages, and skip any we've already printed in the subdb pass.
640		 */
641		if (LF_ISSET(DB_SALVAGE) && (__db_salvage_isdone(vdp, i) != 0))
642			continue;
643
644		/*
645		 * An individual page get can fail if:
646		 *  * This is a hash database, it is expected to find
647		 *    empty buckets, which don't have allocated pages. Create
648		 *    a dummy page so the verification can proceed.
649		 *  * We are salvaging, flag the error and continue.
650		 */
651		if ((t_ret = __memp_fget(mpf, &i,
652		    vdp->thread_info, NULL, 0, &h)) != 0) {
653			if (dbp->type == DB_HASH) {
654				if ((t_ret =
655				    __db_vrfy_getpageinfo(vdp, i, &pip)) != 0)
656					goto err1;
657				pip->type = P_INVALID;
658				pip->pgno = i;
659				F_CLR(pip, VRFY_IS_ALLZEROES);
660				if ((t_ret = __db_vrfy_putpageinfo(
661				    env, vdp, pip)) != 0)
662					goto err1;
663				continue;
664			}
665err1:			if (ret == 0)
666				ret = t_ret;
667			if (LF_ISSET(DB_SALVAGE))
668				continue;
669			return (ret);
670		}
671
672		if (LF_ISSET(DB_SALVAGE)) {
673			/*
674			 * We pretty much don't want to quit unless a
675			 * bomb hits.  May as well return that something
676			 * was screwy, however.
677			 */
678			if ((t_ret = __db_salvage(dbp,
679			    vdp, i, h, handle, callback, flags)) != 0) {
680				if (ret == 0)
681					ret = t_ret;
682				isbad = 1;
683			}
684		} else {
685			/*
686			 * If we are not salvaging, and we get any error
687			 * other than DB_VERIFY_BAD, return immediately;
688			 * it may not be safe to proceed.  If we get
689			 * DB_VERIFY_BAD, keep going;  listing more errors
690			 * may make it easier to diagnose problems and
691			 * determine the magnitude of the corruption.
692			 *
693			 * Verify info common to all page types.
694			 */
695			if (i != PGNO_BASE_MD) {
696				ret = __db_vrfy_common(dbp, vdp, h, i, flags);
697				if (ret == DB_VERIFY_BAD)
698					isbad = 1;
699				else if (ret != 0)
700					goto err;
701			}
702
703			switch (TYPE(h)) {
704			case P_INVALID:
705				ret = __db_vrfy_invalid(dbp, vdp, h, i, flags);
706				break;
707			case __P_DUPLICATE:
708				isbad = 1;
709				EPRINT((env,
710				    "Page %lu: old-style duplicate page",
711				    (u_long)i));
712				break;
713			case P_HASH_UNSORTED:
714			case P_HASH:
715				ret = __ham_vrfy(dbp, vdp, h, i, flags);
716				break;
717			case P_IBTREE:
718			case P_IRECNO:
719			case P_LBTREE:
720			case P_LDUP:
721				ret = __bam_vrfy(dbp, vdp, h, i, flags);
722				break;
723			case P_LRECNO:
724				ret = __ram_vrfy_leaf(dbp, vdp, h, i, flags);
725				break;
726			case P_OVERFLOW:
727				ret = __db_vrfy_overflow(dbp, vdp, h, i, flags);
728				break;
729			case P_HASHMETA:
730				ret = __ham_vrfy_meta(dbp,
731				    vdp, (HMETA *)h, i, flags);
732				break;
733			case P_BTREEMETA:
734				ret = __bam_vrfy_meta(dbp,
735				    vdp, (BTMETA *)h, i, flags);
736				break;
737			case P_QAMMETA:
738				ret = __qam_vrfy_meta(dbp,
739				    vdp, (QMETA *)h, i, flags);
740				break;
741			case P_QAMDATA:
742				ret = __qam_vrfy_data(dbp,
743				    vdp, (QPAGE *)h, i, flags);
744				break;
745			default:
746				EPRINT((env,
747				    "Page %lu: unknown page type %lu",
748				    (u_long)i, (u_long)TYPE(h)));
749				isbad = 1;
750				break;
751			}
752
753			/*
754			 * Set up error return.
755			 */
756			if (ret == DB_VERIFY_BAD)
757				isbad = 1;
758			else if (ret != 0)
759				goto err;
760
761			/*
762			 * Provide feedback to the application about our
763			 * progress.  The range 0-50% comes from the fact
764			 * that this is the first of two passes through the
765			 * database (front-to-back, then top-to-bottom).
766			 */
767			if (dbp->db_feedback != NULL)
768				dbp->db_feedback(dbp, DB_VERIFY,
769				    (int)((i + 1) * 50 / (vdp->last_pgno + 1)));
770		}
771
772		/*
773		 * Just as with the page get, bail if and only if we're
774		 * not salvaging.
775		 */
776		if ((t_ret = __memp_fput(mpf,
777		    vdp->thread_info, h, dbp->priority)) != 0) {
778			if (ret == 0)
779				ret = t_ret;
780			if (!LF_ISSET(DB_SALVAGE))
781				return (ret);
782		}
783	}
784
785	/*
786	 * If we've seen a Queue metadata page, we may need to walk Queue
787	 * extent pages that won't show up between 0 and vdp->last_pgno.
788	 */
789	if (F_ISSET(vdp, VRFY_QMETA_SET) && (t_ret =
790	    __qam_vrfy_walkqueue(dbp, vdp, handle, callback, flags)) != 0) {
791		if (ret == 0)
792			ret = t_ret;
793		if (t_ret == DB_VERIFY_BAD)
794			isbad = 1;
795		else if (!LF_ISSET(DB_SALVAGE))
796			return (ret);
797	}
798
799	if (0) {
800err:		if (h != NULL && (t_ret = __memp_fput(mpf,
801		    vdp->thread_info, h, dbp->priority)) != 0)
802			return (ret == 0 ? t_ret : ret);
803	}
804
805	return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
806}
807
808/*
809 * __db_vrfy_structure--
810 *	After a beginning-to-end walk through the database has been
811 *	completed, put together the information that has been collected
812 *	to verify the overall database structure.
813 *
814 *	Should only be called if we want to do a database verification,
815 *	i.e. if DB_SALVAGE is not set.
816 */
817static int
818__db_vrfy_structure(dbp, vdp, dbname, meta_pgno, flags)
819	DB *dbp;
820	VRFY_DBINFO *vdp;
821	const char *dbname;
822	db_pgno_t meta_pgno;
823	u_int32_t flags;
824{
825	DB *pgset;
826	ENV *env;
827	VRFY_PAGEINFO *pip;
828	db_pgno_t i;
829	int ret, isbad, hassubs, p;
830
831	isbad = 0;
832	pip = NULL;
833	env = dbp->env;
834	pgset = vdp->pgset;
835
836	/*
837	 * Providing feedback here is tricky;  in most situations,
838	 * we fetch each page one more time, but we do so in a top-down
839	 * order that depends on the access method.  Worse, we do this
840	 * recursively in btree, such that on any call where we're traversing
841	 * a subtree we don't know where that subtree is in the whole database;
842	 * worse still, any given database may be one of several subdbs.
843	 *
844	 * The solution is to decrement a counter vdp->pgs_remaining each time
845	 * we verify (and call feedback on) a page.  We may over- or
846	 * under-count, but the structure feedback function will ensure that we
847	 * never give a percentage under 50 or over 100.  (The first pass
848	 * covered the range 0-50%.)
849	 */
850	if (dbp->db_feedback != NULL)
851		vdp->pgs_remaining = vdp->last_pgno + 1;
852
853	/*
854	 * Call the appropriate function to downwards-traverse the db type.
855	 */
856	switch (dbp->type) {
857	case DB_BTREE:
858	case DB_RECNO:
859		if ((ret = __bam_vrfy_structure(dbp, vdp, 0, flags)) != 0) {
860			if (ret == DB_VERIFY_BAD)
861				isbad = 1;
862			else
863				goto err;
864		}
865
866		/*
867		 * If we have subdatabases and we know that the database is,
868		 * thus far, sound, it's safe to walk the tree of subdatabases.
869		 * Do so, and verify the structure of the databases within.
870		 */
871		if ((ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) != 0)
872			goto err;
873		hassubs = F_ISSET(pip, VRFY_HAS_SUBDBS) ? 1 : 0;
874		if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
875			goto err;
876		pip = NULL;
877
878		if (isbad == 0 && hassubs)
879			if ((ret =
880			    __db_vrfy_subdbs(dbp, vdp, dbname, flags)) != 0) {
881				if (ret == DB_VERIFY_BAD)
882					isbad = 1;
883				else
884					goto err;
885			}
886		break;
887	case DB_HASH:
888		if ((ret = __ham_vrfy_structure(dbp, vdp, 0, flags)) != 0) {
889			if (ret == DB_VERIFY_BAD)
890				isbad = 1;
891			else
892				goto err;
893		}
894		break;
895	case DB_QUEUE:
896		if ((ret = __qam_vrfy_structure(dbp, vdp, flags)) != 0) {
897			if (ret == DB_VERIFY_BAD)
898				isbad = 1;
899		}
900
901		/*
902		 * Queue pages may be unreferenced and totally zeroed, if
903		 * they're empty;  queue doesn't have much structure, so
904		 * this is unlikely to be wrong in any troublesome sense.
905		 * Skip to "err".
906		 */
907		goto err;
908	case DB_UNKNOWN:
909	default:
910		ret = __db_unknown_path(env, "__db_vrfy_structure");
911		goto err;
912	}
913
914	/* Walk free list. */
915	if ((ret =
916	    __db_vrfy_freelist(dbp, vdp, meta_pgno, flags)) == DB_VERIFY_BAD)
917		isbad = 1;
918
919	/*
920	 * If structure checks up until now have failed, it's likely that
921	 * checking what pages have been missed will result in oodles of
922	 * extraneous error messages being EPRINTed.  Skip to the end
923	 * if this is the case;  we're going to be printing at least one
924	 * error anyway, and probably all the more salient ones.
925	 */
926	if (ret != 0 || isbad == 1)
927		goto err;
928
929	/*
930	 * Make sure no page has been missed and that no page is still marked
931	 * "all zeroes" (only certain hash pages can be, and they're unmarked
932	 * in __ham_vrfy_structure).
933	 */
934	for (i = 0; i < vdp->last_pgno + 1; i++) {
935		if ((ret = __db_vrfy_getpageinfo(vdp, i, &pip)) != 0)
936			goto err;
937		if ((ret = __db_vrfy_pgset_get(pgset,
938		    vdp->thread_info, i, &p)) != 0)
939			goto err;
940		if (pip->type == P_OVERFLOW) {
941			if ((u_int32_t)p != pip->refcount) {
942				EPRINT((env,
943		    "Page %lu: overflow refcount %lu, referenced %lu times",
944				    (u_long)i,
945				    (u_long)pip->refcount, (u_long)p));
946				isbad = 1;
947			}
948		} else if (p == 0 &&
949#ifndef HAVE_FTRUNCATE
950		    !(i > vdp->meta_last_pgno &&
951		    (F_ISSET(pip, VRFY_IS_ALLZEROES) || pip->type == P_HASH)) &&
952#endif
953		    !(dbp->type == DB_HASH && pip->type == P_INVALID)) {
954			/*
955			 * It is OK for unreferenced hash buckets to be
956			 * marked invalid and unreferenced.
957			 */
958			EPRINT((env,
959			    "Page %lu: unreferenced page", (u_long)i));
960			isbad = 1;
961		}
962
963		if (F_ISSET(pip, VRFY_IS_ALLZEROES)
964#ifndef HAVE_FTRUNCATE
965		    && i <= vdp->meta_last_pgno
966#endif
967		    ) {
968			EPRINT((env,
969			    "Page %lu: totally zeroed page", (u_long)i));
970			isbad = 1;
971		}
972		if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
973			goto err;
974		pip = NULL;
975	}
976
977err:	if (pip != NULL)
978		(void)__db_vrfy_putpageinfo(env, vdp, pip);
979
980	return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
981}
982
983/*
984 * __db_is_valid_magicno
985 */
986static int
987__db_is_valid_magicno(magic, typep)
988	u_int32_t magic;
989	DBTYPE *typep;
990{
991	switch (magic) {
992	case DB_BTREEMAGIC:
993		*typep = DB_BTREE;
994		return (1);
995	case DB_HASHMAGIC:
996		*typep = DB_HASH;
997		return (1);
998	case DB_QAMMAGIC:
999		*typep = DB_QUEUE;
1000		return (1);
1001	default:
1002		break;
1003	}
1004	*typep = DB_UNKNOWN;
1005	return (0);
1006}
1007
1008/*
1009 * __db_vrfy_common --
1010 *	Verify info common to all page types.
1011 *
1012 * PUBLIC: int  __db_vrfy_common
1013 * PUBLIC:     __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
1014 */
1015int
1016__db_vrfy_common(dbp, vdp, h, pgno, flags)
1017	DB *dbp;
1018	VRFY_DBINFO *vdp;
1019	PAGE *h;
1020	db_pgno_t pgno;
1021	u_int32_t flags;
1022{
1023	ENV *env;
1024	VRFY_PAGEINFO *pip;
1025	int ret, t_ret;
1026	u_int8_t *p;
1027
1028	env = dbp->env;
1029
1030	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
1031		return (ret);
1032
1033	pip->pgno = pgno;
1034	F_CLR(pip, VRFY_IS_ALLZEROES);
1035
1036	/*
1037	 * Hash expands the table by leaving some pages between the
1038	 * old last and the new last totally zeroed.  These pages may
1039	 * not be all zero if they were used, freed and then reallocated.
1040	 *
1041	 * Queue will create sparse files if sparse record numbers are used.
1042	 */
1043	if (pgno != 0 && PGNO(h) == 0) {
1044		F_SET(pip, VRFY_IS_ALLZEROES);
1045		for (p = (u_int8_t *)h; p < (u_int8_t *)h + dbp->pgsize; p++)
1046			if (*p != 0) {
1047				F_CLR(pip, VRFY_IS_ALLZEROES);
1048				break;
1049			}
1050		/*
1051		 * Mark it as a hash, and we'll
1052		 * check that that makes sense structurally later.
1053		 * (The queue verification doesn't care, since queues
1054		 * don't really have much in the way of structure.)
1055		 */
1056		pip->type = P_HASH;
1057		ret = 0;
1058		goto err;	/* well, not really an err. */
1059	}
1060
1061	if (PGNO(h) != pgno) {
1062		EPRINT((env, "Page %lu: bad page number %lu",
1063		    (u_long)pgno, (u_long)h->pgno));
1064		ret = DB_VERIFY_BAD;
1065	}
1066
1067	switch (h->type) {
1068	case P_INVALID:			/* Order matches ordinal value. */
1069	case P_HASH_UNSORTED:
1070	case P_IBTREE:
1071	case P_IRECNO:
1072	case P_LBTREE:
1073	case P_LRECNO:
1074	case P_OVERFLOW:
1075	case P_HASHMETA:
1076	case P_BTREEMETA:
1077	case P_QAMMETA:
1078	case P_QAMDATA:
1079	case P_LDUP:
1080	case P_HASH:
1081		break;
1082	default:
1083		EPRINT((env, "Page %lu: bad page type %lu",
1084		    (u_long)pgno, (u_long)h->type));
1085		ret = DB_VERIFY_BAD;
1086	}
1087	pip->type = h->type;
1088
1089err:	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1090		ret = t_ret;
1091
1092	return (ret);
1093}
1094
1095/*
1096 * __db_vrfy_invalid --
1097 *	Verify P_INVALID page.
1098 *	(Yes, there's not much to do here.)
1099 */
1100static int
1101__db_vrfy_invalid(dbp, vdp, h, pgno, flags)
1102	DB *dbp;
1103	VRFY_DBINFO *vdp;
1104	PAGE *h;
1105	db_pgno_t pgno;
1106	u_int32_t flags;
1107{
1108	ENV *env;
1109	VRFY_PAGEINFO *pip;
1110	int ret, t_ret;
1111
1112	env = dbp->env;
1113
1114	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
1115		return (ret);
1116	pip->next_pgno = pip->prev_pgno = 0;
1117
1118	if (!IS_VALID_PGNO(NEXT_PGNO(h))) {
1119		EPRINT((env, "Page %lu: invalid next_pgno %lu",
1120		    (u_long)pgno, (u_long)NEXT_PGNO(h)));
1121		ret = DB_VERIFY_BAD;
1122	} else
1123		pip->next_pgno = NEXT_PGNO(h);
1124
1125	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1126		ret = t_ret;
1127	return (ret);
1128}
1129
1130/*
1131 * __db_vrfy_datapage --
1132 *	Verify elements common to data pages (P_HASH, P_LBTREE,
1133 *	P_IBTREE, P_IRECNO, P_LRECNO, P_OVERFLOW, P_DUPLICATE)--i.e.,
1134 *	those defined in the PAGE structure.
1135 *
1136 *	Called from each of the per-page routines, after the
1137 *	all-page-type-common elements of pip have been verified and filled
1138 *	in.
1139 *
1140 * PUBLIC: int __db_vrfy_datapage
1141 * PUBLIC:     __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
1142 */
1143int
1144__db_vrfy_datapage(dbp, vdp, h, pgno, flags)
1145	DB *dbp;
1146	VRFY_DBINFO *vdp;
1147	PAGE *h;
1148	db_pgno_t pgno;
1149	u_int32_t flags;
1150{
1151	ENV *env;
1152	VRFY_PAGEINFO *pip;
1153	u_int32_t smallest_entry;
1154	int isbad, ret, t_ret;
1155
1156	env = dbp->env;
1157
1158	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
1159		return (ret);
1160	isbad = 0;
1161
1162	/*
1163	 * prev_pgno and next_pgno:  store for inter-page checks,
1164	 * verify that they point to actual pages and not to self.
1165	 *
1166	 * !!!
1167	 * Internal btree pages do not maintain these fields (indeed,
1168	 * they overload them).  Skip.
1169	 */
1170	if (TYPE(h) != P_IBTREE && TYPE(h) != P_IRECNO) {
1171		if (!IS_VALID_PGNO(PREV_PGNO(h)) || PREV_PGNO(h) == pip->pgno) {
1172			isbad = 1;
1173			EPRINT((env, "Page %lu: invalid prev_pgno %lu",
1174			    (u_long)pip->pgno, (u_long)PREV_PGNO(h)));
1175		}
1176		if (!IS_VALID_PGNO(NEXT_PGNO(h)) || NEXT_PGNO(h) == pip->pgno) {
1177			isbad = 1;
1178			EPRINT((env, "Page %lu: invalid next_pgno %lu",
1179			    (u_long)pip->pgno, (u_long)NEXT_PGNO(h)));
1180		}
1181		pip->prev_pgno = PREV_PGNO(h);
1182		pip->next_pgno = NEXT_PGNO(h);
1183	}
1184
1185	/*
1186	 * Verify the number of entries on the page: there's no good way to
1187	 * determine if this is accurate.  The best we can do is verify that
1188	 * it's not more than can, in theory, fit on the page.  Then, we make
1189	 * sure there are at least this many valid elements in inp[], and
1190	 * hope the test catches most cases.
1191	 */
1192	switch (TYPE(h)) {
1193	case P_HASH_UNSORTED:
1194	case P_HASH:
1195		smallest_entry = HKEYDATA_PSIZE(0);
1196		break;
1197	case P_IBTREE:
1198		smallest_entry = BINTERNAL_PSIZE(0);
1199		break;
1200	case P_IRECNO:
1201		smallest_entry = RINTERNAL_PSIZE;
1202		break;
1203	case P_LBTREE:
1204	case P_LDUP:
1205	case P_LRECNO:
1206		smallest_entry = BKEYDATA_PSIZE(0);
1207		break;
1208	default:
1209		smallest_entry = 0;
1210		break;
1211	}
1212	if (smallest_entry * NUM_ENT(h) / 2 > dbp->pgsize) {
1213		isbad = 1;
1214		EPRINT((env, "Page %lu: too many entries: %lu",
1215		    (u_long)pgno, (u_long)NUM_ENT(h)));
1216	}
1217
1218	if (TYPE(h) != P_OVERFLOW)
1219		pip->entries = NUM_ENT(h);
1220
1221	/*
1222	 * btree level.  Should be zero unless we're a btree;
1223	 * if we are a btree, should be between LEAFLEVEL and MAXBTREELEVEL,
1224	 * and we need to save it off.
1225	 */
1226	switch (TYPE(h)) {
1227	case P_IBTREE:
1228	case P_IRECNO:
1229		if (LEVEL(h) < LEAFLEVEL + 1) {
1230			isbad = 1;
1231			EPRINT((env, "Page %lu: bad btree level %lu",
1232			    (u_long)pgno, (u_long)LEVEL(h)));
1233		}
1234		pip->bt_level = LEVEL(h);
1235		break;
1236	case P_LBTREE:
1237	case P_LDUP:
1238	case P_LRECNO:
1239		if (LEVEL(h) != LEAFLEVEL) {
1240			isbad = 1;
1241			EPRINT((env,
1242			    "Page %lu: btree leaf page has incorrect level %lu",
1243			    (u_long)pgno, (u_long)LEVEL(h)));
1244		}
1245		break;
1246	default:
1247		if (LEVEL(h) != 0) {
1248			isbad = 1;
1249			EPRINT((env,
1250			    "Page %lu: nonzero level %lu in non-btree database",
1251			    (u_long)pgno, (u_long)LEVEL(h)));
1252		}
1253		break;
1254	}
1255
1256	/*
1257	 * Even though inp[] occurs in all PAGEs, we look at it in the
1258	 * access-method-specific code, since btree and hash treat
1259	 * item lengths very differently, and one of the most important
1260	 * things we want to verify is that the data--as specified
1261	 * by offset and length--cover the right part of the page
1262	 * without overlaps, gaps, or violations of the page boundary.
1263	 */
1264	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1265		ret = t_ret;
1266
1267	return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
1268}
1269
1270/*
1271 * __db_vrfy_meta--
1272 *	Verify the access-method common parts of a meta page, using
1273 *	normal mpool routines.
1274 *
1275 * PUBLIC: int __db_vrfy_meta
1276 * PUBLIC:     __P((DB *, VRFY_DBINFO *, DBMETA *, db_pgno_t, u_int32_t));
1277 */
1278int
1279__db_vrfy_meta(dbp, vdp, meta, pgno, flags)
1280	DB *dbp;
1281	VRFY_DBINFO *vdp;
1282	DBMETA *meta;
1283	db_pgno_t pgno;
1284	u_int32_t flags;
1285{
1286	DBTYPE dbtype, magtype;
1287	ENV *env;
1288	VRFY_PAGEINFO *pip;
1289	int isbad, ret, t_ret;
1290
1291	isbad = 0;
1292	env = dbp->env;
1293
1294	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
1295		return (ret);
1296
1297	/* type plausible for a meta page */
1298	switch (meta->type) {
1299	case P_BTREEMETA:
1300		dbtype = DB_BTREE;
1301		break;
1302	case P_HASHMETA:
1303		dbtype = DB_HASH;
1304		break;
1305	case P_QAMMETA:
1306		dbtype = DB_QUEUE;
1307		break;
1308	default:
1309		ret = __db_unknown_path(env, "__db_vrfy_meta");
1310		goto err;
1311	}
1312
1313	/* magic number valid */
1314	if (!__db_is_valid_magicno(meta->magic, &magtype)) {
1315		isbad = 1;
1316		EPRINT((env,
1317		    "Page %lu: invalid magic number", (u_long)pgno));
1318	}
1319	if (magtype != dbtype) {
1320		isbad = 1;
1321		EPRINT((env,
1322		    "Page %lu: magic number does not match database type",
1323		    (u_long)pgno));
1324	}
1325
1326	/* version */
1327	if ((dbtype == DB_BTREE &&
1328	    (meta->version > DB_BTREEVERSION ||
1329	    meta->version < DB_BTREEOLDVER)) ||
1330	    (dbtype == DB_HASH &&
1331	    (meta->version > DB_HASHVERSION ||
1332	    meta->version < DB_HASHOLDVER)) ||
1333	    (dbtype == DB_QUEUE &&
1334	    (meta->version > DB_QAMVERSION ||
1335	    meta->version < DB_QAMOLDVER))) {
1336		isbad = 1;
1337		EPRINT((env,
1338    "Page %lu: unsupported database version %lu; extraneous errors may result",
1339		    (u_long)pgno, (u_long)meta->version));
1340	}
1341
1342	/* pagesize */
1343	if (meta->pagesize != dbp->pgsize) {
1344		isbad = 1;
1345		EPRINT((env, "Page %lu: invalid pagesize %lu",
1346		    (u_long)pgno, (u_long)meta->pagesize));
1347	}
1348
1349	/* Flags */
1350	if (meta->metaflags != 0) {
1351		if (meta->metaflags == DBMETA_CHKSUM)
1352			F_SET(pip, VRFY_HAS_CHKSUM);
1353		else {
1354			isbad = 1;
1355			EPRINT((env,
1356			    "Page %lu: bad meta-data flags value %#lx",
1357			    (u_long)PGNO_BASE_MD, (u_long)meta->metaflags));
1358		}
1359	}
1360
1361	/*
1362	 * Free list.
1363	 *
1364	 * If this is not the main, master-database meta page, it
1365	 * should not have a free list.
1366	 */
1367	if (pgno != PGNO_BASE_MD && meta->free != PGNO_INVALID) {
1368		isbad = 1;
1369		EPRINT((env,
1370		    "Page %lu: nonempty free list on subdatabase metadata page",
1371		    (u_long)pgno));
1372	}
1373
1374	/* Can correctly be PGNO_INVALID--that's just the end of the list. */
1375	if (meta->free != PGNO_INVALID && IS_VALID_PGNO(meta->free))
1376		pip->free = meta->free;
1377	else if (!IS_VALID_PGNO(meta->free)) {
1378		isbad = 1;
1379		EPRINT((env,
1380		    "Page %lu: nonsensical free list pgno %lu",
1381		    (u_long)pgno, (u_long)meta->free));
1382	}
1383
1384	/*
1385	 * Check that the meta page agrees with what we got from mpool.
1386	 * If we don't have FTRUNCATE then mpool could include some
1387	 * zeroed pages at the end of the file, we assume the meta page
1388	 * is correct.
1389	 */
1390	if (pgno == PGNO_BASE_MD && meta->last_pgno != vdp->last_pgno) {
1391#ifdef HAVE_FTRUNCATE
1392		isbad = 1;
1393		EPRINT((env,
1394		    "Page %lu: last_pgno is not correct: %lu != %lu",
1395		    (u_long)pgno,
1396		    (u_long)meta->last_pgno, (u_long)vdp->last_pgno));
1397#endif
1398		vdp->meta_last_pgno = meta->last_pgno;
1399	}
1400
1401	/*
1402	 * We have now verified the common fields of the metadata page.
1403	 * Clear the flag that told us they had been incompletely checked.
1404	 */
1405	F_CLR(pip, VRFY_INCOMPLETE);
1406
1407err:	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1408		ret = t_ret;
1409
1410	return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
1411}
1412
1413/*
1414 * __db_vrfy_freelist --
1415 *	Walk free list, checking off pages and verifying absence of
1416 *	loops.
1417 */
1418static int
1419__db_vrfy_freelist(dbp, vdp, meta, flags)
1420	DB *dbp;
1421	VRFY_DBINFO *vdp;
1422	db_pgno_t meta;
1423	u_int32_t flags;
1424{
1425	DB *pgset;
1426	ENV *env;
1427	VRFY_PAGEINFO *pip;
1428	db_pgno_t cur_pgno, next_pgno;
1429	int p, ret, t_ret;
1430
1431	env = dbp->env;
1432	pgset = vdp->pgset;
1433	DB_ASSERT(env, pgset != NULL);
1434
1435	if ((ret = __db_vrfy_getpageinfo(vdp, meta, &pip)) != 0)
1436		return (ret);
1437	for (next_pgno = pip->free;
1438	    next_pgno != PGNO_INVALID; next_pgno = pip->next_pgno) {
1439		cur_pgno = pip->pgno;
1440		if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
1441			return (ret);
1442
1443		/* This shouldn't happen, but just in case. */
1444		if (!IS_VALID_PGNO(next_pgno)) {
1445			EPRINT((env,
1446			    "Page %lu: invalid next_pgno %lu on free list page",
1447			    (u_long)cur_pgno, (u_long)next_pgno));
1448			return (DB_VERIFY_BAD);
1449		}
1450
1451		/* Detect cycles. */
1452		if ((ret = __db_vrfy_pgset_get(pgset,
1453		    vdp->thread_info, next_pgno, &p)) != 0)
1454			return (ret);
1455		if (p != 0) {
1456			EPRINT((env,
1457		    "Page %lu: page %lu encountered a second time on free list",
1458			    (u_long)cur_pgno, (u_long)next_pgno));
1459			return (DB_VERIFY_BAD);
1460		}
1461		if ((ret = __db_vrfy_pgset_inc(pgset,
1462		    vdp->thread_info, next_pgno)) != 0)
1463			return (ret);
1464
1465		if ((ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0)
1466			return (ret);
1467
1468		if (pip->type != P_INVALID) {
1469			EPRINT((env,
1470			    "Page %lu: non-invalid page %lu on free list",
1471			    (u_long)cur_pgno, (u_long)next_pgno));
1472			ret = DB_VERIFY_BAD;	  /* unsafe to continue */
1473			break;
1474		}
1475	}
1476
1477	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
1478		ret = t_ret;
1479	return (ret);
1480}
1481
1482/*
1483 * __db_vrfy_subdbs --
1484 *	Walk the known-safe master database of subdbs with a cursor,
1485 *	verifying the structure of each subdatabase we encounter.
1486 */
1487static int
1488__db_vrfy_subdbs(dbp, vdp, dbname, flags)
1489	DB *dbp;
1490	VRFY_DBINFO *vdp;
1491	const char *dbname;
1492	u_int32_t flags;
1493{
1494	DB *mdbp;
1495	DBC *dbc;
1496	DBT key, data;
1497	ENV *env;
1498	VRFY_PAGEINFO *pip;
1499	db_pgno_t meta_pgno;
1500	int ret, t_ret, isbad;
1501	u_int8_t type;
1502
1503	isbad = 0;
1504	dbc = NULL;
1505	env = dbp->env;
1506
1507	if ((ret = __db_master_open(dbp,
1508	    vdp->thread_info, NULL, dbname, DB_RDONLY, 0, &mdbp)) != 0)
1509		return (ret);
1510
1511	if ((ret = __db_cursor_int(mdbp, NULL,
1512	    NULL, DB_BTREE, PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0)
1513		goto err;
1514
1515	memset(&key, 0, sizeof(key));
1516	memset(&data, 0, sizeof(data));
1517	while ((ret = __dbc_get(dbc, &key, &data, DB_NEXT)) == 0) {
1518		if (data.size != sizeof(db_pgno_t)) {
1519			EPRINT((env,
1520			    "Subdatabase entry not page-number size"));
1521			isbad = 1;
1522			goto err;
1523		}
1524		memcpy(&meta_pgno, data.data, data.size);
1525		/*
1526		 * Subdatabase meta pgnos are stored in network byte
1527		 * order for cross-endian compatibility.  Swap if appropriate.
1528		 */
1529		DB_NTOHL_SWAP(env, &meta_pgno);
1530		if (meta_pgno == PGNO_INVALID || meta_pgno > vdp->last_pgno) {
1531			EPRINT((env,
1532		    "Subdatabase entry references invalid page %lu",
1533			    (u_long)meta_pgno));
1534			isbad = 1;
1535			goto err;
1536		}
1537		if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &pip)) != 0)
1538			goto err;
1539		type = pip->type;
1540		if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
1541			goto err;
1542		switch (type) {
1543		case P_BTREEMETA:
1544			if ((ret = __bam_vrfy_structure(
1545			    dbp, vdp, meta_pgno, flags)) != 0) {
1546				if (ret == DB_VERIFY_BAD)
1547					isbad = 1;
1548				else
1549					goto err;
1550			}
1551			break;
1552		case P_HASHMETA:
1553			if ((ret = __ham_vrfy_structure(
1554			    dbp, vdp, meta_pgno, flags)) != 0) {
1555				if (ret == DB_VERIFY_BAD)
1556					isbad = 1;
1557				else
1558					goto err;
1559			}
1560			break;
1561		case P_QAMMETA:
1562		default:
1563			EPRINT((env,
1564		    "Subdatabase entry references page %lu of invalid type %lu",
1565			    (u_long)meta_pgno, (u_long)type));
1566			ret = DB_VERIFY_BAD;
1567			goto err;
1568		}
1569	}
1570
1571	if (ret == DB_NOTFOUND)
1572		ret = 0;
1573
1574err:	if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
1575		ret = t_ret;
1576
1577	if ((t_ret = __db_close(mdbp, NULL, 0)) != 0 && ret == 0)
1578		ret = t_ret;
1579
1580	return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
1581}
1582
1583/*
1584 * __db_vrfy_struct_feedback --
1585 *	Provide feedback during top-down database structure traversal.
1586 *	(See comment at the beginning of __db_vrfy_structure.)
1587 *
1588 * PUBLIC: void __db_vrfy_struct_feedback __P((DB *, VRFY_DBINFO *));
1589 */
1590void
1591__db_vrfy_struct_feedback(dbp, vdp)
1592	DB *dbp;
1593	VRFY_DBINFO *vdp;
1594{
1595	int progress;
1596
1597	if (dbp->db_feedback == NULL)
1598		return;
1599
1600	if (vdp->pgs_remaining > 0)
1601		vdp->pgs_remaining--;
1602
1603	/* Don't allow a feedback call of 100 until we're really done. */
1604	progress = 100 - (int)(vdp->pgs_remaining * 50 / (vdp->last_pgno + 1));
1605	dbp->db_feedback(dbp, DB_VERIFY, progress == 100 ? 99 : progress);
1606}
1607
1608/*
1609 * __db_vrfy_orderchkonly --
1610 *	Do an sort-order/hashing check on a known-otherwise-good subdb.
1611 */
1612static int
1613__db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags)
1614	DB *dbp;
1615	VRFY_DBINFO *vdp;
1616	const char *name, *subdb;
1617	u_int32_t flags;
1618{
1619	BTMETA *btmeta;
1620	DB *mdbp, *pgset;
1621	DBC *pgsc;
1622	DBT key, data;
1623	DB_MPOOLFILE *mpf;
1624	ENV *env;
1625	HASH *h_internal;
1626	HMETA *hmeta;
1627	PAGE *h, *currpg;
1628	db_pgno_t meta_pgno, p, pgno;
1629	u_int32_t bucket;
1630	int t_ret, ret;
1631
1632	pgset = NULL;
1633	pgsc = NULL;
1634	env = dbp->env;
1635	mpf = dbp->mpf;
1636	currpg = h = NULL;
1637
1638	LF_CLR(DB_NOORDERCHK);
1639
1640	/* Open the master database and get the meta_pgno for the subdb. */
1641	if ((ret = __db_master_open(dbp,
1642	    vdp->thread_info, NULL, name, DB_RDONLY, 0, &mdbp)) != 0)
1643		goto err;
1644
1645	DB_INIT_DBT(key, subdb, strlen(subdb));
1646	memset(&data, 0, sizeof(data));
1647	if ((ret = __db_get(mdbp,
1648	    vdp->thread_info, NULL, &key, &data, 0)) != 0) {
1649		if (ret == DB_NOTFOUND)
1650			ret = ENOENT;
1651		goto err;
1652	}
1653
1654	if (data.size != sizeof(db_pgno_t)) {
1655		EPRINT((env, "Subdatabase entry of invalid size"));
1656		ret = DB_VERIFY_BAD;
1657		goto err;
1658	}
1659
1660	memcpy(&meta_pgno, data.data, data.size);
1661
1662	/*
1663	 * Subdatabase meta pgnos are stored in network byte
1664	 * order for cross-endian compatibility.  Swap if appropriate.
1665	 */
1666	DB_NTOHL_SWAP(env, &meta_pgno);
1667
1668	if ((ret = __memp_fget(mpf,
1669	     &meta_pgno, vdp->thread_info, NULL, 0, &h)) != 0)
1670		goto err;
1671
1672	if ((ret = __db_vrfy_pgset(env,
1673	    vdp->thread_info, dbp->pgsize, &pgset)) != 0)
1674		goto err;
1675
1676	switch (TYPE(h)) {
1677	case P_BTREEMETA:
1678		btmeta = (BTMETA *)h;
1679		if (F_ISSET(&btmeta->dbmeta, BTM_RECNO)) {
1680			/* Recnos have no order to check. */
1681			ret = 0;
1682			goto err;
1683		}
1684		if ((ret =
1685		    __db_meta2pgset(dbp, vdp, meta_pgno, flags, pgset)) != 0)
1686			goto err;
1687		if ((ret = __db_cursor_int(pgset, NULL, NULL, dbp->type,
1688		    PGNO_INVALID, 0, DB_LOCK_INVALIDID, &pgsc)) != 0)
1689			goto err;
1690		while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
1691			if ((ret = __memp_fget(mpf, &p,
1692			     vdp->thread_info, NULL, 0, &currpg)) != 0)
1693				goto err;
1694			if ((ret = __bam_vrfy_itemorder(dbp, NULL,
1695			    vdp->thread_info, currpg, p, NUM_ENT(currpg), 1,
1696			    F_ISSET(&btmeta->dbmeta, BTM_DUP), flags)) != 0)
1697				goto err;
1698			if ((ret = __memp_fput(mpf,
1699			    vdp->thread_info, currpg, dbp->priority)) != 0)
1700				goto err;
1701			currpg = NULL;
1702		}
1703
1704		/*
1705		 * The normal exit condition for the loop above is DB_NOTFOUND.
1706		 * If we see that, zero it and continue on to cleanup.
1707		 * Otherwise, it's a real error and will be returned.
1708		 */
1709		if (ret == DB_NOTFOUND)
1710			ret = 0;
1711		break;
1712	case P_HASHMETA:
1713		hmeta = (HMETA *)h;
1714		h_internal = (HASH *)dbp->h_internal;
1715		/*
1716		 * Make sure h_charkey is right.
1717		 */
1718		if (h_internal == NULL) {
1719			EPRINT((env,
1720			    "Page %lu: DB->h_internal field is NULL",
1721			    (u_long)meta_pgno));
1722			ret = DB_VERIFY_BAD;
1723			goto err;
1724		}
1725		if (h_internal->h_hash == NULL)
1726			h_internal->h_hash = hmeta->dbmeta.version < 5
1727			? __ham_func4 : __ham_func5;
1728		if (hmeta->h_charkey !=
1729		    h_internal->h_hash(dbp, CHARKEY, sizeof(CHARKEY))) {
1730			EPRINT((env,
1731			    "Page %lu: incorrect hash function for database",
1732			    (u_long)meta_pgno));
1733			ret = DB_VERIFY_BAD;
1734			goto err;
1735		}
1736
1737		/*
1738		 * Foreach bucket, verify hashing on each page in the
1739		 * corresponding chain of pages.
1740		 */
1741		for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) {
1742			pgno = BS_TO_PAGE(bucket, hmeta->spares);
1743			while (pgno != PGNO_INVALID) {
1744				if ((ret = __memp_fget(mpf, &pgno,
1745				    vdp->thread_info, NULL, 0, &currpg)) != 0)
1746					goto err;
1747				if ((ret = __ham_vrfy_hashing(dbp,
1748				    NUM_ENT(currpg), hmeta, bucket, pgno,
1749				    flags, h_internal->h_hash)) != 0)
1750					goto err;
1751				pgno = NEXT_PGNO(currpg);
1752				if ((ret = __memp_fput(mpf, vdp->thread_info,
1753				    currpg, dbp->priority)) != 0)
1754					goto err;
1755				currpg = NULL;
1756			}
1757		}
1758		break;
1759	default:
1760		EPRINT((env, "Page %lu: database metapage of bad type %lu",
1761		    (u_long)meta_pgno, (u_long)TYPE(h)));
1762		ret = DB_VERIFY_BAD;
1763		break;
1764	}
1765
1766err:	if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0)
1767		ret = t_ret;
1768	if (pgset != NULL &&
1769	    (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret == 0)
1770		ret = t_ret;
1771	if (h != NULL && (t_ret = __memp_fput(mpf,
1772	    vdp->thread_info, h, dbp->priority)) != 0)
1773		ret = t_ret;
1774	if (currpg != NULL &&
1775	    (t_ret = __memp_fput(mpf,
1776		vdp->thread_info, currpg, dbp->priority)) != 0)
1777		ret = t_ret;
1778	if ((t_ret = __db_close(mdbp, NULL, 0)) != 0)
1779		ret = t_ret;
1780	return (ret);
1781}
1782
1783/*
1784 * __db_salvage --
1785 *	Walk through a page, salvaging all likely or plausible (w/
1786 *	DB_AGGRESSIVE) key/data pairs and marking seen pages in vdp.
1787 *
1788 * PUBLIC: int __db_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t,
1789 * PUBLIC:     PAGE *, void *, int (*)(void *, const void *), u_int32_t));
1790 */
1791int
1792__db_salvage(dbp, vdp, pgno, h, handle, callback, flags)
1793	DB *dbp;
1794	VRFY_DBINFO *vdp;
1795	db_pgno_t pgno;
1796	PAGE *h;
1797	void *handle;
1798	int (*callback) __P((void *, const void *));
1799	u_int32_t flags;
1800{
1801	ENV *env;
1802	VRFY_PAGEINFO *pip;
1803	int keyflag, ret, t_ret;
1804
1805	env = dbp->env;
1806	DB_ASSERT(env, LF_ISSET(DB_SALVAGE));
1807
1808	/*
1809	 * !!!
1810	 * We dump record numbers when salvaging Queue databases, but not for
1811	 * immutable Recno databases.  The problem is we can't figure out the
1812	 * record number from the database page in the Recno case, while the
1813	 * offset in the file is sufficient for Queue.
1814	 */
1815	keyflag = 0;
1816
1817	/* If we got this page in the subdb pass, we can safely skip it. */
1818	if (__db_salvage_isdone(vdp, pgno))
1819		return (0);
1820
1821	switch (TYPE(h)) {
1822	case P_HASHMETA:
1823		ret = __ham_vrfy_meta(dbp, vdp, (HMETA *)h, pgno, flags);
1824		break;
1825	case P_BTREEMETA:
1826		ret = __bam_vrfy_meta(dbp, vdp, (BTMETA *)h, pgno, flags);
1827		break;
1828	case P_QAMMETA:
1829		keyflag = 1;
1830		ret = __qam_vrfy_meta(dbp, vdp, (QMETA *)h, pgno, flags);
1831		break;
1832	case P_HASH_UNSORTED:
1833	case P_HASH:
1834		return (__ham_salvage(dbp, vdp,
1835		    pgno, h, handle, callback, flags));
1836	case P_LBTREE:
1837		return (__bam_salvage(dbp, vdp,
1838		    pgno, P_LBTREE, h, handle, callback, NULL, flags));
1839	case P_LDUP:
1840		return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LDUP));
1841	case P_OVERFLOW:
1842		return (__db_salvage_markneeded(vdp, pgno, SALVAGE_OVERFLOW));
1843	case P_LRECNO:
1844		/*
1845		 * Recnos are tricky -- they may represent dup pages, or
1846		 * they may be subdatabase/regular database pages in their
1847		 * own right.  If the former, they need to be printed with a
1848		 * key, preferably when we hit the corresponding datum in
1849		 * a btree/hash page.  If the latter, there is no key.
1850		 *
1851		 * If a database is sufficiently frotzed, we're not going
1852		 * to be able to get this right, so we best-guess:  just
1853		 * mark it needed now, and if we're really a normal recno
1854		 * database page, the "unknowns" pass will pick us up.
1855		 */
1856		return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LRECNO));
1857	case P_QAMDATA:
1858		return (__qam_salvage(dbp, vdp,
1859		    pgno, h, handle, callback, flags));
1860	case P_IBTREE:
1861		/*
1862		 * We need to mark any overflow keys on internal pages as seen,
1863		 * so we don't print them out in __db_salvage_unknowns.  But if
1864		 * we're an upgraded database, a P_LBTREE page may very well
1865		 * have a reference to the same overflow pages (this practice
1866		 * stopped somewhere around db4.5).  To give P_LBTREEs a chance
1867		 * to print out any keys on shared pages, mark the page now and
1868		 * deal with it at the end.
1869		 */
1870		return (__db_salvage_markneeded(vdp, pgno, SALVAGE_IBTREE));
1871	case P_INVALID:
1872	case P_IRECNO:
1873	case __P_DUPLICATE:
1874	default:
1875		/*
1876		 * There's no need to display an error, the page type was
1877		 * already checked and reported on.
1878		 */
1879		return (0);
1880	}
1881	if (ret != 0)
1882		return (ret);
1883
1884	/*
1885	 * We have to display the dump header if it's a metadata page.  It's
1886	 * our last chance as the page was marked "seen" in the vrfy routine,
1887	 * and  we won't see the page again.  We don't display headers for
1888	 * the first database in a multi-database file, that database simply
1889	 * contains a list of subdatabases.
1890	 */
1891	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
1892		return (ret);
1893	if (!F_ISSET(pip, VRFY_HAS_SUBDBS))
1894		ret = __db_prheader(
1895		    dbp, NULL, 0, keyflag, handle, callback, vdp, pgno);
1896	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1897		ret = t_ret;
1898	return (ret);
1899}
1900
1901/*
1902 * __db_salvage_unknowns --
1903 *	Walk through the salvager database, printing with key "UNKNOWN"
1904 *	any pages we haven't dealt with.
1905 */
1906static int
1907__db_salvage_unknowns(dbp, vdp, handle, callback, flags)
1908	DB *dbp;
1909	VRFY_DBINFO *vdp;
1910	void *handle;
1911	int (*callback) __P((void *, const void *));
1912	u_int32_t flags;
1913{
1914	DBC *dbc;
1915	DBT unkdbt, key, *dbt;
1916	DB_MPOOLFILE *mpf;
1917	ENV *env;
1918	PAGE *h;
1919	db_pgno_t pgno;
1920	u_int32_t pgtype;
1921	int ret, t_ret;
1922	void *ovflbuf;
1923
1924	dbc = NULL;
1925	env = dbp->env;
1926	mpf = dbp->mpf;
1927
1928	DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1);
1929
1930	if ((ret = __os_malloc(env, dbp->pgsize, &ovflbuf)) != 0)
1931		return (ret);
1932
1933	/*
1934	 * We make two passes -- in the first pass, skip SALVAGE_OVERFLOW
1935	 * pages, because they may be referenced by the standard database
1936	 * pages that we're resolving.
1937	 */
1938	while ((t_ret =
1939	    __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 1)) == 0) {
1940		if ((t_ret = __memp_fget(mpf,
1941		    &pgno, vdp->thread_info, NULL, 0, &h)) != 0) {
1942			if (ret == 0)
1943				ret = t_ret;
1944			continue;
1945		}
1946
1947		dbt = NULL;
1948		switch (pgtype) {
1949		case SALVAGE_LDUP:
1950		case SALVAGE_LRECNODUP:
1951			dbt = &unkdbt;
1952			/* FALLTHROUGH */
1953		case SALVAGE_IBTREE:
1954		case SALVAGE_LBTREE:
1955		case SALVAGE_LRECNO:
1956			if ((t_ret = __bam_salvage(dbp, vdp, pgno, pgtype,
1957			    h, handle, callback, dbt, flags)) != 0 && ret == 0)
1958				ret = t_ret;
1959			break;
1960		case SALVAGE_OVERFLOW:
1961			DB_ASSERT(env, 0);	/* Shouldn't ever happen. */
1962			break;
1963		case SALVAGE_HASH:
1964			if ((t_ret = __ham_salvage(dbp, vdp,
1965			    pgno, h, handle, callback, flags)) != 0 && ret == 0)
1966				ret = t_ret;
1967			break;
1968		case SALVAGE_INVALID:
1969		case SALVAGE_IGNORE:
1970		default:
1971			/*
1972			 * Shouldn't happen, but if it does, just do what the
1973			 * nice man says.
1974			 */
1975			DB_ASSERT(env, 0);
1976			break;
1977		}
1978		if ((t_ret = __memp_fput(mpf,
1979		    vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
1980			ret = t_ret;
1981	}
1982
1983	/* We should have reached the end of the database. */
1984	if (t_ret == DB_NOTFOUND)
1985		t_ret = 0;
1986	if (t_ret != 0 && ret == 0)
1987		ret = t_ret;
1988
1989	/* Re-open the cursor so we traverse the database again. */
1990	if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
1991		ret = t_ret;
1992	dbc = NULL;
1993
1994	/* Now, deal with any remaining overflow pages. */
1995	while ((t_ret =
1996	    __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 0)) == 0) {
1997		if ((t_ret = __memp_fget(mpf,
1998		    &pgno, vdp->thread_info, NULL, 0, &h)) != 0) {
1999			if (ret == 0)
2000				ret = t_ret;
2001			continue;
2002		}
2003
2004		switch (pgtype) {
2005		case SALVAGE_OVERFLOW:
2006			/*
2007			 * XXX:
2008			 * This may generate multiple "UNKNOWN" keys in
2009			 * a database with no dups.  What to do?
2010			 */
2011			if ((t_ret = __db_safe_goff(dbp,
2012			    vdp, pgno, &key, &ovflbuf, flags)) != 0 ||
2013			    ((vdp->type == DB_BTREE || vdp->type == DB_HASH) &&
2014			    (t_ret = __db_vrfy_prdbt(&unkdbt,
2015			    0, " ", handle, callback, 0, vdp)) != 0) ||
2016			    (t_ret = __db_vrfy_prdbt(
2017			    &key, 0, " ", handle, callback, 0, vdp)) != 0)
2018				if (ret == 0)
2019					ret = t_ret;
2020			break;
2021		default:
2022			DB_ASSERT(env, 0);	/* Shouldn't ever happen. */
2023			break;
2024		}
2025		if ((t_ret = __memp_fput(mpf,
2026		    vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
2027			ret = t_ret;
2028	}
2029
2030	/* We should have reached the end of the database. */
2031	if (t_ret == DB_NOTFOUND)
2032		t_ret = 0;
2033	if (t_ret != 0 && ret == 0)
2034		ret = t_ret;
2035
2036	if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
2037		ret = t_ret;
2038
2039	__os_free(env, ovflbuf);
2040
2041	return (ret);
2042}
2043
2044/*
2045 * Offset of the ith inp array entry, which we can compare to the offset
2046 * the entry stores.
2047 */
2048#define	INP_OFFSET(dbp, h, i)	\
2049    ((db_indx_t)((u_int8_t *)((P_INP(dbp,(h))) + (i)) - (u_int8_t *)(h)))
2050
2051/*
2052 * __db_vrfy_inpitem --
2053 *	Verify that a single entry in the inp array is sane, and update
2054 *	the high water mark and current item offset.  (The former of these is
2055 *	used for state information between calls, and is required;  it must
2056 *	be initialized to the pagesize before the first call.)
2057 *
2058 *	Returns DB_VERIFY_FATAL if inp has collided with the data,
2059 *	since verification can't continue from there;  returns DB_VERIFY_BAD
2060 *	if anything else is wrong.
2061 *
2062 * PUBLIC: int __db_vrfy_inpitem __P((DB *, PAGE *,
2063 * PUBLIC:     db_pgno_t, u_int32_t, int, u_int32_t, u_int32_t *, u_int32_t *));
2064 */
2065int
2066__db_vrfy_inpitem(dbp, h, pgno, i, is_btree, flags, himarkp, offsetp)
2067	DB *dbp;
2068	PAGE *h;
2069	db_pgno_t pgno;
2070	u_int32_t i;
2071	int is_btree;
2072	u_int32_t flags, *himarkp, *offsetp;
2073{
2074	BKEYDATA *bk;
2075	ENV *env;
2076	db_indx_t *inp, offset, len;
2077
2078	env = dbp->env;
2079
2080	DB_ASSERT(env, himarkp != NULL);
2081	inp = P_INP(dbp, h);
2082
2083	/*
2084	 * Check that the inp array, which grows from the beginning of the
2085	 * page forward, has not collided with the data, which grow from the
2086	 * end of the page backward.
2087	 */
2088	if (inp + i >= (db_indx_t *)((u_int8_t *)h + *himarkp)) {
2089		/* We've collided with the data.  We need to bail. */
2090		EPRINT((env, "Page %lu: entries listing %lu overlaps data",
2091		    (u_long)pgno, (u_long)i));
2092		return (DB_VERIFY_FATAL);
2093	}
2094
2095	offset = inp[i];
2096
2097	/*
2098	 * Check that the item offset is reasonable:  it points somewhere
2099	 * after the inp array and before the end of the page.
2100	 */
2101	if (offset <= INP_OFFSET(dbp, h, i) || offset > dbp->pgsize) {
2102		EPRINT((env, "Page %lu: bad offset %lu at page index %lu",
2103		    (u_long)pgno, (u_long)offset, (u_long)i));
2104		return (DB_VERIFY_BAD);
2105	}
2106
2107	/* Update the high-water mark (what HOFFSET should be) */
2108	if (offset < *himarkp)
2109		*himarkp = offset;
2110
2111	if (is_btree) {
2112		/*
2113		 * Check alignment;  if it's unaligned, it's unsafe to
2114		 * manipulate this item.
2115		 */
2116		if (offset != DB_ALIGN(offset, sizeof(u_int32_t))) {
2117			EPRINT((env,
2118			    "Page %lu: unaligned offset %lu at page index %lu",
2119			    (u_long)pgno, (u_long)offset, (u_long)i));
2120			return (DB_VERIFY_BAD);
2121		}
2122
2123		/*
2124		 * Check that the item length remains on-page.
2125		 */
2126		bk = GET_BKEYDATA(dbp, h, i);
2127
2128		/*
2129		 * We need to verify the type of the item here;
2130		 * we can't simply assume that it will be one of the
2131		 * expected three.  If it's not a recognizable type,
2132		 * it can't be considered to have a verifiable
2133		 * length, so it's not possible to certify it as safe.
2134		 */
2135		switch (B_TYPE(bk->type)) {
2136		case B_KEYDATA:
2137			len = bk->len;
2138			break;
2139		case B_DUPLICATE:
2140		case B_OVERFLOW:
2141			len = BOVERFLOW_SIZE;
2142			break;
2143		default:
2144			EPRINT((env,
2145			    "Page %lu: item %lu of unrecognizable type",
2146			    (u_long)pgno, (u_long)i));
2147			return (DB_VERIFY_BAD);
2148		}
2149
2150		if ((size_t)(offset + len) > dbp->pgsize) {
2151			EPRINT((env,
2152			    "Page %lu: item %lu extends past page boundary",
2153			    (u_long)pgno, (u_long)i));
2154			return (DB_VERIFY_BAD);
2155		}
2156	}
2157
2158	if (offsetp != NULL)
2159		*offsetp = offset;
2160	return (0);
2161}
2162
2163/*
2164 * __db_vrfy_duptype--
2165 *	Given a page number and a set of flags to __bam_vrfy_subtree,
2166 *	verify that the dup tree type is correct--i.e., it's a recno
2167 *	if DUPSORT is not set and a btree if it is.
2168 *
2169 * PUBLIC: int __db_vrfy_duptype
2170 * PUBLIC:     __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t));
2171 */
2172int
2173__db_vrfy_duptype(dbp, vdp, pgno, flags)
2174	DB *dbp;
2175	VRFY_DBINFO *vdp;
2176	db_pgno_t pgno;
2177	u_int32_t flags;
2178{
2179	ENV *env;
2180	VRFY_PAGEINFO *pip;
2181	int ret, isbad;
2182
2183	env = dbp->env;
2184	isbad = 0;
2185
2186	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
2187		return (ret);
2188
2189	switch (pip->type) {
2190	case P_IBTREE:
2191	case P_LDUP:
2192		if (!LF_ISSET(DB_ST_DUPSORT)) {
2193			EPRINT((env,
2194	    "Page %lu: sorted duplicate set in unsorted-dup database",
2195			    (u_long)pgno));
2196			isbad = 1;
2197		}
2198		break;
2199	case P_IRECNO:
2200	case P_LRECNO:
2201		if (LF_ISSET(DB_ST_DUPSORT)) {
2202			EPRINT((env,
2203	    "Page %lu: unsorted duplicate set in sorted-dup database",
2204			    (u_long)pgno));
2205			isbad = 1;
2206		}
2207		break;
2208	default:
2209		/*
2210		 * If the page is entirely zeroed, its pip->type will be a lie
2211		 * (we assumed it was a hash page, as they're allowed to be
2212		 * zeroed);  handle this case specially.
2213		 */
2214		if (F_ISSET(pip, VRFY_IS_ALLZEROES))
2215			ZEROPG_ERR_PRINT(env, pgno, "duplicate page");
2216		else
2217			EPRINT((env,
2218		    "Page %lu: duplicate page of inappropriate type %lu",
2219			    (u_long)pgno, (u_long)pip->type));
2220		isbad = 1;
2221		break;
2222	}
2223
2224	if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
2225		return (ret);
2226	return (isbad == 1 ? DB_VERIFY_BAD : 0);
2227}
2228
2229/*
2230 * __db_salvage_duptree --
2231 *	Attempt to salvage a given duplicate tree, given its alleged root.
2232 *
2233 *	The key that corresponds to this dup set has been passed to us
2234 *	in DBT *key.  Because data items follow keys, though, it has been
2235 *	printed once already.
2236 *
2237 *	The basic idea here is that pgno ought to be a P_LDUP, a P_LRECNO, a
2238 *	P_IBTREE, or a P_IRECNO.  If it's an internal page, use the verifier
2239 *	functions to make sure it's safe;  if it's not, we simply bail and the
2240 *	data will have to be printed with no key later on.  if it is safe,
2241 *	recurse on each of its children.
2242 *
2243 *	Whether or not it's safe, if it's a leaf page, __bam_salvage it.
2244 *
2245 *	At all times, use the DB hanging off vdp to mark and check what we've
2246 *	done, so each page gets printed exactly once and we don't get caught
2247 *	in any cycles.
2248 *
2249 * PUBLIC: int __db_salvage_duptree __P((DB *, VRFY_DBINFO *, db_pgno_t,
2250 * PUBLIC:     DBT *, void *, int (*)(void *, const void *), u_int32_t));
2251 */
2252int
2253__db_salvage_duptree(dbp, vdp, pgno, key, handle, callback, flags)
2254	DB *dbp;
2255	VRFY_DBINFO *vdp;
2256	db_pgno_t pgno;
2257	DBT *key;
2258	void *handle;
2259	int (*callback) __P((void *, const void *));
2260	u_int32_t flags;
2261{
2262	DB_MPOOLFILE *mpf;
2263	PAGE *h;
2264	int ret, t_ret;
2265
2266	mpf = dbp->mpf;
2267
2268	if (pgno == PGNO_INVALID || !IS_VALID_PGNO(pgno))
2269		return (DB_VERIFY_BAD);
2270
2271	/* We have a plausible page.  Try it. */
2272	if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0)
2273		return (ret);
2274
2275	switch (TYPE(h)) {
2276	case P_IBTREE:
2277	case P_IRECNO:
2278		if ((ret = __db_vrfy_common(dbp, vdp, h, pgno, flags)) != 0)
2279			goto err;
2280		if ((ret = __bam_vrfy(dbp,
2281		    vdp, h, pgno, flags | DB_NOORDERCHK)) != 0 ||
2282		    (ret = __db_salvage_markdone(vdp, pgno)) != 0)
2283			goto err;
2284		/*
2285		 * We have a known-healthy internal page.  Walk it.
2286		 */
2287		if ((ret = __bam_salvage_walkdupint(dbp, vdp, h, key,
2288		    handle, callback, flags)) != 0)
2289			goto err;
2290		break;
2291	case P_LRECNO:
2292	case P_LDUP:
2293		if ((ret = __bam_salvage(dbp,
2294		    vdp, pgno, TYPE(h), h, handle, callback, key, flags)) != 0)
2295			goto err;
2296		break;
2297	default:
2298		ret = DB_VERIFY_BAD;
2299		goto err;
2300	}
2301
2302err:	if ((t_ret = __memp_fput(mpf,
2303	     vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
2304		ret = t_ret;
2305	return (ret);
2306}
2307
2308/*
2309 * __db_salvage_subdbs --
2310 *	Check and see if this database has subdbs;  if so, try to salvage
2311 *	them independently.
2312 */
2313static int
2314__db_salvage_subdbs(dbp, vdp, handle, callback, flags, hassubsp)
2315	DB *dbp;
2316	VRFY_DBINFO *vdp;
2317	void *handle;
2318	int (*callback) __P((void *, const void *));
2319	u_int32_t flags;
2320	int *hassubsp;
2321{
2322	DB *pgset;
2323	DBC *pgsc;
2324	DB_MPOOLFILE *mpf;
2325	ENV *env;
2326	PAGE *h;
2327	VRFY_PAGEINFO *pip;
2328	db_pgno_t p, meta_pgno;
2329	int ret, t_ret;
2330
2331	*hassubsp = 0;
2332
2333	env = dbp->env;
2334	pgset = NULL;
2335	pgsc = NULL;
2336	mpf = dbp->mpf;
2337	h = NULL;
2338	pip = NULL;
2339	ret = 0;
2340
2341	/*
2342	 * Check to make sure the page is OK and find out if it contains
2343	 * subdatabases.
2344	 */
2345	meta_pgno = PGNO_BASE_MD;
2346	if ((t_ret = __memp_fget(mpf,
2347	    &meta_pgno, vdp->thread_info, NULL, 0, &h)) == 0 &&
2348	    (t_ret = __db_vrfy_common(dbp, vdp, h, PGNO_BASE_MD, flags)) == 0 &&
2349	    (t_ret = __db_salvage(
2350	    dbp, vdp, PGNO_BASE_MD, h, handle, callback, flags)) == 0 &&
2351	    (t_ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) == 0)
2352		if (F_ISSET(pip, VRFY_HAS_SUBDBS))
2353			*hassubsp = 1;
2354	if (pip != NULL &&
2355	    (t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
2356		ret = t_ret;
2357	if (h != NULL) {
2358		if ((t_ret = __memp_fput(mpf,
2359		     vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
2360			ret = t_ret;
2361		h = NULL;
2362	}
2363	if (ret != 0 || *hassubsp == 0)
2364		return (ret);
2365
2366	/*
2367	 * We have subdbs.  Try to crack them.
2368	 *
2369	 * To do so, get a set of leaf pages in the master database, and then
2370	 * walk each of the valid ones, salvaging subdbs as we go.  If any
2371	 * prove invalid, just drop them;  we'll pick them up on a later pass.
2372	 */
2373	if ((ret = __db_vrfy_pgset(env,
2374	    vdp->thread_info, dbp->pgsize, &pgset)) != 0)
2375		goto err;
2376	if ((ret = __db_meta2pgset(dbp, vdp, PGNO_BASE_MD, flags, pgset)) != 0)
2377		goto err;
2378	if ((ret = __db_cursor(pgset, vdp->thread_info, NULL, &pgsc, 0)) != 0)
2379		goto err;
2380	while ((t_ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
2381		if ((t_ret = __memp_fget(mpf,
2382		    &p, vdp->thread_info, NULL, 0, &h)) == 0 &&
2383		    (t_ret = __db_vrfy_common(dbp, vdp, h, p, flags)) == 0 &&
2384		    (t_ret =
2385		    __bam_vrfy(dbp, vdp, h, p, flags | DB_NOORDERCHK)) == 0)
2386			t_ret = __db_salvage_subdbpg(
2387			    dbp, vdp, h, handle, callback, flags);
2388		if (t_ret != 0 && ret == 0)
2389			ret = t_ret;
2390		if (h != NULL) {
2391			if ((t_ret = __memp_fput(mpf, vdp->thread_info,
2392			    h, dbp->priority)) != 0 && ret == 0)
2393				ret = t_ret;
2394			h = NULL;
2395		}
2396	}
2397
2398	if (t_ret != DB_NOTFOUND && ret == 0)
2399		ret = t_ret;
2400
2401err:	if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0)
2402		ret = t_ret;
2403	if (pgset != NULL &&
2404	    (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret ==0)
2405		ret = t_ret;
2406	if (h != NULL &&
2407	    (t_ret = __memp_fput(mpf,
2408		vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
2409		ret = t_ret;
2410	return (ret);
2411}
2412
2413/*
2414 * __db_salvage_subdbpg --
2415 *	Given a known-good leaf page in the master database, salvage all
2416 *	leaf pages corresponding to each subdb.
2417 */
2418static int
2419__db_salvage_subdbpg(dbp, vdp, master, handle, callback, flags)
2420	DB *dbp;
2421	VRFY_DBINFO *vdp;
2422	PAGE *master;
2423	void *handle;
2424	int (*callback) __P((void *, const void *));
2425	u_int32_t flags;
2426{
2427	BKEYDATA *bkkey, *bkdata;
2428	BOVERFLOW *bo;
2429	DB *pgset;
2430	DBC *pgsc;
2431	DBT key;
2432	DB_MPOOLFILE *mpf;
2433	ENV *env;
2434	PAGE *subpg;
2435	db_indx_t i;
2436	db_pgno_t meta_pgno, p;
2437	int ret, err_ret, t_ret;
2438	char *subdbname;
2439
2440	env = dbp->env;
2441	mpf = dbp->mpf;
2442	ret = err_ret = 0;
2443	subdbname = NULL;
2444
2445	if ((ret = __db_vrfy_pgset(env,
2446	     vdp->thread_info, dbp->pgsize, &pgset)) != 0)
2447		return (ret);
2448
2449	/*
2450	 * For each entry, get and salvage the set of pages
2451	 * corresponding to that entry.
2452	 */
2453	for (i = 0; i < NUM_ENT(master); i += P_INDX) {
2454		bkkey = GET_BKEYDATA(dbp, master, i);
2455		bkdata = GET_BKEYDATA(dbp, master, i + O_INDX);
2456
2457		/* Get the subdatabase name. */
2458		if (B_TYPE(bkkey->type) == B_OVERFLOW) {
2459			/*
2460			 * We can, in principle anyway, have a subdb
2461			 * name so long it overflows.  Ick.
2462			 */
2463			bo = (BOVERFLOW *)bkkey;
2464			if ((ret = __db_safe_goff(dbp, vdp,
2465			    bo->pgno, &key, &subdbname, flags)) != 0) {
2466				err_ret = DB_VERIFY_BAD;
2467				continue;
2468			}
2469
2470			/* Nul-terminate it. */
2471			if ((ret = __os_realloc(env,
2472			    key.size + 1, &subdbname)) != 0)
2473				goto err;
2474			subdbname[key.size] = '\0';
2475		} else if (B_TYPE(bkkey->type) == B_KEYDATA) {
2476			if ((ret = __os_realloc(env,
2477			    bkkey->len + 1, &subdbname)) != 0)
2478				goto err;
2479			memcpy(subdbname, bkkey->data, bkkey->len);
2480			subdbname[bkkey->len] = '\0';
2481		}
2482
2483		/* Get the corresponding pgno. */
2484		if (bkdata->len != sizeof(db_pgno_t)) {
2485			err_ret = DB_VERIFY_BAD;
2486			continue;
2487		}
2488		memcpy(&meta_pgno,
2489		    (db_pgno_t *)bkdata->data, sizeof(db_pgno_t));
2490
2491		/*
2492		 * Subdatabase meta pgnos are stored in network byte
2493		 * order for cross-endian compatibility.  Swap if appropriate.
2494		 */
2495		DB_NTOHL_SWAP(env, &meta_pgno);
2496
2497		/* If we can't get the subdb meta page, just skip the subdb. */
2498		if (!IS_VALID_PGNO(meta_pgno) || (ret = __memp_fget(mpf,
2499		    &meta_pgno, vdp->thread_info, NULL, 0, &subpg)) != 0) {
2500			err_ret = ret;
2501			continue;
2502		}
2503
2504		/*
2505		 * Verify the subdatabase meta page.  This has two functions.
2506		 * First, if it's bad, we have no choice but to skip the subdb
2507		 * and let the pages just get printed on a later pass.  Second,
2508		 * the access-method-specific meta verification routines record
2509		 * the various state info (such as the presence of dups)
2510		 * that we need for __db_prheader().
2511		 */
2512		if ((ret =
2513		    __db_vrfy_common(dbp, vdp, subpg, meta_pgno, flags)) != 0) {
2514			err_ret = ret;
2515			(void)__memp_fput(mpf,
2516			    vdp->thread_info, subpg, dbp->priority);
2517			continue;
2518		}
2519		switch (TYPE(subpg)) {
2520		case P_BTREEMETA:
2521			if ((ret = __bam_vrfy_meta(dbp,
2522			    vdp, (BTMETA *)subpg, meta_pgno, flags)) != 0) {
2523				err_ret = ret;
2524				(void)__memp_fput(mpf,
2525				    vdp->thread_info, subpg, dbp->priority);
2526				continue;
2527			}
2528			break;
2529		case P_HASHMETA:
2530			if ((ret = __ham_vrfy_meta(dbp,
2531			    vdp, (HMETA *)subpg, meta_pgno, flags)) != 0) {
2532				err_ret = ret;
2533				(void)__memp_fput(mpf,
2534				    vdp->thread_info, subpg, dbp->priority);
2535				continue;
2536			}
2537			break;
2538		default:
2539			/* This isn't an appropriate page;  skip this subdb. */
2540			err_ret = DB_VERIFY_BAD;
2541			continue;
2542		}
2543
2544		if ((ret = __memp_fput(mpf,
2545		    vdp->thread_info, subpg, dbp->priority)) != 0) {
2546			err_ret = ret;
2547			continue;
2548		}
2549
2550		/* Print a subdatabase header. */
2551		if ((ret = __db_prheader(dbp,
2552		    subdbname, 0, 0, handle, callback, vdp, meta_pgno)) != 0)
2553			goto err;
2554
2555		if ((ret = __db_meta2pgset(dbp, vdp, meta_pgno,
2556		    flags, pgset)) != 0) {
2557			err_ret = ret;
2558			continue;
2559		}
2560
2561		if ((ret = __db_cursor(pgset,
2562		    vdp->thread_info, NULL, &pgsc, 0)) != 0)
2563			goto err;
2564		while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
2565			if ((ret = __memp_fget(mpf,
2566			    &p, vdp->thread_info, NULL, 0, &subpg)) != 0) {
2567				err_ret = ret;
2568				continue;
2569			}
2570			if ((ret = __db_salvage(dbp, vdp, p, subpg,
2571			    handle, callback, flags)) != 0)
2572				err_ret = ret;
2573			if ((ret = __memp_fput(mpf,
2574			    vdp->thread_info, subpg, dbp->priority)) != 0)
2575				err_ret = ret;
2576		}
2577
2578		if (ret != DB_NOTFOUND)
2579			goto err;
2580
2581		if ((ret = __dbc_close(pgsc)) != 0)
2582			goto err;
2583		if ((ret = __db_prfooter(handle, callback)) != 0)
2584			goto err;
2585	}
2586err:	if (subdbname)
2587		__os_free(env, subdbname);
2588
2589	if ((t_ret = __db_close(pgset, NULL, 0)) != 0)
2590		ret = t_ret;
2591
2592	if ((t_ret = __db_salvage_markdone(vdp, PGNO(master))) != 0)
2593		return (t_ret);
2594
2595	return ((err_ret != 0) ? err_ret : ret);
2596}
2597
2598/*
2599 * __db_meta2pgset --
2600 *	Given a known-safe meta page number, return the set of pages
2601 *	corresponding to the database it represents.  Return DB_VERIFY_BAD if
2602 *	it's not a suitable meta page or is invalid.
2603 */
2604static int
2605__db_meta2pgset(dbp, vdp, pgno, flags, pgset)
2606	DB *dbp;
2607	VRFY_DBINFO *vdp;
2608	db_pgno_t pgno;
2609	u_int32_t flags;
2610	DB *pgset;
2611{
2612	DB_MPOOLFILE *mpf;
2613	PAGE *h;
2614	int ret, t_ret;
2615
2616	mpf = dbp->mpf;
2617
2618	if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0)
2619		return (ret);
2620
2621	switch (TYPE(h)) {
2622	case P_BTREEMETA:
2623		ret = __bam_meta2pgset(dbp, vdp, (BTMETA *)h, flags, pgset);
2624		break;
2625	case P_HASHMETA:
2626		ret = __ham_meta2pgset(dbp, vdp, (HMETA *)h, flags, pgset);
2627		break;
2628	default:
2629		ret = DB_VERIFY_BAD;
2630		break;
2631	}
2632
2633	if ((t_ret = __memp_fput(mpf, vdp->thread_info, h, dbp->priority)) != 0)
2634		return (t_ret);
2635	return (ret);
2636}
2637
2638/*
2639 * __db_guesspgsize --
2640 *	Try to guess what the pagesize is if the one on the meta page
2641 *	and the one in the db are invalid.
2642 */
2643static u_int
2644__db_guesspgsize(env, fhp)
2645	ENV *env;
2646	DB_FH *fhp;
2647{
2648	db_pgno_t i;
2649	size_t nr;
2650	u_int32_t guess;
2651	u_int8_t type;
2652
2653	for (guess = DB_MAX_PGSIZE; guess >= DB_MIN_PGSIZE; guess >>= 1) {
2654		/*
2655		 * We try to read three pages ahead after the first one
2656		 * and make sure we have plausible types for all of them.
2657		 * If the seeks fail, continue with a smaller size;
2658		 * we're probably just looking past the end of the database.
2659		 * If they succeed and the types are reasonable, also continue
2660		 * with a size smaller;  we may be looking at pages N,
2661		 * 2N, and 3N for some N > 1.
2662		 *
2663		 * As soon as we hit an invalid type, we stop and return
2664		 * our previous guess; that last one was probably the page size.
2665		 */
2666		for (i = 1; i <= 3; i++) {
2667			if (__os_seek(
2668			    env, fhp, i, guess, SSZ(DBMETA, type)) != 0)
2669				break;
2670			if (__os_read(env,
2671			    fhp, &type, 1, &nr) != 0 || nr == 0)
2672				break;
2673			if (type == P_INVALID || type >= P_PAGETYPE_MAX)
2674				return (guess << 1);
2675		}
2676	}
2677
2678	/*
2679	 * If we're just totally confused--the corruption takes up most of the
2680	 * beginning pages of the database--go with the default size.
2681	 */
2682	return (DB_DEF_IOSIZE);
2683}
2684