1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996,2008 Oracle.  All rights reserved.
5 *
6 * $Id: db_upg.c,v 12.25 2008/01/08 20:58:10 bostic Exp $
7 */
8
9#include "db_config.h"
10
11#include "db_int.h"
12#include "dbinc/db_page.h"
13#include "dbinc/db_swap.h"
14#include "dbinc/btree.h"
15#include "dbinc/hash.h"
16#include "dbinc/qam.h"
17
18/*
19 * __db_upgrade_pp --
20 *	DB->upgrade pre/post processing.
21 *
22 * PUBLIC: int __db_upgrade_pp __P((DB *, const char *, u_int32_t));
23 */
24int
25__db_upgrade_pp(dbp, fname, flags)
26	DB *dbp;
27	const char *fname;
28	u_int32_t flags;
29{
30#ifdef HAVE_UPGRADE_SUPPORT
31	DB_THREAD_INFO *ip;
32	ENV *env;
33	int ret;
34
35	env = dbp->env;
36
37	/*
38	 * !!!
39	 * The actual argument checking is simple, do it inline.
40	 */
41	if ((ret = __db_fchk(env, "DB->upgrade", flags, DB_DUPSORT)) != 0)
42		return (ret);
43
44	ENV_ENTER(env, ip);
45	ret = __db_upgrade(dbp, fname, flags);
46	ENV_LEAVE(env, ip);
47	return (ret);
48#else
49	COMPQUIET(dbp, NULL);
50	COMPQUIET(fname, NULL);
51	COMPQUIET(flags, 0);
52
53	__db_errx(dbp->env, "upgrade not supported");
54	return (EINVAL);
55#endif
56}
57
58#ifdef HAVE_UPGRADE_SUPPORT
/*
 * func_31_list --
 *	Table of per-page conversion routines handed to __db_page_pass for
 *	btree files at version 6 or 7 and hash files at version 4, 5 or 6.
 *	The table is indexed by the page's type; the trailing comment on
 *	each entry names the page type that slot stands for.  A NULL entry
 *	means pages of that type are left untouched by the pass.
 */
static int (* const func_31_list[P_PAGETYPE_MAX])
    __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)) = {
	NULL,			/* P_INVALID */
	NULL,			/* __P_DUPLICATE */
	__ham_31_hash,		/* P_HASH_UNSORTED */
	NULL,			/* P_IBTREE */
	NULL,			/* P_IRECNO */
	__bam_31_lbtree,	/* P_LBTREE */
	NULL,			/* P_LRECNO */
	NULL,			/* P_OVERFLOW */
	__ham_31_hashmeta,	/* P_HASHMETA */
	__bam_31_btreemeta,	/* P_BTREEMETA */
	NULL,			/* P_QAMMETA */
	NULL,			/* P_QAMDATA */
	NULL,			/* P_LDUP */
	NULL,			/* P_HASH */
};
76
/*
 * func_46_list --
 *	Table of per-page conversion routines handed to __db_page_pass for
 *	hash files at version 8 or earlier (the final pass of the hash
 *	upgrade).  Indexed by the page's type, laid out the same way as
 *	func_31_list; only the unsorted-hash and hash-metadata slots have
 *	converters, every other page type is skipped (NULL).
 */
static int (* const func_46_list[P_PAGETYPE_MAX])
    __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)) = {
	NULL,			/* P_INVALID */
	NULL,			/* __P_DUPLICATE */
	__ham_46_hash,		/* P_HASH_UNSORTED */
	NULL,			/* P_IBTREE */
	NULL,			/* P_IRECNO */
	NULL,			/* P_LBTREE */
	NULL,			/* P_LRECNO */
	NULL,			/* P_OVERFLOW */
	__ham_46_hashmeta,	/* P_HASHMETA */
	NULL,			/* P_BTREEMETA */
	NULL,			/* P_QAMMETA */
	NULL,			/* P_QAMDATA */
	NULL,			/* P_LDUP */
	NULL,			/* P_HASH */
};
94
/*
 * Forward declarations for the file-local helpers defined below:
 * __db_page_pass takes one of the conversion tables above and applies it
 * to every page of the file; __db_set_lastpgno rewrites the last_pgno
 * field of the metadata at the start of the file.
 */
static int __db_page_pass __P((DB *, char *, u_int32_t, int (* const [])
	       (DB *, char *, u_int32_t, DB_FH *, PAGE *, int *), DB_FH *));
static int __db_set_lastpgno __P((DB *, char *, DB_FH *));
98
99/*
100 * __db_upgrade --
101 *	Upgrade an existing database.
102 *
103 * PUBLIC: int __db_upgrade __P((DB *, const char *, u_int32_t));
104 */
105int
106__db_upgrade(dbp, fname, flags)
107	DB *dbp;
108	const char *fname;
109	u_int32_t flags;
110{
111	DBMETA *meta;
112	DB_FH *fhp;
113	ENV *env;
114	size_t n;
115	int ret, t_ret, use_mp_open;
116	u_int8_t mbuf[256], tmpflags;
117	char *real_name;
118
119	use_mp_open = 0;
120	env = dbp->env;
121	fhp = NULL;
122
123	/* Get the real backing file name. */
124	if ((ret = __db_appname(env,
125	    DB_APP_DATA, fname, 0, NULL, &real_name)) != 0)
126		return (ret);
127
128	/* Open the file. */
129	if ((ret = __os_open(env, real_name, 0, 0, 0, &fhp)) != 0) {
130		__db_err(env, ret, "%s", real_name);
131		return (ret);
132	}
133
134	/* Initialize the feedback. */
135	if (dbp->db_feedback != NULL)
136		dbp->db_feedback(dbp, DB_UPGRADE, 0);
137
138	/*
139	 * Read the metadata page.  We read 256 bytes, which is larger than
140	 * any access method's metadata page and smaller than any disk sector.
141	 */
142	if ((ret = __os_read(env, fhp, mbuf, sizeof(mbuf), &n)) != 0)
143		goto err;
144
145	switch (((DBMETA *)mbuf)->magic) {
146	case DB_BTREEMAGIC:
147		switch (((DBMETA *)mbuf)->version) {
148		case 6:
149			/*
150			 * Before V7 not all pages had page types, so we do the
151			 * single meta-data page by hand.
152			 */
153			if ((ret =
154			    __bam_30_btreemeta(dbp, real_name, mbuf)) != 0)
155				goto err;
156			if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0)
157				goto err;
158			if ((ret = __os_write(env, fhp, mbuf, 256, &n)) != 0)
159				goto err;
160			/* FALLTHROUGH */
161		case 7:
162			/*
163			 * We need the page size to do more.  Rip it out of
164			 * the meta-data page.
165			 */
166			memcpy(&dbp->pgsize, mbuf + 20, sizeof(u_int32_t));
167
168			if ((ret = __db_page_pass(
169			    dbp, real_name, flags, func_31_list, fhp)) != 0)
170				goto err;
171			/* FALLTHROUGH */
172		case 8:
173			if ((ret =
174			     __db_set_lastpgno(dbp, real_name, fhp)) != 0)
175				goto err;
176			/* FALLTHROUGH */
177		case 9:
178			break;
179		default:
180			__db_errx(env, "%s: unsupported btree version: %lu",
181			    real_name, (u_long)((DBMETA *)mbuf)->version);
182			ret = DB_OLD_VERSION;
183			goto err;
184		}
185		break;
186	case DB_HASHMAGIC:
187		switch (((DBMETA *)mbuf)->version) {
188		case 4:
189		case 5:
190			/*
191			 * Before V6 not all pages had page types, so we do the
192			 * single meta-data page by hand.
193			 */
194			if ((ret =
195			    __ham_30_hashmeta(dbp, real_name, mbuf)) != 0)
196				goto err;
197			if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0)
198				goto err;
199			if ((ret = __os_write(env, fhp, mbuf, 256, &n)) != 0)
200				goto err;
201
202			/*
203			 * Before V6, we created hash pages one by one as they
204			 * were needed, using hashhdr.ovfl_point to reserve
205			 * a block of page numbers for them.  A consequence
206			 * of this was that, if no overflow pages had been
207			 * created, the current doubling might extend past
208			 * the end of the database file.
209			 *
210			 * In DB 3.X, we now create all the hash pages
211			 * belonging to a doubling atomically; it's not
212			 * safe to just save them for later, because when
213			 * we create an overflow page we'll just create
214			 * a new last page (whatever that may be).  Grow
215			 * the database to the end of the current doubling.
216			 */
217			if ((ret =
218			    __ham_30_sizefix(dbp, fhp, real_name, mbuf)) != 0)
219				goto err;
220			/* FALLTHROUGH */
221		case 6:
222			/*
223			 * We need the page size to do more.  Rip it out of
224			 * the meta-data page.
225			 */
226			memcpy(&dbp->pgsize, mbuf + 20, sizeof(u_int32_t));
227
228			if ((ret = __db_page_pass(
229			    dbp, real_name, flags, func_31_list, fhp)) != 0)
230				goto err;
231			/* FALLTHROUGH */
232		case 7:
233			if ((ret =
234			     __db_set_lastpgno(dbp, real_name, fhp)) != 0)
235				goto err;
236			/* FALLTHROUGH */
237		case 8:
238			/*
239			 * Any upgrade that has proceeded this far has metadata
240			 * pages compatible with hash version 8 metadata pages,
241			 * so casting mbuf to a dbmeta is safe.
242			 * If a newer revision moves the pagesize, checksum or
243			 * encrypt_alg flags in the metadata, then the
244			 * extraction of the fields will need to use hard coded
245			 * offsets.
246			 */
247			meta = (DBMETA*)mbuf;
248			/*
249			 * We need the page size to do more.  Extract it from
250			 * the meta-data page.
251			 */
252			memcpy(&dbp->pgsize, &meta->pagesize,
253			    sizeof(u_int32_t));
254			/*
255			 * Rip out metadata and encrypt_alg fields from the
256			 * metadata page. So the upgrade can know how big
257			 * the page metadata pre-amble is. Any upgrade that has
258			 * proceeded this far has metadata pages compatible
259			 * with hash version 8 metadata pages, so extracting
260			 * the fields is safe.
261			 */
262			memcpy(&tmpflags, &meta->metaflags, sizeof(u_int8_t));
263			if (FLD_ISSET(tmpflags, DBMETA_CHKSUM))
264				F_SET(dbp, DB_AM_CHKSUM);
265			memcpy(&tmpflags, &meta->encrypt_alg, sizeof(u_int8_t));
266			if (tmpflags != 0) {
267				if (!CRYPTO_ON(dbp->env)) {
268					__db_errx(env,
269"Attempt to upgrade an encrypted database without providing a password.");
270					ret = EINVAL;
271					goto err;
272				}
273				F_SET(dbp, DB_AM_ENCRYPT);
274			}
275
276			/*
277			 * This is ugly. It is necessary to have a usable
278			 * mpool in the dbp to upgrade from an unsorted
279			 * to a sorted hash database. The mpool file is used
280			 * to resolve offpage key items, which are needed to
281			 * determine sort order. Having mpool open and access
282			 * the file does not affect the page pass, since the
283			 * page pass only updates DB_HASH_UNSORTED pages
284			 * in-place, and the mpool file is only used to read
285			 * OFFPAGE items.
286			 */
287			use_mp_open = 1;
288			if ((ret = __os_closehandle(env, fhp)) != 0)
289				return (ret);
290			dbp->type = DB_HASH;
291			if ((ret = __env_mpool(dbp, fname,
292			    DB_AM_NOT_DURABLE | DB_AM_VERIFYING)) != 0)
293				return (ret);
294			fhp = dbp->mpf->fhp;
295
296			/* Do the actual conversion pass. */
297			if ((ret = __db_page_pass(
298			    dbp, real_name, flags, func_46_list, fhp)) != 0)
299				goto err;
300
301			/* FALLTHROUGH */
302		case 9:
303			break;
304		default:
305			__db_errx(env, "%s: unsupported hash version: %lu",
306			    real_name, (u_long)((DBMETA *)mbuf)->version);
307			ret = DB_OLD_VERSION;
308			goto err;
309		}
310		break;
311	case DB_QAMMAGIC:
312		switch (((DBMETA *)mbuf)->version) {
313		case 1:
314			/*
315			 * If we're in a Queue database, the only page that
316			 * needs upgrading is the meta-database page, don't
317			 * bother with a full pass.
318			 */
319			if ((ret = __qam_31_qammeta(dbp, real_name, mbuf)) != 0)
320				return (ret);
321			/* FALLTHROUGH */
322		case 2:
323			if ((ret = __qam_32_qammeta(dbp, real_name, mbuf)) != 0)
324				return (ret);
325			if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0)
326				goto err;
327			if ((ret = __os_write(env, fhp, mbuf, 256, &n)) != 0)
328				goto err;
329			/* FALLTHROUGH */
330		case 3:
331		case 4:
332			break;
333		default:
334			__db_errx(env, "%s: unsupported queue version: %lu",
335			    real_name, (u_long)((DBMETA *)mbuf)->version);
336			ret = DB_OLD_VERSION;
337			goto err;
338		}
339		break;
340	default:
341		M_32_SWAP(((DBMETA *)mbuf)->magic);
342		switch (((DBMETA *)mbuf)->magic) {
343		case DB_BTREEMAGIC:
344		case DB_HASHMAGIC:
345		case DB_QAMMAGIC:
346			__db_errx(env,
347		"%s: DB->upgrade only supported on native byte-order systems",
348			    real_name);
349			break;
350		default:
351			__db_errx(env,
352			    "%s: unrecognized file type", real_name);
353			break;
354		}
355		ret = EINVAL;
356		goto err;
357	}
358
359	ret = __os_fsync(env, fhp);
360
361	/*
362	 * If mp_open was used, then rely on the database close to clean up
363	 * any file handles.
364	 */
365err:	if (use_mp_open == 0 && fhp != NULL &&
366	    (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
367		ret = t_ret;
368	__os_free(env, real_name);
369
370	/* We're done. */
371	if (dbp->db_feedback != NULL)
372		dbp->db_feedback(dbp, DB_UPGRADE, 100);
373
374	return (ret);
375}
376
377/*
378 * __db_page_pass --
379 *	Walk the pages of the database, upgrading whatever needs it.
380 */
381static int
382__db_page_pass(dbp, real_name, flags, fl, fhp)
383	DB *dbp;
384	char *real_name;
385	u_int32_t flags;
386	int (* const fl[P_PAGETYPE_MAX])
387	    __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
388	DB_FH *fhp;
389{
390	ENV *env;
391	PAGE *page;
392	db_pgno_t i, pgno_last;
393	size_t n;
394	int dirty, ret;
395
396	env = dbp->env;
397
398	/* Determine the last page of the file. */
399	if ((ret = __db_lastpgno(dbp, real_name, fhp, &pgno_last)) != 0)
400		return (ret);
401
402	/* Allocate memory for a single page. */
403	if ((ret = __os_malloc(env, dbp->pgsize, &page)) != 0)
404		return (ret);
405
406	/* Walk the file, calling the underlying conversion functions. */
407	for (i = 0; i < pgno_last; ++i) {
408		if (dbp->db_feedback != NULL)
409			dbp->db_feedback(
410			    dbp, DB_UPGRADE, (int)((i * 100)/pgno_last));
411		if ((ret = __os_seek(env, fhp, i, dbp->pgsize, 0)) != 0)
412			break;
413		if ((ret = __os_read(env, fhp, page, dbp->pgsize, &n)) != 0)
414			break;
415		dirty = 0;
416		/* Always decrypt the page. */
417		if ((ret = __db_decrypt_pg(env, dbp, page)) != 0)
418			break;
419		if (fl[TYPE(page)] != NULL && (ret = fl[TYPE(page)]
420		    (dbp, real_name, flags, fhp, page, &dirty)) != 0)
421			break;
422		if (dirty) {
423			if ((ret = __db_encrypt_and_checksum_pg(
424			    env, dbp, page)) != 0)
425				break;
426			if ((ret =
427			    __os_seek(env, fhp, i, dbp->pgsize, 0)) != 0)
428				break;
429			if ((ret = __os_write(env,
430			    fhp, page, dbp->pgsize, &n)) != 0)
431				break;
432		}
433	}
434
435	__os_free(dbp->env, page);
436	return (ret);
437}
438
439/*
440 * __db_lastpgno --
441 *	Return the current last page number of the file.
442 *
443 * PUBLIC: int __db_lastpgno __P((DB *, char *, DB_FH *, db_pgno_t *));
444 */
445int
446__db_lastpgno(dbp, real_name, fhp, pgno_lastp)
447	DB *dbp;
448	char *real_name;
449	DB_FH *fhp;
450	db_pgno_t *pgno_lastp;
451{
452	ENV *env;
453	db_pgno_t pgno_last;
454	u_int32_t mbytes, bytes;
455	int ret;
456
457	env = dbp->env;
458
459	if ((ret = __os_ioinfo(env,
460	    real_name, fhp, &mbytes, &bytes, NULL)) != 0) {
461		__db_err(env, ret, "%s", real_name);
462		return (ret);
463	}
464
465	/* Page sizes have to be a power-of-two. */
466	if (bytes % dbp->pgsize != 0) {
467		__db_errx(env,
468		    "%s: file size not a multiple of the pagesize", real_name);
469		return (EINVAL);
470	}
471	pgno_last = mbytes * (MEGABYTE / dbp->pgsize);
472	pgno_last += bytes / dbp->pgsize;
473
474	*pgno_lastp = pgno_last;
475	return (0);
476}
477
478/*
479 * __db_set_lastpgno --
480 *	Update the meta->last_pgno field.
481 *
482 * Code assumes that we do not have checksums/crypto on the page.
483 */
484static int
485__db_set_lastpgno(dbp, real_name, fhp)
486	DB *dbp;
487	char *real_name;
488	DB_FH *fhp;
489{
490	DBMETA meta;
491	ENV *env;
492	int ret;
493	size_t n;
494
495	env = dbp->env;
496	if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0)
497		return (ret);
498	if ((ret = __os_read(env, fhp, &meta, sizeof(meta), &n)) != 0)
499		return (ret);
500	dbp->pgsize = meta.pagesize;
501	if ((ret = __db_lastpgno(dbp, real_name, fhp, &meta.last_pgno)) != 0)
502		return (ret);
503	if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0)
504		return (ret);
505	if ((ret = __os_write(env, fhp, &meta, sizeof(meta), &n)) != 0)
506		return (ret);
507
508	return (0);
509}
510#endif /* HAVE_UPGRADE_SUPPORT */
511