1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996,2008 Oracle.  All rights reserved.
5 */
6/*
7 * Copyright (c) 1990, 1993, 1994
8 *	Margo Seltzer.  All rights reserved.
9 */
10/*
11 * Copyright (c) 1990, 1993, 1994
12 *	The Regents of the University of California.  All rights reserved.
13 *
14 * This code is derived from software contributed to Berkeley by
15 * Margo Seltzer.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 *    notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 *    notice, this list of conditions and the following disclaimer in the
24 *    documentation and/or other materials provided with the distribution.
25 * 3. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * $Id: hash_open.c,v 12.33 2008/01/30 12:18:22 mjc Exp $
42 */
43
44#include "db_config.h"
45
46#include "db_int.h"
47#include "dbinc/crypto.h"
48#include "dbinc/db_page.h"
49#include "dbinc/hash.h"
50#include "dbinc/log.h"
51#include "dbinc/lock.h"
52#include "dbinc/mp.h"
53#include "dbinc/btree.h"
54#include "dbinc/fop.h"
55
56static db_pgno_t __ham_init_meta __P((DB *, HMETA *, db_pgno_t, DB_LSN *));
57
58/*
59 * __ham_open --
60 *
61 * PUBLIC: int __ham_open __P((DB *, DB_THREAD_INFO *,
62 * PUBLIC:     DB_TXN *, const char * name, db_pgno_t, u_int32_t));
63 */
64int
65__ham_open(dbp, ip, txn, name, base_pgno, flags)
66	DB *dbp;
67	DB_THREAD_INFO *ip;
68	DB_TXN *txn;
69	const char *name;
70	db_pgno_t base_pgno;
71	u_int32_t flags;
72{
73	DBC *dbc;
74	ENV *env;
75	HASH *hashp;
76	HASH_CURSOR *hcp;
77	int ret, t_ret;
78
79	env = dbp->env;
80	dbc = NULL;
81
82	/*
83	 * Get a cursor.  If DB_CREATE is specified, we may be creating
84	 * pages, and to do that safely in CDB we need a write cursor.
85	 * In STD_LOCKING mode, we'll synchronize using the meta page
86	 * lock instead.
87	 */
88	if ((ret = __db_cursor(dbp, ip,
89	    txn, &dbc, LF_ISSET(DB_CREATE) && CDB_LOCKING(env) ?
90	    DB_WRITECURSOR : 0)) != 0)
91		return (ret);
92
93	hcp = (HASH_CURSOR *)dbc->internal;
94	hashp = dbp->h_internal;
95	hashp->meta_pgno = base_pgno;
96	if ((ret = __ham_get_meta(dbc)) != 0)
97		goto err;
98
99	/* Initialize the hdr structure.  */
100	if (hcp->hdr->dbmeta.magic == DB_HASHMAGIC) {
101		/* File exists, verify the data in the header. */
102		if (hashp->h_hash == NULL)
103			hashp->h_hash = hcp->hdr->dbmeta.version < 5
104			? __ham_func4 : __ham_func5;
105		hashp->h_nelem = hcp->hdr->nelem;
106		if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUP))
107			F_SET(dbp, DB_AM_DUP);
108		if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUPSORT))
109			F_SET(dbp, DB_AM_DUPSORT);
110		if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_SUBDB))
111			F_SET(dbp, DB_AM_SUBDB);
112		if (PGNO(hcp->hdr) == PGNO_BASE_MD &&
113		     !F_ISSET(dbp, DB_AM_RECOVER))
114			__memp_set_last_pgno(dbp->mpf,
115			    hcp->hdr->dbmeta.last_pgno);
116	} else if (!IS_RECOVERING(env) && !F_ISSET(dbp, DB_AM_RECOVER)) {
117		__db_errx(env,
118		    "%s: Invalid hash meta page %lu", name, (u_long)base_pgno);
119		ret = EINVAL;
120	}
121
122	/* Release the meta data page */
123	if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
124		ret = t_ret;
125err:	if ((t_ret  = __dbc_close(dbc)) != 0 && ret == 0)
126		ret = t_ret;
127
128	return (ret);
129}
130
131/*
132 * __ham_metachk --
133 *
134 * PUBLIC: int __ham_metachk __P((DB *, const char *, HMETA *));
135 */
136int
137__ham_metachk(dbp, name, hashm)
138	DB *dbp;
139	const char *name;
140	HMETA *hashm;
141{
142	ENV *env;
143	u_int32_t vers;
144	int ret;
145
146	env = dbp->env;
147
148	/*
149	 * At this point, all we know is that the magic number is for a Hash.
150	 * Check the version, the database may be out of date.
151	 */
152	vers = hashm->dbmeta.version;
153	if (F_ISSET(dbp, DB_AM_SWAP))
154		M_32_SWAP(vers);
155	switch (vers) {
156	case 4:
157	case 5:
158	case 6:
159		__db_errx(env,
160		    "%s: hash version %lu requires a version upgrade",
161		    name, (u_long)vers);
162		return (DB_OLD_VERSION);
163	case 7:
164	case 8:
165	case 9:
166		break;
167	default:
168		__db_errx(env,
169		    "%s: unsupported hash version: %lu", name, (u_long)vers);
170		return (EINVAL);
171	}
172
173	/* Swap the page if we need to. */
174	if (F_ISSET(dbp, DB_AM_SWAP) &&
175	    (ret = __ham_mswap(env, (PAGE *)hashm)) != 0)
176		return (ret);
177
178	/* Check the type. */
179	if (dbp->type != DB_HASH && dbp->type != DB_UNKNOWN)
180		return (EINVAL);
181	dbp->type = DB_HASH;
182	DB_ILLEGAL_METHOD(dbp, DB_OK_HASH);
183
184	/*
185	 * Check application info against metadata info, and set info, flags,
186	 * and type based on metadata info.
187	 */
188	if ((ret = __db_fchk(env,
189	    "DB->open", hashm->dbmeta.flags,
190	    DB_HASH_DUP | DB_HASH_SUBDB | DB_HASH_DUPSORT)) != 0)
191		return (ret);
192
193	if (F_ISSET(&hashm->dbmeta, DB_HASH_DUP))
194		F_SET(dbp, DB_AM_DUP);
195	else
196		if (F_ISSET(dbp, DB_AM_DUP)) {
197			__db_errx(env,
198		"%s: DB_DUP specified to open method but not set in database",
199			    name);
200			return (EINVAL);
201		}
202
203	if (F_ISSET(&hashm->dbmeta, DB_HASH_SUBDB))
204		F_SET(dbp, DB_AM_SUBDB);
205	else
206		if (F_ISSET(dbp, DB_AM_SUBDB)) {
207			__db_errx(env,
208	    "%s: multiple databases specified but not supported in file",
209			name);
210			return (EINVAL);
211		}
212
213	if (F_ISSET(&hashm->dbmeta, DB_HASH_DUPSORT)) {
214		if (dbp->dup_compare == NULL)
215			dbp->dup_compare = __bam_defcmp;
216	} else
217		if (dbp->dup_compare != NULL) {
218			__db_errx(env,
219		"%s: duplicate sort function specified but not set in database",
220			    name);
221			return (EINVAL);
222		}
223
224	/* Set the page size. */
225	dbp->pgsize = hashm->dbmeta.pagesize;
226
227	/* Copy the file's ID. */
228	memcpy(dbp->fileid, hashm->dbmeta.uid, DB_FILE_ID_LEN);
229
230	return (0);
231}
232
233/*
234 * __ham_init_meta --
235 *
236 * Initialize a hash meta-data page.  We assume that the meta-data page is
237 * contiguous with the initial buckets that we create.  If that turns out
238 * to be false, we'll fix it up later.  Return the initial number of buckets
239 * allocated.
240 */
241static db_pgno_t
242__ham_init_meta(dbp, meta, pgno, lsnp)
243	DB *dbp;
244	HMETA *meta;
245	db_pgno_t pgno;
246	DB_LSN *lsnp;
247{
248	ENV *env;
249	HASH *hashp;
250	db_pgno_t nbuckets;
251	u_int i, l2;
252
253	env = dbp->env;
254	hashp = dbp->h_internal;
255
256	if (hashp->h_hash == NULL)
257		hashp->h_hash = DB_HASHVERSION < 5 ? __ham_func4 : __ham_func5;
258
259	if (hashp->h_nelem != 0 && hashp->h_ffactor != 0) {
260		hashp->h_nelem = (hashp->h_nelem - 1) / hashp->h_ffactor + 1;
261		l2 = __db_log2(hashp->h_nelem > 2 ? hashp->h_nelem : 2);
262	} else
263		l2 = 1;
264	nbuckets = (db_pgno_t)(1 << l2);
265
266	memset(meta, 0, sizeof(HMETA));
267	meta->dbmeta.lsn = *lsnp;
268	meta->dbmeta.pgno = pgno;
269	meta->dbmeta.magic = DB_HASHMAGIC;
270	meta->dbmeta.version = DB_HASHVERSION;
271	meta->dbmeta.pagesize = dbp->pgsize;
272	if (F_ISSET(dbp, DB_AM_CHKSUM))
273		FLD_SET(meta->dbmeta.metaflags, DBMETA_CHKSUM);
274	if (F_ISSET(dbp, DB_AM_ENCRYPT)) {
275		meta->dbmeta.encrypt_alg = env->crypto_handle->alg;
276		DB_ASSERT(env, meta->dbmeta.encrypt_alg != 0);
277		meta->crypto_magic = meta->dbmeta.magic;
278	}
279	meta->dbmeta.type = P_HASHMETA;
280	meta->dbmeta.free = PGNO_INVALID;
281	meta->dbmeta.last_pgno = pgno;
282	meta->max_bucket = nbuckets - 1;
283	meta->high_mask = nbuckets - 1;
284	meta->low_mask = (nbuckets >> 1) - 1;
285	meta->ffactor = hashp->h_ffactor;
286	meta->nelem = hashp->h_nelem;
287	meta->h_charkey = hashp->h_hash(dbp, CHARKEY, sizeof(CHARKEY));
288	memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN);
289
290	if (F_ISSET(dbp, DB_AM_DUP))
291		F_SET(&meta->dbmeta, DB_HASH_DUP);
292	if (F_ISSET(dbp, DB_AM_SUBDB))
293		F_SET(&meta->dbmeta, DB_HASH_SUBDB);
294	if (dbp->dup_compare != NULL)
295		F_SET(&meta->dbmeta, DB_HASH_DUPSORT);
296
297	/*
298	 * Create the first and second buckets pages so that we have the
299	 * page numbers for them and we can store that page number in the
300	 * meta-data header (spares[0]).
301	 */
302	meta->spares[0] = pgno + 1;
303
304	/* Fill in the last fields of the meta data page. */
305	for (i = 1; i <= l2; i++)
306		meta->spares[i] = meta->spares[0];
307	for (; i < NCACHED; i++)
308		meta->spares[i] = PGNO_INVALID;
309
310	return (nbuckets);
311}
312
313/*
314 * __ham_new_file --
315 *	Create the necessary pages to begin a new database file.  If name
316 * is NULL, then this is an unnamed file, the mpf has been set in the dbp
317 * and we simply create the pages using mpool.  In this case, we don't log
318 * because we never have to redo an unnamed create and the undo simply
319 * frees resources.
320 *
321 * This code appears more complex than it is because of the two cases (named
322 * and unnamed).  The way to read the code is that for each page being created,
323 * there are three parts: 1) a "get page" chunk (which either uses malloc'd
324 * memory or calls __memp_fget), 2) the initialization, and 3) the "put page"
325 * chunk which either does a fop write or an __memp_fput.
326 *
327 * PUBLIC: int __ham_new_file __P((DB *,
328 * PUBLIC:      DB_THREAD_INFO *, DB_TXN *, DB_FH *, const char *));
329 */
330int
331__ham_new_file(dbp, ip, txn, fhp, name)
332	DB *dbp;
333	DB_THREAD_INFO *ip;
334	DB_TXN *txn;
335	DB_FH *fhp;
336	const char *name;
337{
338	DBT pdbt;
339	DB_LSN lsn;
340	DB_MPOOLFILE *mpf;
341	DB_PGINFO pginfo;
342	ENV *env;
343	HMETA *meta;
344	PAGE *page;
345	int ret;
346	db_pgno_t lpgno;
347	void *buf;
348
349	env = dbp->env;
350	mpf = dbp->mpf;
351	meta = NULL;
352	page = NULL;
353	buf = NULL;
354
355	if (F_ISSET(dbp, DB_AM_INMEM)) {
356		/* Build meta-data page. */
357		lpgno = PGNO_BASE_MD;
358		if ((ret = __memp_fget(mpf, &lpgno, ip, txn,
359		    DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &meta)) != 0)
360			return (ret);
361		LSN_NOT_LOGGED(lsn);
362		lpgno = __ham_init_meta(dbp, meta, PGNO_BASE_MD, &lsn);
363		meta->dbmeta.last_pgno = lpgno;
364		if ((ret = __db_log_page(dbp,
365		    txn, &lsn, meta->dbmeta.pgno, (PAGE *)meta)) != 0)
366			goto err;
367		ret = __memp_fput(mpf, ip, meta, dbp->priority);
368		meta = NULL;
369		if (ret != 0)
370			goto err;
371
372		/* Allocate the final hash bucket. */
373		if ((ret = __memp_fget(mpf, &lpgno, ip, txn,
374		    DB_MPOOL_CREATE, &page)) != 0)
375			goto err;
376		P_INIT(page,
377		    dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
378		LSN_NOT_LOGGED(page->lsn);
379		if ((ret =
380		    __db_log_page(dbp, txn, &page->lsn, lpgno, page)) != 0)
381			goto err;
382		ret = __memp_fput(mpf, ip, page, dbp->priority);
383		page = NULL;
384		if (ret != 0)
385			goto err;
386	} else {
387		memset(&pdbt, 0, sizeof(pdbt));
388
389		/* Build meta-data page. */
390		pginfo.db_pagesize = dbp->pgsize;
391		pginfo.type = dbp->type;
392		pginfo.flags =
393		    F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP));
394		pdbt.data = &pginfo;
395		pdbt.size = sizeof(pginfo);
396		if ((ret = __os_calloc(dbp->env, 1, dbp->pgsize, &buf)) != 0)
397			return (ret);
398		meta = (HMETA *)buf;
399		LSN_NOT_LOGGED(lsn);
400		lpgno = __ham_init_meta(dbp, meta, PGNO_BASE_MD, &lsn);
401		meta->dbmeta.last_pgno = lpgno;
402		if ((ret =
403		    __db_pgout(env->dbenv, PGNO_BASE_MD, meta, &pdbt)) != 0)
404			goto err;
405		if ((ret = __fop_write(env, txn, name, DB_APP_DATA, fhp,
406		    dbp->pgsize, 0, 0, buf, dbp->pgsize, 1, F_ISSET(
407		    dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0)) != 0)
408			goto err;
409		meta = NULL;
410
411		/* Allocate the final hash bucket. */
412#ifdef DIAGNOSTIC
413		memset(buf, 0, dbp->pgsize);
414#endif
415		page = (PAGE *)buf;
416		P_INIT(page,
417		    dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
418		LSN_NOT_LOGGED(page->lsn);
419		if ((ret = __db_pgout(env->dbenv, lpgno, buf, &pdbt)) != 0)
420			goto err;
421		if ((ret = __fop_write(env, txn, name, DB_APP_DATA, fhp,
422		    dbp->pgsize, lpgno, 0, buf, dbp->pgsize, 1, F_ISSET(
423		    dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0)) != 0)
424			goto err;
425		page = NULL;
426	}
427
428err:	if (buf != NULL)
429		__os_free(env, buf);
430	else {
431		if (meta != NULL)
432			(void)__memp_fput(mpf, ip, meta, dbp->priority);
433		if (page != NULL)
434			(void)__memp_fput(mpf, ip, page, dbp->priority);
435	}
436	return (ret);
437}
438
439/*
440 * __ham_new_subdb --
441 *	Create the necessary pages to begin a new subdatabase.
442 *
443 * PUBLIC: int __ham_new_subdb __P((DB *, DB *, DB_THREAD_INFO *, DB_TXN *));
444 */
445int
446__ham_new_subdb(mdbp, dbp, ip, txn)
447	DB *mdbp, *dbp;
448	DB_THREAD_INFO *ip;
449	DB_TXN *txn;
450{
451	DBC *dbc;
452	DBMETA *mmeta;
453	DB_LOCK metalock, mmlock;
454	DB_LSN lsn;
455	DB_MPOOLFILE *mpf;
456	ENV *env;
457	HMETA *meta;
458	PAGE *h;
459	int i, ret, t_ret;
460	db_pgno_t lpgno, mpgno;
461
462	env = mdbp->env;
463	mpf = mdbp->mpf;
464	dbc = NULL;
465	meta = NULL;
466	mmeta = NULL;
467	LOCK_INIT(metalock);
468	LOCK_INIT(mmlock);
469
470	if ((ret = __db_cursor(mdbp, ip, txn,
471	    &dbc, CDB_LOCKING(env) ?  DB_WRITECURSOR : 0)) != 0)
472		return (ret);
473
474	/* Get and lock the new meta data page. */
475	if ((ret = __db_lget(dbc,
476	    0, dbp->meta_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
477		goto err;
478	if ((ret = __memp_fget(mpf, &dbp->meta_pgno, ip, dbc->txn,
479	    DB_MPOOL_CREATE, &meta)) != 0)
480		goto err;
481
482	/* Initialize the new meta-data page. */
483	lsn = meta->dbmeta.lsn;
484	lpgno = __ham_init_meta(dbp, meta, dbp->meta_pgno, &lsn);
485
486	/*
487	 * We are about to allocate a set of contiguous buckets (lpgno
488	 * worth).  We need to get the master meta-data page to figure
489	 * out where these pages are and to allocate them.  So, lock and
490	 * get the master meta data page.
491	 */
492	mpgno = PGNO_BASE_MD;
493	if ((ret = __db_lget(dbc, 0, mpgno, DB_LOCK_WRITE, 0, &mmlock)) != 0)
494		goto err;
495	if ((ret = __memp_fget(mpf, &mpgno, ip, dbc->txn,
496	    DB_MPOOL_DIRTY, &mmeta)) != 0)
497		goto err;
498
499	/*
500	 * Now update the hash meta-data page to reflect where the first
501	 * set of buckets are actually located.
502	 */
503	meta->spares[0] = mmeta->last_pgno + 1;
504	for (i = 0; i < NCACHED && meta->spares[i] != PGNO_INVALID; i++)
505		meta->spares[i] = meta->spares[0];
506
507	/* The new meta data page is now complete; log it. */
508	if ((ret = __db_log_page(mdbp,
509	    txn, &meta->dbmeta.lsn, dbp->meta_pgno, (PAGE *)meta)) != 0)
510		goto err;
511
512	/* Reflect the group allocation. */
513	if (DBENV_LOGGING(env)
514#if !defined(DEBUG_WOP)
515	    && txn != NULL
516#endif
517	)
518		if ((ret = __ham_groupalloc_log(mdbp, txn,
519		    &LSN(mmeta), 0, &LSN(mmeta), meta->spares[0],
520		    meta->max_bucket + 1, 0, mmeta->last_pgno)) != 0)
521			goto err;
522
523	/* Release the new meta-data page. */
524	if ((ret = __memp_fput(mpf, ip, meta, dbc->priority)) != 0)
525		goto err;
526	meta = NULL;
527
528	lpgno += mmeta->last_pgno;
529
530	/* Now allocate the final hash bucket. */
531	if ((ret = __memp_fget(mpf, &lpgno, ip, dbc->txn,
532	    DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &h)) != 0)
533		goto err;
534
535	mmeta->last_pgno = lpgno;
536	P_INIT(h, dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
537	LSN(h) = LSN(mmeta);
538	if ((ret = __memp_fput(mpf, ip, h, dbc->priority)) != 0)
539		goto err;
540
541err:	/* Now put the master-metadata page back. */
542	if (mmeta != NULL && (t_ret = __memp_fput(mpf,
543		ip, mmeta, dbc->priority)) != 0 && ret == 0)
544		ret = t_ret;
545	if ((t_ret = __LPUT(dbc, mmlock)) != 0 && ret == 0)
546		ret = t_ret;
547	if (meta != NULL && (t_ret = __memp_fput(mpf,
548	    ip, meta, dbc->priority)) != 0 && ret == 0)
549		ret = t_ret;
550	if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0)
551		ret = t_ret;
552	if (dbc != NULL)
553		if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
554			ret = t_ret;
555	return (ret);
556}
557