1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996-2009 Oracle.  All rights reserved.
5 *
6 * $Id$
7 */
8
9#include "db_config.h"
10
11#include "db_int.h"
12#include "dbinc/db_page.h"
13#include "dbinc/log.h"
14#include "dbinc/txn.h"
15#include "dbinc/db_am.h"
16
/*
 * Helpers for the stack of recyclable dbreg ids.  All three are defined
 * near the end of this file and expect the caller to hold mtx_filelist.
 */
static int __dbreg_push_id __P((ENV *, int32_t));
static int __dbreg_pop_id __P((ENV *, int32_t *));
static int __dbreg_pluck_id __P((ENV *, int32_t));
20
21/*
22 * The dbreg subsystem, as its name implies, registers database handles so
23 * that we can associate log messages with them without logging a filename
24 * or a full, unique DB ID.  Instead, we assign each dbp an int32_t which is
25 * easy and cheap to log, and use this subsystem to map back and forth.
26 *
27 * Overview of how dbreg ids are managed:
28 *
29 * OPEN
30 *	dbreg_setup (Creates FNAME struct.)
31 *	dbreg_new_id (Assigns new ID to dbp and logs it.  May be postponed
32 *	until we attempt to log something else using that dbp, if the dbp
33 *	was opened on a replication client.)
34 *
35 * CLOSE
36 *	dbreg_close_id  (Logs closure of dbp/revocation of ID.)
37 *	dbreg_revoke_id (As name implies, revokes ID.)
38 *	dbreg_teardown (Destroys FNAME.)
39 *
40 * RECOVERY
41 *	dbreg_setup
42 *	dbreg_assign_id (Assigns a particular ID we have in the log to a dbp.)
43 *
44 *	sometimes: dbreg_revoke_id; dbreg_teardown
45 *	other times: normal close path
46 *
47 * A note about locking:
48 *
49 *	FNAME structures are referenced only by their corresponding dbp's
50 *	until they have a valid id.
51 *
52 *	Once they have a valid id, they must get linked into the log
53 *	region list so they can get logged on checkpoints.
54 *
55 *	An FNAME that may/does have a valid id must be accessed under
56 *	protection of the mtx_filelist, with the following exception:
57 *
58 *	We don't want to have to grab the mtx_filelist on every log
59 *	record, and it should be safe not to do so when we're just
60 *	looking at the id, because once allocated, the id should
61 *	not change under a handle until the handle is closed.
62 *
63 *	If a handle is closed during an attempt by another thread to
64 *	log with it, well, the application doing the close deserves to
65 *	go down in flames and a lot else is about to fail anyway.
66 *
67 *	When in the course of logging we encounter an invalid id
68 *	and go to allocate it lazily, we *do* need to check again
69 *	after grabbing the mutex, because it's possible to race with
 *	another thread that has also decided that it needs to allocate
 *	an id lazily.
72 *
73 * See SR #5623 for further discussion of the new dbreg design.
74 */
75
76/*
77 * __dbreg_setup --
78 *	Allocate and initialize an FNAME structure.  The FNAME structures
79 * live in the log shared region and map one-to-one with open database handles.
80 * When the handle needs to be logged, the FNAME should have a valid fid
81 * allocated.  If the handle currently isn't logged, it still has an FNAME
82 * entry.  If we later discover that the handle needs to be logged, we can
83 * allocate a id for it later.  (This happens when the handle is on a
84 * replication client that later becomes a master.)
85 *
86 * PUBLIC: int __dbreg_setup __P((DB *, const char *, const char *, u_int32_t));
87 */
88int
89__dbreg_setup(dbp, fname, dname, create_txnid)
90	DB *dbp;
91	const char *fname, *dname;
92	u_int32_t create_txnid;
93{
94	DB_LOG *dblp;
95	ENV *env;
96	FNAME *fnp;
97	REGINFO *infop;
98	int ret;
99	size_t len;
100	void *p;
101
102	env = dbp->env;
103	dblp = env->lg_handle;
104	infop = &dblp->reginfo;
105
106	fnp = NULL;
107	p = NULL;
108
109	/* Allocate an FNAME and, if necessary, a buffer for the name itself. */
110	LOG_SYSTEM_LOCK(env);
111	if ((ret = __env_alloc(infop, sizeof(FNAME), &fnp)) != 0)
112		goto err;
113	memset(fnp, 0, sizeof(FNAME));
114	if (fname == NULL)
115		fnp->fname_off = INVALID_ROFF;
116	else {
117		len = strlen(fname) + 1;
118		if ((ret = __env_alloc(infop, len, &p)) != 0)
119			goto err;
120		fnp->fname_off = R_OFFSET(infop, p);
121		memcpy(p, fname, len);
122	}
123	if (dname == NULL)
124		fnp->dname_off = INVALID_ROFF;
125	else {
126		len = strlen(dname) + 1;
127		if ((ret = __env_alloc(infop, len, &p)) != 0)
128			goto err;
129		fnp->dname_off = R_OFFSET(infop, p);
130		memcpy(p, dname, len);
131	}
132	LOG_SYSTEM_UNLOCK(env);
133
134	/*
135	 * Fill in all the remaining info that we'll need later to register
136	 * the file, if we use it for logging.
137	 */
138	fnp->id = fnp->old_id = DB_LOGFILEID_INVALID;
139	fnp->s_type = dbp->type;
140	memcpy(fnp->ufid, dbp->fileid, DB_FILE_ID_LEN);
141	fnp->meta_pgno = dbp->meta_pgno;
142	fnp->create_txnid = create_txnid;
143	dbp->dbenv->thread_id(dbp->dbenv, &fnp->pid, NULL);
144
145	if (F_ISSET(dbp, DB_AM_INMEM))
146		F_SET(fnp, DB_FNAME_INMEM);
147	if (F_ISSET(dbp, DB_AM_RECOVER))
148		F_SET(fnp, DB_FNAME_RECOVER);
149	fnp->txn_ref = 1;
150	fnp->mutex = dbp->mutex;
151
152	dbp->log_filename = fnp;
153
154	return (0);
155
156err:	LOG_SYSTEM_UNLOCK(env);
157	if (ret == ENOMEM)
158		__db_errx(env,
159    "Logging region out of memory; you may need to increase its size");
160
161	return (ret);
162}
163
164/*
165 * __dbreg_teardown --
166 *	Destroy a DB handle's FNAME struct.  This is only called when closing
167 * the DB.
168 *
169 * PUBLIC: int __dbreg_teardown __P((DB *));
170 */
171int
172__dbreg_teardown(dbp)
173	DB *dbp;
174{
175	int ret;
176
177	/*
178	 * We may not have an FNAME if we were never opened.  This is not an
179	 * error.
180	 */
181	if (dbp->log_filename == NULL)
182		return (0);
183
184	ret = __dbreg_teardown_int(dbp->env, dbp->log_filename);
185
186	/* We freed the copy of the mutex from the FNAME. */
187	dbp->log_filename = NULL;
188	dbp->mutex = MUTEX_INVALID;
189
190	return (ret);
191}
192
193/*
194 * __dbreg_teardown_int --
195 *	Destroy an FNAME struct.
196 *
197 * PUBLIC: int __dbreg_teardown_int __P((ENV *, FNAME *));
198 */
199int
200__dbreg_teardown_int(env, fnp)
201	ENV *env;
202	FNAME *fnp;
203{
204	DB_LOG *dblp;
205	REGINFO *infop;
206	int ret;
207
208	if (F_ISSET(fnp, DB_FNAME_NOTLOGGED))
209		return (0);
210	dblp = env->lg_handle;
211	infop = &dblp->reginfo;
212
213	DB_ASSERT(env, fnp->id == DB_LOGFILEID_INVALID);
214	ret = __mutex_free(env, &fnp->mutex);
215
216	LOG_SYSTEM_LOCK(env);
217	if (fnp->fname_off != INVALID_ROFF)
218		__env_alloc_free(infop, R_ADDR(infop, fnp->fname_off));
219	if (fnp->dname_off != INVALID_ROFF)
220		__env_alloc_free(infop, R_ADDR(infop, fnp->dname_off));
221	__env_alloc_free(infop, fnp);
222	LOG_SYSTEM_UNLOCK(env);
223
224	return (ret);
225}
226
227/*
228 * __dbreg_new_id --
229 *	Get an unused dbreg id to this database handle.
230 *	Used as a wrapper to acquire the mutex and
231 *	only set the id on success.
232 *
233 * PUBLIC: int __dbreg_new_id __P((DB *, DB_TXN *));
234 */
235int
236__dbreg_new_id(dbp, txn)
237	DB *dbp;
238	DB_TXN *txn;
239{
240	DB_LOG *dblp;
241	ENV *env;
242	FNAME *fnp;
243	LOG *lp;
244	int32_t id;
245	int ret;
246
247	env = dbp->env;
248	dblp = env->lg_handle;
249	lp = dblp->reginfo.primary;
250	fnp = dbp->log_filename;
251
252	/* The mtx_filelist protects the FNAME list and id management. */
253	MUTEX_LOCK(env, lp->mtx_filelist);
254	if (fnp->id != DB_LOGFILEID_INVALID) {
255		MUTEX_UNLOCK(env, lp->mtx_filelist);
256		return (0);
257	}
258	if ((ret = __dbreg_get_id(dbp, txn, &id)) == 0)
259		fnp->id = id;
260	MUTEX_UNLOCK(env, lp->mtx_filelist);
261	return (ret);
262}
263
264/*
265 * __dbreg_get_id --
266 *	Assign an unused dbreg id to this database handle.
267 *	Assume the caller holds the mtx_filelist locked.  Assume the
268 *	caller will set the fnp->id field with the id we return.
269 *
270 * PUBLIC: int __dbreg_get_id __P((DB *, DB_TXN *, int32_t *));
271 */
272int
273__dbreg_get_id(dbp, txn, idp)
274	DB *dbp;
275	DB_TXN *txn;
276	int32_t *idp;
277{
278	DB_LOG *dblp;
279	ENV *env;
280	FNAME *fnp;
281	LOG *lp;
282	int32_t id;
283	int ret;
284
285	env = dbp->env;
286	dblp = env->lg_handle;
287	lp = dblp->reginfo.primary;
288	fnp = dbp->log_filename;
289
290	/*
291	 * It's possible that after deciding we needed to call this function,
292	 * someone else allocated an ID before we grabbed the lock.  Check
293	 * to make sure there was no race and we have something useful to do.
294	 */
295	/* Get an unused ID from the free list. */
296	if ((ret = __dbreg_pop_id(env, &id)) != 0)
297		goto err;
298
299	/* If no ID was found, allocate a new one. */
300	if (id == DB_LOGFILEID_INVALID)
301		id = lp->fid_max++;
302
303	/* If the file is durable (i.e., not, not-durable), mark it as such. */
304	if (!F_ISSET(dbp, DB_AM_NOT_DURABLE))
305		F_SET(fnp, DB_FNAME_DURABLE);
306
307	/* Hook the FNAME into the list of open files. */
308	SH_TAILQ_INSERT_HEAD(&lp->fq, fnp, q, __fname);
309
310	/*
311	 * Log the registry.  We should only request a new ID in situations
312	 * where logging is reasonable.
313	 */
314	DB_ASSERT(env, !F_ISSET(dbp, DB_AM_RECOVER));
315
316	if ((ret = __dbreg_log_id(dbp, txn, id, 0)) != 0)
317		goto err;
318
319	/*
320	 * Once we log the create_txnid, we need to make sure we never
321	 * log it again (as might happen if this is a replication client
322	 * that later upgrades to a master).
323	 */
324	fnp->create_txnid = TXN_INVALID;
325
326	DB_ASSERT(env, dbp->type == fnp->s_type);
327	DB_ASSERT(env, dbp->meta_pgno == fnp->meta_pgno);
328
329	if ((ret = __dbreg_add_dbentry(env, dblp, dbp, id)) != 0)
330		goto err;
331	/*
332	 * If we have a successful call, set the ID.  Otherwise
333	 * we have to revoke it and remove it from all the lists
334	 * it has been added to, and return an invalid id.
335	 */
336err:
337	if (ret != 0 && id != DB_LOGFILEID_INVALID) {
338		(void)__dbreg_revoke_id(dbp, 1, id);
339		id = DB_LOGFILEID_INVALID;
340	}
341	*idp = id;
342	return (ret);
343}
344
345/*
346 * __dbreg_assign_id --
347 *	Assign a particular dbreg id to this database handle.
348 *
349 * PUBLIC: int __dbreg_assign_id __P((DB *, int32_t, int));
350 */
351int
352__dbreg_assign_id(dbp, id, deleted)
353	DB *dbp;
354	int32_t id;
355	int deleted;
356{
357	DB *close_dbp;
358	DB_LOG *dblp;
359	ENV *env;
360	FNAME *close_fnp, *fnp;
361	LOG *lp;
362	int ret;
363
364	env = dbp->env;
365	dblp = env->lg_handle;
366	lp = dblp->reginfo.primary;
367	fnp = dbp->log_filename;
368
369	close_dbp = NULL;
370	close_fnp = NULL;
371
372	/* The mtx_filelist protects the FNAME list and id management. */
373	MUTEX_LOCK(env, lp->mtx_filelist);
374
375	/* We should only call this on DB handles that have no ID. */
376	DB_ASSERT(env, fnp->id == DB_LOGFILEID_INVALID);
377
378	/*
379	 * Make sure there isn't already a file open with this ID. There can
380	 * be in recovery, if we're recovering across a point where an ID got
381	 * reused.
382	 */
383	if (__dbreg_id_to_fname(dblp, id, 1, &close_fnp) == 0) {
384		/*
385		 * We want to save off any dbp we have open with this id.  We
386		 * can't safely close it now, because we hold the mtx_filelist,
387		 * but we should be able to rely on it being open in this
388		 * process, and we're running recovery, so no other thread
389		 * should muck with it if we just put off closing it until
390		 * we're ready to return.
391		 *
392		 * Once we have the dbp, revoke its id;  we're about to
393		 * reuse it.
394		 */
395		ret = __dbreg_id_to_db(env, NULL, &close_dbp, id, 0);
396		if (ret == ENOENT) {
397			ret = 0;
398			goto cont;
399		} else if (ret != 0)
400			goto err;
401
402		if ((ret = __dbreg_revoke_id(close_dbp, 1,
403		    DB_LOGFILEID_INVALID)) != 0)
404			goto err;
405	}
406
407	/*
408	 * Remove this ID from the free list, if it's there, and make sure
409	 * we don't allocate it anew.
410	 */
411cont:	if ((ret = __dbreg_pluck_id(env, id)) != 0)
412		goto err;
413	if (id >= lp->fid_max)
414		lp->fid_max = id + 1;
415
416	/* Now go ahead and assign the id to our dbp. */
417	fnp->id = id;
418	/* If the file is durable (i.e., not, not-durable), mark it as such. */
419	if (!F_ISSET(dbp, DB_AM_NOT_DURABLE))
420		F_SET(fnp, DB_FNAME_DURABLE);
421	SH_TAILQ_INSERT_HEAD(&lp->fq, fnp, q, __fname);
422
423	/*
424	 * If we get an error adding the dbentry, revoke the id.
425	 * We void the return value since we want to retain and
426	 * return the original error in ret anyway.
427	 */
428	if ((ret = __dbreg_add_dbentry(env, dblp, dbp, id)) != 0)
429		(void)__dbreg_revoke_id(dbp, 1, id);
430	else
431		dblp->dbentry[id].deleted = deleted;
432
433err:	MUTEX_UNLOCK(env, lp->mtx_filelist);
434
435	/* There's nothing useful that our caller can do if this close fails. */
436	if (close_dbp != NULL)
437		(void)__db_close(close_dbp, NULL, DB_NOSYNC);
438
439	return (ret);
440}
441
442/*
443 * __dbreg_revoke_id --
444 *	Take a log id away from a dbp, in preparation for closing it,
445 *	but without logging the close.
446 *
447 * PUBLIC: int __dbreg_revoke_id __P((DB *, int, int32_t));
448 */
449int
450__dbreg_revoke_id(dbp, have_lock, force_id)
451	DB *dbp;
452	int have_lock;
453	int32_t force_id;
454{
455	DB_REP *db_rep;
456	ENV *env;
457	int push;
458
459	env = dbp->env;
460
461	/*
462	 * If we are not in recovery but the file was opened for a recovery
463	 * operation, then this process aborted a transaction for another
464	 * process and the id may still be in use, so don't reuse this id.
465	 * If our fid generation in replication has changed, this fid
466	 * should not be reused
467	 */
468	db_rep = env->rep_handle;
469	push = (!F_ISSET(dbp, DB_AM_RECOVER) || IS_RECOVERING(env)) &&
470	    (!REP_ON(env) || ((REP *)db_rep->region)->gen == dbp->fid_gen);
471
472	return (__dbreg_revoke_id_int(dbp->env,
473	      dbp->log_filename, have_lock, push, force_id));
474}
475/*
476 * __dbreg_revoke_id_int --
477 *	Revoke a log, in preparation for closing it, but without logging
478 *	the close.
479 *
480 * PUBLIC: int __dbreg_revoke_id_int
481 * PUBLIC:     __P((ENV *, FNAME *, int, int, int32_t));
482 */
483int
484__dbreg_revoke_id_int(env, fnp, have_lock, push, force_id)
485	ENV *env;
486	FNAME *fnp;
487	int have_lock, push;
488	int32_t force_id;
489{
490	DB_LOG *dblp;
491	LOG *lp;
492	int32_t id;
493	int ret;
494
495	dblp = env->lg_handle;
496	lp = dblp->reginfo.primary;
497	ret = 0;
498
499	/* If we lack an ID, this is a null-op. */
500	if (fnp == NULL)
501		return (0);
502
503	/*
504	 * If we have a force_id, we had an error after allocating
505	 * the id, and putting it on the fq list, but before we
506	 * finished setting up fnp.  So, if we have a force_id use it.
507	 */
508	if (force_id != DB_LOGFILEID_INVALID)
509		id = force_id;
510	else if (fnp->id == DB_LOGFILEID_INVALID) {
511		if (fnp->old_id == DB_LOGFILEID_INVALID)
512			return (0);
513		id = fnp->old_id;
514	} else
515		id = fnp->id;
516	if (!have_lock)
517		MUTEX_LOCK(env, lp->mtx_filelist);
518
519	fnp->id = DB_LOGFILEID_INVALID;
520	fnp->old_id = DB_LOGFILEID_INVALID;
521
522	/* Remove the FNAME from the list of open files. */
523	SH_TAILQ_REMOVE(&lp->fq, fnp, q, __fname);
524
525	/*
526	 * This FNAME may be for a DBP which is already closed.  Its ID may
527	 * still be in use by an aborting transaction.  If not,
528	 * remove this id from the dbentry table and push it onto the
529	 * free list.
530	 */
531	if (!F_ISSET(fnp, DB_FNAME_CLOSED) &&
532	    (ret = __dbreg_rem_dbentry(dblp, id)) == 0 && push)
533		ret = __dbreg_push_id(env, id);
534
535	if (!have_lock)
536		MUTEX_UNLOCK(env, lp->mtx_filelist);
537	return (ret);
538}
539
540/*
541 * __dbreg_close_id --
542 *	Take a dbreg id away from a dbp that we're closing, and log
543 * the unregistry if the refcount goes to 0.
544 *
545 * PUBLIC: int __dbreg_close_id __P((DB *, DB_TXN *, u_int32_t));
546 */
547int
548__dbreg_close_id(dbp, txn, op)
549	DB *dbp;
550	DB_TXN *txn;
551	u_int32_t op;
552{
553	DB_LOG *dblp;
554	ENV *env;
555	FNAME *fnp;
556	LOG *lp;
557	int ret, t_ret;
558
559	env = dbp->env;
560	dblp = env->lg_handle;
561	lp = dblp->reginfo.primary;
562	fnp = dbp->log_filename;
563
564	/* If we lack an ID, this is a null-op. */
565	if (fnp == NULL)
566		return (0);
567
568	if (fnp->id == DB_LOGFILEID_INVALID) {
569		ret = __dbreg_revoke_id(dbp, 0, DB_LOGFILEID_INVALID);
570		goto done;
571	}
572
573	/*
574	 * If we are the last reference to this db then we need to log it
575	 * as closed.  Otherwise the last transaction will do the logging.
576	 * Remove the DBP from the db entry table since it can nolonger
577	 * be used.  If we abort it will have to be reopened.
578	 */
579	ret = 0;
580	DB_ASSERT(env, fnp->txn_ref > 0);
581	if (fnp->txn_ref > 1) {
582		MUTEX_LOCK(env, dbp->mutex);
583		if (fnp->txn_ref > 1) {
584			if (!F_ISSET(fnp, DB_FNAME_CLOSED) &&
585			    (t_ret = __dbreg_rem_dbentry(
586			    env->lg_handle, fnp->id)) != 0 && ret == 0)
587				ret = t_ret;
588
589			/*
590			 * The DB handle has been closed in the logging system.
591			 * Transactions may still have a ref to this name.
592			 * Mark it so that if recovery reopens the file id
593			 * the transaction will not close the wrong handle.
594			 */
595			F_SET(fnp, DB_FNAME_CLOSED);
596			fnp->txn_ref--;
597			MUTEX_UNLOCK(env, dbp->mutex);
598			/* The mutex now lives only in the FNAME. */
599			dbp->mutex = MUTEX_INVALID;
600			dbp->log_filename = NULL;
601			goto no_log;
602		}
603	}
604	MUTEX_LOCK(env, lp->mtx_filelist);
605
606	if ((ret = __dbreg_log_close(env, fnp, txn, op)) != 0)
607		goto err;
608	ret = __dbreg_revoke_id(dbp, 1, DB_LOGFILEID_INVALID);
609
610err:	MUTEX_UNLOCK(env, lp->mtx_filelist);
611
612done:	if ((t_ret = __dbreg_teardown(dbp)) != 0 && ret == 0)
613		ret = t_ret;
614no_log:
615	return (ret);
616}
617/*
618 * __dbreg_close_id_int --
619 *	Close down a dbreg id and log the unregistry.  This is called only
620 * when a transaction has the last ref to the fname.
621 *
622 * PUBLIC: int __dbreg_close_id_int __P((ENV *, FNAME *, u_int32_t, int));
623 */
624int
625__dbreg_close_id_int(env, fnp, op, locked)
626	ENV *env;
627	FNAME *fnp;
628	u_int32_t op;
629	int locked;
630{
631	DB_LOG *dblp;
632	LOG *lp;
633	int ret, t_ret;
634
635	DB_ASSERT(env, fnp->txn_ref == 1);
636	dblp = env->lg_handle;
637	lp = dblp->reginfo.primary;
638
639	if (fnp->id == DB_LOGFILEID_INVALID)
640		return (__dbreg_revoke_id_int(env,
641		     fnp, locked, 1, DB_LOGFILEID_INVALID));
642
643	if (F_ISSET(fnp, DB_FNAME_RECOVER))
644		return (__dbreg_close_file(env, fnp));
645	/*
646	 * If log_close fails then it will mark the name DB_FNAME_NOTLOGGED
647	 * and the id must persist.
648	 */
649	if (!locked)
650		MUTEX_LOCK(env, lp->mtx_filelist);
651	if ((ret = __dbreg_log_close(env, fnp, NULL, op)) != 0)
652		goto err;
653
654	ret = __dbreg_revoke_id_int(env, fnp, 1, 1, DB_LOGFILEID_INVALID);
655
656err:	if (!locked)
657		MUTEX_UNLOCK(env, lp->mtx_filelist);
658
659	if ((t_ret = __dbreg_teardown_int(env, fnp)) != 0 && ret == 0)
660		ret = t_ret;
661	return (ret);
662}
663
/*
 * __dbreg_failchk --
 *
 * Look for entries that belong to dead processes and either close them
 * out or, if there are pending transactions, just remove the mutex which
 * will get discarded later.
 *
 * Does nothing and returns 0 when the environment has no log handle.
 * The value returned at the end of the function is the first non-zero
 * result from __dbreg_close_id_int, or 0 if every close succeeded.
 *
 * PUBLIC: int __dbreg_failchk __P((ENV *));
 */
int
__dbreg_failchk(env)
	ENV *env;
{
	DB_ENV *dbenv;
	DB_LOG *dblp;
	FNAME *fnp, *nnp;
	LOG *lp;
	int ret, t_ret;
	char buf[DB_THREADID_STRLEN];

	if ((dblp = env->lg_handle) == NULL)
		return (0);

	lp = dblp->reginfo.primary;
	dbenv = env->dbenv;
	ret = 0;

	/* The whole walk of the open-file list runs under mtx_filelist. */
	MUTEX_LOCK(env, lp->mtx_filelist);
	for (fnp = SH_TAILQ_FIRST(&lp->fq, __fname); fnp != NULL; fnp = nnp) {
		/*
		 * Fetch the successor first: the else branch below hands fnp
		 * to __dbreg_close_id_int, which can unlink and free it.
		 */
		nnp = SH_TAILQ_NEXT(fnp, q, __fname);

		/* Entries whose owning process is still alive are left alone. */
		if (dbenv->is_alive(dbenv, fnp->pid, 0, DB_MUTEX_PROCESS_ONLY))
			continue;
		MUTEX_LOCK(env, fnp->mutex);
		__db_msg(env,
		    "Freeing log information for process: %s, (ref %lu)",
		    dbenv->thread_id_string(dbenv, fnp->pid, 0, buf),
		    (u_long)fnp->txn_ref);
		if (fnp->txn_ref > 1 || F_ISSET(fnp, DB_FNAME_CLOSED)) {
			/*
			 * Something other than the dead handle still refers
			 * to this FNAME, or the handle was already closed.
			 * Drop the handle's reference (at most once), then
			 * detach the mutex and pid from the entry.
			 */
			if (!F_ISSET(fnp, DB_FNAME_CLOSED)) {
				fnp->txn_ref--;
				F_SET(fnp, DB_FNAME_CLOSED);
			}
			MUTEX_UNLOCK(env, fnp->mutex);
			fnp->mutex = MUTEX_INVALID;
			fnp->pid = 0;
		} else {
			/*
			 * The dead handle held the only reference: mark the
			 * name closed and close the id with a DBREG_CLOSE
			 * record.  The final argument tells the callee that
			 * mtx_filelist is already held.
			 *
			 * NOTE(review): fnp->mutex is still locked here and
			 * is not unlocked on this branch; the callee's
			 * teardown frees it -- confirm that is intended.
			 */
			F_SET(fnp, DB_FNAME_CLOSED);
			if ((t_ret = __dbreg_close_id_int(env,
			    fnp, DBREG_CLOSE, 1)) && ret == 0)
				ret = t_ret;
		}
	}

	MUTEX_UNLOCK(env, lp->mtx_filelist);
	return (ret);
}
720/*
721 * __dbreg_log_close --
722 *
723 * Log a close of a database.  Called when closing a file or when a
724 * replication client is becoming a master.  That closes all the
725 * files it previously had open.
726 *
727 * Assumes caller holds the lp->mutex_filelist lock already.
728 *
729 * PUBLIC: int __dbreg_log_close __P((ENV *, FNAME *,
730 * PUBLIC:    DB_TXN *, u_int32_t));
731 */
732int
733__dbreg_log_close(env, fnp, txn, op)
734	ENV *env;
735	FNAME *fnp;
736	DB_TXN *txn;
737	u_int32_t op;
738{
739	DBT fid_dbt, r_name, *dbtp;
740	DB_LOG *dblp;
741	DB_LSN r_unused;
742	int ret;
743
744	dblp = env->lg_handle;
745	ret = 0;
746
747	if (fnp->fname_off == INVALID_ROFF)
748		dbtp = NULL;
749	else {
750		memset(&r_name, 0, sizeof(r_name));
751		r_name.data = R_ADDR(&dblp->reginfo, fnp->fname_off);
752		r_name.size = (u_int32_t)strlen((char *)r_name.data) + 1;
753		dbtp = &r_name;
754	}
755	memset(&fid_dbt, 0, sizeof(fid_dbt));
756	fid_dbt.data = fnp->ufid;
757	fid_dbt.size = DB_FILE_ID_LEN;
758	if ((ret = __dbreg_register_log(env, txn, &r_unused,
759	    F_ISSET(fnp, DB_FNAME_DURABLE) ? 0 : DB_LOG_NOT_DURABLE,
760	    op, dbtp, &fid_dbt, fnp->id,
761	    fnp->s_type, fnp->meta_pgno, TXN_INVALID)) != 0) {
762		/*
763		 * We are trying to close, but the log write failed.
764		 * Unfortunately, close needs to plow forward, because
765		 * the application can't do anything with the handle.
766		 * Make the entry in the shared memory region so that
767		 * when we close the environment, we know that this
768		 * happened.  Also, make sure we remove this from the
769		 * per-process table, so that we don't try to close it
770		 * later.
771		 */
772		F_SET(fnp, DB_FNAME_NOTLOGGED);
773		(void)__dbreg_rem_dbentry(dblp, fnp->id);
774	}
775	return (ret);
776}
777
778/*
779 * __dbreg_push_id and __dbreg_pop_id --
780 *	Dbreg ids from closed files are kept on a stack in shared memory
781 * for recycling.  (We want to reuse them as much as possible because each
782 * process keeps open files in an array by ID.)  Push them to the stack and
783 * pop them from it, managing memory as appropriate.
784 *
785 * The stack is protected by the mtx_filelist, and both functions assume it
786 * is already locked.
787 */
788static int
789__dbreg_push_id(env, id)
790	ENV *env;
791	int32_t id;
792{
793	DB_LOG *dblp;
794	LOG *lp;
795	REGINFO *infop;
796	int32_t *stack, *newstack;
797	int ret;
798
799	dblp = env->lg_handle;
800	infop = &dblp->reginfo;
801	lp = infop->primary;
802
803	if (id == lp->fid_max - 1) {
804		lp->fid_max--;
805		return (0);
806	}
807
808	/* Check if we have room on the stack. */
809	if (lp->free_fid_stack == INVALID_ROFF ||
810	    lp->free_fids_alloced <= lp->free_fids + 1) {
811		LOG_SYSTEM_LOCK(env);
812		if ((ret = __env_alloc(infop,
813		    (lp->free_fids_alloced + 20) * sizeof(u_int32_t),
814		    &newstack)) != 0) {
815			LOG_SYSTEM_UNLOCK(env);
816			return (ret);
817		}
818
819		if (lp->free_fid_stack != INVALID_ROFF) {
820			stack = R_ADDR(infop, lp->free_fid_stack);
821			memcpy(newstack, stack,
822			    lp->free_fids_alloced * sizeof(u_int32_t));
823			__env_alloc_free(infop, stack);
824		}
825		lp->free_fid_stack = R_OFFSET(infop, newstack);
826		lp->free_fids_alloced += 20;
827		LOG_SYSTEM_UNLOCK(env);
828	}
829
830	stack = R_ADDR(infop, lp->free_fid_stack);
831	stack[lp->free_fids++] = id;
832	return (0);
833}
834
835static int
836__dbreg_pop_id(env, id)
837	ENV *env;
838	int32_t *id;
839{
840	DB_LOG *dblp;
841	LOG *lp;
842	int32_t *stack;
843
844	dblp = env->lg_handle;
845	lp = dblp->reginfo.primary;
846
847	/* Do we have anything to pop? */
848	if (lp->free_fid_stack != INVALID_ROFF && lp->free_fids > 0) {
849		stack = R_ADDR(&dblp->reginfo, lp->free_fid_stack);
850		*id = stack[--lp->free_fids];
851	} else
852		*id = DB_LOGFILEID_INVALID;
853
854	return (0);
855}
856
857/*
858 * __dbreg_pluck_id --
859 *	Remove a particular dbreg id from the stack of free ids.  This is
860 * used when we open a file, as in recovery, with a specific ID that might
861 * be on the stack.
862 *
863 * Returns success whether or not the particular id was found, and like
864 * push and pop, assumes that the mtx_filelist is locked.
865 */
866static int
867__dbreg_pluck_id(env, id)
868	ENV *env;
869	int32_t id;
870{
871	DB_LOG *dblp;
872	LOG *lp;
873	int32_t *stack;
874	u_int i;
875
876	dblp = env->lg_handle;
877	lp = dblp->reginfo.primary;
878
879	if (id >= lp->fid_max)
880		return (0);
881
882	/* Do we have anything to look at? */
883	if (lp->free_fid_stack != INVALID_ROFF) {
884		stack = R_ADDR(&dblp->reginfo, lp->free_fid_stack);
885		for (i = 0; i < lp->free_fids; i++)
886			if (id == stack[i]) {
887				/*
888				 * Found it.  Overwrite it with the top
889				 * id (which may harmlessly be itself),
890				 * and shorten the stack by one.
891				 */
892				stack[i] = stack[lp->free_fids - 1];
893				lp->free_fids--;
894				return (0);
895			}
896	}
897
898	return (0);
899}
900
901/*
902 * __dbreg_log_id --
903 *	Used for in-memory named files.  They are created in mpool and
904 * are given id's early in the open process so that we can read and
905 * create pages in the mpool for the files.  However, at the time that
906 * the mpf is created, the file may not be fully created and/or its
907 * meta-data may not be fully known, so we can't do a full dbregister.
908 * This is a routine exported that will log a complete dbregister
909 * record that will allow for both recovery and replication.
910 *
911 * PUBLIC: int __dbreg_log_id __P((DB *, DB_TXN *, int32_t, int));
912 */
913int
914__dbreg_log_id(dbp, txn, id, needlock)
915	DB *dbp;
916	DB_TXN *txn;
917	int32_t id;
918	int needlock;
919{
920	DBT fid_dbt, r_name;
921	DB_LOG *dblp;
922	DB_LSN unused;
923	ENV *env;
924	FNAME *fnp;
925	LOG *lp;
926	u_int32_t op;
927	int i, ret;
928
929	env = dbp->env;
930	dblp = env->lg_handle;
931	lp = dblp->reginfo.primary;
932	fnp = dbp->log_filename;
933
934	/*
935	 * Verify that the fnp has been initialized, by seeing if it
936	 * has any non-zero bytes in it.
937	 */
938	for (i = 0; i < DB_FILE_ID_LEN; i++)
939		if (fnp->ufid[i] != 0)
940			break;
941	if (i == DB_FILE_ID_LEN)
942		memcpy(fnp->ufid, dbp->fileid, DB_FILE_ID_LEN);
943
944	if (fnp->s_type == DB_UNKNOWN)
945		fnp->s_type = dbp->type;
946
947	/*
948	 * Log the registry.  We should only request a new ID in situations
949	 * where logging is reasonable.
950	 */
951	memset(&fid_dbt, 0, sizeof(fid_dbt));
952	memset(&r_name, 0, sizeof(r_name));
953
954	if (needlock)
955		MUTEX_LOCK(env, lp->mtx_filelist);
956
957	if (fnp->fname_off != INVALID_ROFF) {
958		r_name.data = R_ADDR(&dblp->reginfo, fnp->fname_off);
959		r_name.size = (u_int32_t)strlen((char *)r_name.data) + 1;
960	}
961
962	fid_dbt.data = dbp->fileid;
963	fid_dbt.size = DB_FILE_ID_LEN;
964
965	op = !F_ISSET(dbp, DB_AM_OPEN_CALLED) ? DBREG_PREOPEN :
966	    (F_ISSET(dbp, DB_AM_INMEM) ? DBREG_REOPEN : DBREG_OPEN);
967	ret = __dbreg_register_log(env, txn, &unused,
968	    F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0,
969	    op, r_name.size == 0 ? NULL : &r_name, &fid_dbt, id,
970	    fnp->s_type, fnp->meta_pgno, fnp->create_txnid);
971
972	if (needlock)
973		MUTEX_UNLOCK(env, lp->mtx_filelist);
974
975	return (ret);
976}
977