1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996-2009 Oracle.  All rights reserved.
5 */
6/*
7 * Copyright (c) 1995, 1996
8 *	The President and Fellows of Harvard University.  All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * $Id$
35 */
36
37#include "db_config.h"
38
39#include "db_int.h"
40#include "dbinc/db_page.h"
41#include "dbinc/db_am.h"
42#include "dbinc/log.h"
43#include "dbinc/txn.h"
44
45static int __dbreg_open_file __P((ENV *,
46    DB_TXN *, __dbreg_register_args *, void *));
47
48/*
49 * PUBLIC: int __dbreg_register_recover
50 * PUBLIC:     __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
51 */
52int
53__dbreg_register_recover(env, dbtp, lsnp, op, info)
54	ENV *env;
55	DBT *dbtp;
56	DB_LSN *lsnp;
57	db_recops op;
58	void *info;
59{
60	__dbreg_register_args *argp;
61	DB_ENTRY *dbe;
62	DB_LOG *dblp;
63	DB *dbp;
64	u_int32_t status;
65	int do_close, do_open, do_rem, ret, t_ret;
66
67	dblp = env->lg_handle;
68	dbp = NULL;
69
70#ifdef DEBUG_RECOVER
71	REC_PRINT(__dbreg_register_print);
72#endif
73	do_open = do_close = 0;
74	if ((ret = __dbreg_register_read(env, dbtp->data, &argp)) != 0)
75		goto out;
76
77	switch (argp->opcode) {
78	case DBREG_REOPEN:
79	case DBREG_PREOPEN:
80	case DBREG_OPEN:
81		/*
82		 * In general, we redo the open on REDO and abort on UNDO.
83		 * However, a reopen is a second instance of an open of
84		 * in-memory files and we don't want to close them yet
85		 * on abort, so just skip that here.
86		 */
87		if ((DB_REDO(op) ||
88		    op == DB_TXN_OPENFILES || op == DB_TXN_POPENFILES))
89			do_open = 1;
90		else if (argp->opcode != DBREG_REOPEN)
91			do_close = 1;
92		break;
93	case DBREG_CLOSE:
94		if (DB_UNDO(op))
95			do_open = 1;
96		else
97			do_close = 1;
98		break;
99	case DBREG_RCLOSE:
100		/*
101		 * DBREG_RCLOSE was generated by recover because a file was
102		 * left open.  The POPENFILES pass, which is run to open
103		 * files to abort prepared transactions, may not include the
104		 * open for this file so we open it here.  Note that a normal
105		 * CLOSE is not legal before the prepared transaction is
106		 * committed or aborted.
107		 */
108		if (DB_UNDO(op) || op == DB_TXN_POPENFILES)
109			do_open = 1;
110		else
111			do_close = 1;
112		break;
113	case DBREG_CHKPNT:
114		if (DB_UNDO(op) ||
115		    op == DB_TXN_OPENFILES || op == DB_TXN_POPENFILES)
116			do_open = 1;
117		break;
118	default:
119		ret = __db_unknown_path(env, "__dbreg_register_recover");
120		goto out;
121	}
122
123	if (do_open) {
124		/*
125		 * We must open the db even if the meta page is not
126		 * yet written as we may be creating subdatabase.
127		 */
128		if (op == DB_TXN_OPENFILES && argp->opcode != DBREG_CHKPNT)
129			F_SET(dblp, DBLOG_FORCE_OPEN);
130
131		/*
132		 * During an abort or an open pass to recover prepared txns,
133		 * we need to make sure that we use the same locker id on the
134		 * open.  We pass the txnid along to ensure this.
135		 */
136		ret = __dbreg_open_file(env,
137		    op == DB_TXN_ABORT || op == DB_TXN_POPENFILES ?
138		    argp->txnp : NULL, argp, info);
139		if (ret == DB_PAGE_NOTFOUND && argp->meta_pgno != PGNO_BASE_MD)
140			ret = ENOENT;
141		if (ret == ENOENT || ret == EINVAL) {
142			/*
143			 * If this is an OPEN while rolling forward, it's
144			 * possible that the file was recreated since last
145			 * time we got here.  In that case, we've got deleted
146			 * set and probably shouldn't, so we need to check
147			 * for that case and possibly retry.
148			 */
149			if (DB_REDO(op) && argp->txnp != 0 &&
150			    dblp->dbentry[argp->fileid].deleted) {
151				dblp->dbentry[argp->fileid].deleted = 0;
152				ret =
153				    __dbreg_open_file(env, NULL, argp, info);
154				if (ret == DB_PAGE_NOTFOUND &&
155				     argp->meta_pgno != PGNO_BASE_MD)
156					ret = ENOENT;
157			}
158			/*
159			 * We treat ENOENT as OK since it's possible that
160			 * the file was renamed or deleted.
161			 * All other errors, we return.
162			 */
163			if (ret == ENOENT)
164				ret = 0;
165		}
166		F_CLR(dblp, DBLOG_FORCE_OPEN);
167	}
168
169	if (do_close) {
170		/*
171		 * If we are undoing an open, or redoing a close,
172		 * then we need to close the file.  If we are simply
173		 * revoking then we just need to grab the DBP and revoke
174		 * the log id.
175		 *
176		 * If the file is deleted, then we can just ignore this close.
177		 * Otherwise, we should usually have a valid dbp we should
178		 * close or whose reference count should be decremented.
179		 * However, if we shut down without closing a file, we may, in
180		 * fact, not have the file open, and that's OK.
181		 */
182		do_rem = 0;
183		MUTEX_LOCK(env, dblp->mtx_dbreg);
184		if (argp->fileid < dblp->dbentry_cnt) {
185			/*
186			 * Typically, closes should match an open which means
187			 * that if this is a close, there should be a valid
188			 * entry in the dbentry table when we get here,
189			 * however there are exceptions.  1. If this is an
190			 * OPENFILES pass, then we may have started from
191			 * a log file other than the first, and the
192			 * corresponding open appears in an earlier file.
193			 * 2. If we are undoing an open on an abort or
194			 * recovery, it's possible that we failed after
195			 * the log record, but before we actually entered
196			 * a handle here.
197			 * 3. If we aborted an open, then we wrote a non-txnal
198			 * RCLOSE into the log.  During the forward pass, the
199			 * file won't be open, and that's OK.
200			 */
201			dbe = &dblp->dbentry[argp->fileid];
202			if (dbe->dbp == NULL && !dbe->deleted) {
203				/* No valid entry here. Nothing to do. */
204				MUTEX_UNLOCK(env, dblp->mtx_dbreg);
205				goto done;
206			}
207
208			/* We have either an open entry or a deleted entry. */
209			if ((dbp = dbe->dbp) != NULL) {
210				/*
211				 * If we're a replication client, it's
212				 * possible to get here with a dbp that
213				 * the user opened, but which we later
214				 * assigned a fileid to.  Be sure that
215				 * we only close dbps that we opened in
216				 * the recovery code or that were opened
217				 * inside a currently aborting transaction
218				 * but not by the recovery code.
219				 */
220				do_rem = F_ISSET(dbp, DB_AM_RECOVER) ?
221				    op != DB_TXN_ABORT : op == DB_TXN_ABORT;
222				MUTEX_UNLOCK(env, dblp->mtx_dbreg);
223			} else if (dbe->deleted) {
224				MUTEX_UNLOCK(env, dblp->mtx_dbreg);
225				if ((ret = __dbreg_rem_dbentry(
226				    dblp, argp->fileid)) != 0)
227					goto out;
228			}
229		} else
230			MUTEX_UNLOCK(env, dblp->mtx_dbreg);
231
232		/*
233		 * During recovery, all files are closed.  On an abort, we only
234		 * close the file if we opened it during the abort
235		 * (DB_AM_RECOVER set), otherwise we simply do a __db_refresh.
236		 * For the close case, if remove or rename has closed the file,
237		 * don't request a sync, because a NULL mpf would be a problem.
238		 *
239		 * If we are undoing a create we'd better discard any buffers
240		 * from the memory pool.  We identify creates because the
241		 * argp->id field contains the transaction containing the file
242		 * create; if that id is invalid, we are not creating.
243		 *
244		 * On the backward pass, we need to "undo" opens even if the
245		 * transaction in which they appeared committed, because we have
246		 * already undone the corresponding close.  In that case, the
247		 * id will be valid, but we do not want to discard buffers.
248		 */
249		if (do_rem && dbp != NULL) {
250			if (argp->id != TXN_INVALID) {
251				if ((ret = __db_txnlist_find(env,
252				    info, argp->txnp->txnid, &status))
253				    != DB_NOTFOUND && ret != 0)
254					goto out;
255				if (ret == DB_NOTFOUND || status != TXN_COMMIT)
256					F_SET(dbp, DB_AM_DISCARD);
257				ret = 0;
258			}
259
260			if (op == DB_TXN_ABORT) {
261				if ((t_ret = __db_refresh(dbp,
262				    NULL, DB_NOSYNC, NULL, 0)) != 0 && ret == 0)
263					ret = t_ret;
264			} else {
265				if ((t_ret = __db_close(
266				    dbp, NULL, DB_NOSYNC)) != 0 && ret == 0)
267					ret = t_ret;
268			}
269		}
270	}
271done:	if (ret == 0)
272		*lsnp = argp->prev_lsn;
273out:	if (argp != NULL)
274		__os_free(env, argp);
275	return (ret);
276}
277
278/*
279 * __dbreg_open_file --
280 *	Called during log_register recovery.  Make sure that we have an
281 *	entry in the dbentry table for this ndx.  Returns 0 on success,
282 *	non-zero on error.
283 */
284static int
285__dbreg_open_file(env, txn, argp, info)
286	ENV *env;
287	DB_TXN *txn;
288	__dbreg_register_args *argp;
289	void *info;
290{
291	DB *dbp;
292	DB_ENTRY *dbe;
293	DB_LOG *dblp;
294	u_int32_t id, status;
295	int ret;
296
297	dblp = env->lg_handle;
298
299	/*
300	 * When we're opening, we have to check that the name we are opening
301	 * is what we expect.  If it's not, then we close the old file and
302	 * open the new one.
303	 */
304	MUTEX_LOCK(env, dblp->mtx_dbreg);
305	if (argp->fileid != DB_LOGFILEID_INVALID &&
306	    argp->fileid < dblp->dbentry_cnt)
307		dbe = &dblp->dbentry[argp->fileid];
308	else
309		dbe = NULL;
310
311	if (dbe != NULL) {
312		if (dbe->deleted) {
313			MUTEX_UNLOCK(env, dblp->mtx_dbreg);
314			return (ENOENT);
315		}
316
317		/*
318		 * At the end of OPENFILES, we may have a file open.  If this
319		 * is a reopen, then we will always close and reopen.  If the
320		 * open was part of a committed transaction, so it doesn't
321		 * get undone.  However, if the fileid was previously used,
322		 * we'll see a close that may need to get undone.  There are
323		 * three ways we can detect this. 1) the meta-pgno in the
324		 * current file does not match that of the open file, 2) the
325		 * file uid of the current file does not match that of the
326		 * previously opened file, 3) the current file is unnamed, in
327		 * which case it should never be opened during recovery.
328		 * It is also possible that the db open previously failed
329		 * because the file was missing.  Check the DB_AM_OPEN_CALLED
330		 * bit and try to open it again.
331		 */
332		if ((dbp = dbe->dbp) != NULL) {
333			if (argp->opcode == DBREG_REOPEN ||
334			    !F_ISSET(dbp, DB_AM_OPEN_CALLED) ||
335			    dbp->meta_pgno != argp->meta_pgno ||
336			    argp->name.size == 0 ||
337			    memcmp(dbp->fileid, argp->uid.data,
338			    DB_FILE_ID_LEN) != 0) {
339				MUTEX_UNLOCK(env, dblp->mtx_dbreg);
340				(void)__dbreg_revoke_id(dbp, 0,
341				    DB_LOGFILEID_INVALID);
342				if (F_ISSET(dbp, DB_AM_RECOVER))
343					(void)__db_close(dbp, NULL, DB_NOSYNC);
344				goto reopen;
345			}
346
347			/*
348			 * We should only get here if we already have the
349			 * dbp from an openfiles pass, in which case, what's
350			 * here had better be the same dbp.
351			 */
352			DB_ASSERT(env, dbe->dbp == dbp);
353			MUTEX_UNLOCK(env, dblp->mtx_dbreg);
354
355			/*
356			 * This is a successful open.  We need to record that
357			 * in the txnlist so that we know how to handle the
358			 * subtransaction that created the file system object.
359			 */
360			if (argp->id != TXN_INVALID &&
361			    (ret = __db_txnlist_update(env, info,
362			    argp->id, TXN_EXPECTED, NULL, &status, 1)) != 0)
363				return (ret);
364			return (0);
365		}
366	}
367
368	MUTEX_UNLOCK(env, dblp->mtx_dbreg);
369
370reopen:
371	/*
372	 * We never re-open temporary files.  Temp files are only useful during
373	 * aborts in which case the dbp was entered when the file was
374	 * registered. During recovery, we treat temp files as properly deleted
375	 * files, allowing the open to fail and not reporting any errors when
376	 * recovery fails to get a valid dbp from __dbreg_id_to_db.
377	 */
378	if (argp->name.size == 0) {
379		(void)__dbreg_add_dbentry(env, dblp, NULL, argp->fileid);
380		return (ENOENT);
381	}
382
383	/*
384	 * We are about to pass a recovery txn pointer into the main library.
385	 * We need to make sure that any accessed fields are set appropriately.
386	 */
387	if (txn != NULL) {
388		id = txn->txnid;
389		memset(txn, 0, sizeof(DB_TXN));
390		txn->txnid = id;
391		txn->mgrp = env->tx_handle;
392	}
393
394	return (__dbreg_do_open(env,
395	    txn, dblp, argp->uid.data, argp->name.data, argp->ftype,
396	    argp->fileid, argp->meta_pgno, info, argp->id, argp->opcode));
397}
398