1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 2001-2009 Oracle.  All rights reserved.
5 *
6 * $Id$
7 */
8
9#include "db_config.h"
10
11#include "db_int.h"
12#include "dbinc/txn.h"
13#include "dbinc/db_page.h"
14#include "dbinc/db_dispatch.h"
15#include "dbinc/log.h"
16#include "dbinc_auto/db_auto.h"
17#include "dbinc_auto/crdel_auto.h"
18#include "dbinc_auto/db_ext.h"
19
20/*
21 * __txn_map_gid
22 *	Return the txn that corresponds to this global ID.
23 *
24 * PUBLIC: int __txn_map_gid __P((ENV *,
25 * PUBLIC:     u_int8_t *, TXN_DETAIL **, roff_t *));
26 */
27int
28__txn_map_gid(env, gid, tdp, offp)
29	ENV *env;
30	u_int8_t *gid;
31	TXN_DETAIL **tdp;
32	roff_t *offp;
33{
34	DB_TXNMGR *mgr;
35	DB_TXNREGION *region;
36
37	mgr = env->tx_handle;
38	region = mgr->reginfo.primary;
39
40	/*
41	 * Search the internal active transaction table to find the
42	 * matching xid.  If this is a performance hit, then we
43	 * can create a hash table, but I doubt it's worth it.
44	 */
45	TXN_SYSTEM_LOCK(env);
46	SH_TAILQ_FOREACH(*tdp, &region->active_txn, links, __txn_detail)
47		if (memcmp(gid, (*tdp)->gid, sizeof((*tdp)->gid)) == 0)
48			break;
49	TXN_SYSTEM_UNLOCK(env);
50
51	if (*tdp == NULL)
52		return (EINVAL);
53
54	*offp = R_OFFSET(&mgr->reginfo, *tdp);
55	return (0);
56}
57
58/*
59 * __txn_recover_pp --
60 *	ENV->txn_recover pre/post processing.
61 *
62 * PUBLIC: int __txn_recover_pp __P((DB_ENV *,
63 * PUBLIC:     DB_PREPLIST *, u_int32_t, u_int32_t *, u_int32_t));
64 */
65int
66__txn_recover_pp(dbenv, preplist, count, retp, flags)
67	DB_ENV *dbenv;
68	DB_PREPLIST *preplist;
69	u_int32_t count, *retp;
70	u_int32_t flags;
71{
72	DB_THREAD_INFO *ip;
73	ENV *env;
74	int ret;
75
76	env = dbenv->env;
77
78	ENV_REQUIRES_CONFIG(
79	    env, env->tx_handle, "txn_recover", DB_INIT_TXN);
80
81	if (F_ISSET((DB_TXNREGION *)env->tx_handle->reginfo.primary,
82	    TXN_IN_RECOVERY)) {
83		__db_errx(env, "operation not permitted while in recovery");
84		return (EINVAL);
85	}
86
87	if (flags != DB_FIRST && flags != DB_NEXT)
88		return (__db_ferr(env, "DB_ENV->txn_recover", 0));
89
90	ENV_ENTER(env, ip);
91	REPLICATION_WRAP(env,
92	    (__txn_recover(env, preplist, count, retp, flags)), 0, ret);
93	ENV_LEAVE(env, ip);
94	return (ret);
95}
96
97/*
98 * __txn_recover --
99 *	ENV->txn_recover.
100 *
101 * PUBLIC: int __txn_recover __P((ENV *,
102 * PUBLIC:         DB_PREPLIST *, u_int32_t, u_int32_t *, u_int32_t));
103 */
104int
105__txn_recover(env, txns, count, retp, flags)
106	ENV *env;
107	DB_PREPLIST *txns;
108	u_int32_t  count, *retp;
109	u_int32_t flags;
110{
111	DB_LSN min;
112	DB_PREPLIST *prepp;
113	DB_THREAD_INFO *ip;
114	DB_TXNMGR *mgr;
115	DB_TXNREGION *region;
116	TXN_DETAIL *td;
117	u_int32_t i;
118	int restored, ret;
119
120	*retp = 0;
121
122	MAX_LSN(min);
123	prepp = txns;
124	restored = ret = 0;
125
126	DB_ASSERT(env, txns != NULL);
127	/*
128	 * If we are starting a scan, then we traverse the active transaction
129	 * list once making sure that all transactions are marked as not having
130	 * been collected.  Then on each pass, we mark the ones we collected
131	 * so that if we cannot collect them all at once, we can finish up
132	 * next time with a continue.
133	 */
134
135	mgr = env->tx_handle;
136	region = mgr->reginfo.primary;
137
138	/*
139	 * During this pass we need to figure out if we are going to need
140	 * to open files.  We need to open files if we've never collected
141	 * before (in which case, none of the COLLECTED bits will be set)
142	 * and the ones that we are collecting are restored (if they aren't
143	 * restored, then we never crashed; just the main server did).
144	 */
145	TXN_SYSTEM_LOCK(env);
146
147	/* Now begin collecting active transactions. */
148	for (td = SH_TAILQ_FIRST(&region->active_txn, __txn_detail);
149	    td != NULL && *retp < count;
150	    td = SH_TAILQ_NEXT(td, links, __txn_detail)) {
151		if (td->status != TXN_PREPARED ||
152		    (flags != DB_FIRST && F_ISSET(td, TXN_DTL_COLLECTED)))
153			continue;
154
155		if (F_ISSET(td, TXN_DTL_RESTORED))
156			restored = 1;
157
158		if ((ret = __os_calloc(env,
159		    1, sizeof(DB_TXN), &prepp->txn)) != 0) {
160			TXN_SYSTEM_UNLOCK(env);
161			goto err;
162		}
163		if ((ret = __txn_continue(env, prepp->txn, td)) != 0)
164			goto err;
165		F_SET(prepp->txn, TXN_MALLOC);
166		if (F_ISSET(env->dbenv, DB_ENV_TXN_NOSYNC))
167			F_SET(prepp->txn, TXN_NOSYNC);
168		else if (F_ISSET(env->dbenv, DB_ENV_TXN_WRITE_NOSYNC))
169			F_SET(prepp->txn, TXN_WRITE_NOSYNC);
170		else
171			F_SET(prepp->txn, TXN_SYNC);
172		memcpy(prepp->gid, td->gid, sizeof(td->gid));
173		prepp++;
174
175		if (!IS_ZERO_LSN(td->begin_lsn) &&
176		    LOG_COMPARE(&td->begin_lsn, &min) < 0)
177			min = td->begin_lsn;
178
179		(*retp)++;
180		F_SET(td, TXN_DTL_COLLECTED);
181	}
182	if (flags == DB_FIRST)
183		for (; td != NULL; td = SH_TAILQ_NEXT(td, links, __txn_detail))
184			F_CLR(td, TXN_DTL_COLLECTED);
185	TXN_SYSTEM_UNLOCK(env);
186
187	/*
188	 * Now link all the transactions into the transaction manager's list.
189	 */
190	if (*retp != 0) {
191		MUTEX_LOCK(env, mgr->mutex);
192		for (i = 0; i < *retp; i++)
193			TAILQ_INSERT_TAIL(&mgr->txn_chain, txns[i].txn, links);
194		MUTEX_UNLOCK(env, mgr->mutex);
195
196		/*
197		 * If we are restoring, update our count of outstanding
198		 * transactions.
199		 */
200		if (REP_ON(env)) {
201			REP_SYSTEM_LOCK(env);
202			env->rep_handle->region->op_cnt += (u_long)*retp;
203			REP_SYSTEM_UNLOCK(env);
204		}
205
206	}
207	/*
208	 * If recovery already opened the files for us, don't
209	 * do it here.
210	 */
211	if (restored != 0 && flags == DB_FIRST &&
212	    !F_ISSET(env->lg_handle, DBLOG_OPENFILES)) {
213		ENV_GET_THREAD_INFO(env, ip);
214		ret = __txn_openfiles(env, ip, &min, 0);
215	}
216
217	if (0) {
218err:		TXN_SYSTEM_UNLOCK(env);
219	}
220	return (ret);
221}
222
223/*
224 * __txn_openfiles --
225 *	Call env_openfiles.
226 *
227 * PUBLIC: int __txn_openfiles __P((ENV *, DB_THREAD_INFO *, DB_LSN *, int));
228 */
229int
230__txn_openfiles(env, ip, min, force)
231	ENV *env;
232	DB_THREAD_INFO *ip;
233	DB_LSN *min;
234	int force;
235{
236	DBT data;
237	DB_LOGC *logc;
238	DB_LSN open_lsn;
239	DB_TXNHEAD *txninfo;
240	__txn_ckp_args *ckp_args;
241	int ret, t_ret;
242
243	/*
244	 * Figure out the last checkpoint before the smallest
245	 * start_lsn in the region.
246	 */
247	logc = NULL;
248	if ((ret = __log_cursor(env, &logc)) != 0)
249		goto err;
250
251	memset(&data, 0, sizeof(data));
252	if ((ret = __txn_getckp(env, &open_lsn)) == 0)
253		while (!IS_ZERO_LSN(open_lsn) && (ret =
254		    __logc_get(logc, &open_lsn, &data, DB_SET)) == 0 &&
255		    (force ||
256		    (min != NULL && LOG_COMPARE(min, &open_lsn) < 0))) {
257			/* Format the log record. */
258			if ((ret = __txn_ckp_read(
259			    env, data.data, &ckp_args)) != 0) {
260				__db_errx(env,
261			    "Invalid checkpoint record at [%lu][%lu]",
262				    (u_long)open_lsn.file,
263				    (u_long)open_lsn.offset);
264				goto err;
265			}
266			/*
267			 * If force is set, then we're forcing ourselves
268			 * to go back far enough to open files.
269			 * Use ckp_lsn and then break out of the loop.
270			 */
271			open_lsn = force ? ckp_args->ckp_lsn :
272			    ckp_args->last_ckp;
273			__os_free(env, ckp_args);
274			if (force) {
275				if ((ret = __logc_get(logc, &open_lsn,
276				    &data, DB_SET)) != 0)
277					goto err;
278				break;
279			}
280		}
281
282	/*
283	 * There are several ways by which we may have gotten here.
284	 * - We got a DB_NOTFOUND -- we need to read the first
285	 *	log record.
286	 * - We found a checkpoint before min.  We're done.
287	 * - We found a checkpoint after min who's last_ckp is 0.  We
288	 *	need to start at the beginning of the log.
289	 * - We are forcing an openfiles and we have our ckp_lsn.
290	 */
291	if ((ret == DB_NOTFOUND || IS_ZERO_LSN(open_lsn)) && (ret =
292	    __logc_get(logc, &open_lsn, &data, DB_FIRST)) != 0) {
293		__db_errx(env, "No log records");
294		goto err;
295	}
296
297	if ((ret = __db_txnlist_init(env, ip, 0, 0, NULL, &txninfo)) != 0)
298		goto err;
299	ret = __env_openfiles(
300	    env, logc, txninfo, &data, &open_lsn, NULL, (double)0, 0);
301	if (txninfo != NULL)
302		__db_txnlist_end(env, txninfo);
303
304err:
305	if (logc != NULL && (t_ret = __logc_close(logc)) != 0 && ret == 0)
306		ret = t_ret;
307	return (ret);
308}
309