1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996,2008 Oracle.  All rights reserved.
5 *
6 * $Id: txn_region.c,v 12.34 2008/01/08 20:59:00 bostic Exp $
7 */
8
9#include "db_config.h"
10
11#include "db_int.h"
12#include "dbinc/log.h"
13#include "dbinc/txn.h"
14
15static int __txn_init __P((ENV *, DB_TXNMGR *));
16static size_t __txn_region_size __P((ENV *));
17
18/*
19 * __txn_open --
20 *	Open a transaction region.
21 *
22 * PUBLIC: int __txn_open __P((ENV *, int));
23 */
24int
25__txn_open(env, create_ok)
26	ENV *env;
27	int create_ok;
28{
29	DB_TXNMGR *mgr;
30	int ret;
31
32	/* Create/initialize the transaction manager structure. */
33	if ((ret = __os_calloc(env, 1, sizeof(DB_TXNMGR), &mgr)) != 0)
34		return (ret);
35	TAILQ_INIT(&mgr->txn_chain);
36	mgr->env = env;
37
38	/* Join/create the txn region. */
39	mgr->reginfo.env = env;
40	mgr->reginfo.type = REGION_TYPE_TXN;
41	mgr->reginfo.id = INVALID_REGION_ID;
42	mgr->reginfo.flags = REGION_JOIN_OK;
43	if (create_ok)
44		F_SET(&mgr->reginfo, REGION_CREATE_OK);
45	if ((ret = __env_region_attach(env,
46	    &mgr->reginfo, __txn_region_size(env))) != 0)
47		goto err;
48
49	/* If we created the region, initialize it. */
50	if (F_ISSET(&mgr->reginfo, REGION_CREATE))
51		if ((ret = __txn_init(env, mgr)) != 0)
52			goto err;
53
54	/* Set the local addresses. */
55	mgr->reginfo.primary =
56	    R_ADDR(&mgr->reginfo, mgr->reginfo.rp->primary);
57
58	/* If threaded, acquire a mutex to protect the active TXN list. */
59	if ((ret = __mutex_alloc(
60	    env, MTX_TXN_ACTIVE, DB_MUTEX_PROCESS_ONLY, &mgr->mutex)) != 0)
61		goto err;
62
63	env->tx_handle = mgr;
64	return (0);
65
66err:	env->tx_handle = NULL;
67	if (mgr->reginfo.addr != NULL)
68		(void)__env_region_detach(env, &mgr->reginfo, 0);
69
70	(void)__mutex_free(env, &mgr->mutex);
71	__os_free(env, mgr);
72	return (ret);
73}
74
75/*
76 * __txn_init --
77 *	Initialize a transaction region in shared memory.
78 */
79static int
80__txn_init(env, mgr)
81	ENV *env;
82	DB_TXNMGR *mgr;
83{
84	DB_ENV *dbenv;
85	DB_LSN last_ckp;
86	DB_TXNREGION *region;
87	int ret;
88
89	dbenv = env->dbenv;
90
91	/*
92	 * Find the last checkpoint in the log.
93	 */
94	ZERO_LSN(last_ckp);
95	if (LOGGING_ON(env)) {
96		/*
97		 * The log system has already walked through the last
98		 * file.  Get the LSN of a checkpoint it may have found.
99		 */
100		if ((ret = __log_get_cached_ckp_lsn(env, &last_ckp)) != 0)
101			return (ret);
102
103		/*
104		 * If that didn't work, look backwards from the beginning of
105		 * the last log file until we find the last checkpoint.
106		 */
107		if (IS_ZERO_LSN(last_ckp) &&
108		    (ret = __txn_findlastckp(env, &last_ckp, NULL)) != 0)
109			return (ret);
110	}
111
112	if ((ret = __env_alloc(&mgr->reginfo,
113	    sizeof(DB_TXNREGION), &mgr->reginfo.primary)) != 0) {
114		__db_errx(env,
115		    "Unable to allocate memory for the transaction region");
116		return (ret);
117	}
118	mgr->reginfo.rp->primary =
119	    R_OFFSET(&mgr->reginfo, mgr->reginfo.primary);
120	region = mgr->reginfo.primary;
121	memset(region, 0, sizeof(*region));
122
123	if ((ret = __mutex_alloc(
124	    env, MTX_TXN_REGION, 0, &region->mtx_region)) != 0)
125		return (ret);
126
127	region->maxtxns = dbenv->tx_max;
128	region->last_txnid = TXN_MINIMUM;
129	region->cur_maxid = TXN_MAXIMUM;
130
131	if ((ret = __mutex_alloc(
132	    env, MTX_TXN_CHKPT, 0, &region->mtx_ckp)) != 0)
133		return (ret);
134	region->last_ckp = last_ckp;
135	region->time_ckp = time(NULL);
136
137	memset(&region->stat, 0, sizeof(region->stat));
138#ifdef HAVE_STATISTICS
139	region->stat.st_maxtxns = region->maxtxns;
140#endif
141
142	SH_TAILQ_INIT(&region->active_txn);
143	SH_TAILQ_INIT(&region->mvcc_txn);
144	return (ret);
145}
146
147/*
148 * __txn_findlastckp --
149 *	Find the last checkpoint in the log, walking backwards from the
150 *	max_lsn given or the beginning of the last log file.  (The
151 *	log system looked through the last log file when it started up.)
152 *
153 * PUBLIC: int __txn_findlastckp __P((ENV *, DB_LSN *, DB_LSN *));
154 */
155int
156__txn_findlastckp(env, lsnp, max_lsn)
157	ENV *env;
158	DB_LSN *lsnp;
159	DB_LSN *max_lsn;
160{
161	DBT dbt;
162	DB_LOGC *logc;
163	DB_LSN lsn;
164	int ret, t_ret;
165	u_int32_t rectype;
166
167	ZERO_LSN(*lsnp);
168
169	if ((ret = __log_cursor(env, &logc)) != 0)
170		return (ret);
171
172	/* Get the last LSN. */
173	memset(&dbt, 0, sizeof(dbt));
174	if (max_lsn != NULL) {
175		lsn = *max_lsn;
176		if ((ret = __logc_get(logc, &lsn, &dbt, DB_SET)) != 0)
177			goto err;
178	} else {
179		if ((ret = __logc_get(logc, &lsn, &dbt, DB_LAST)) != 0)
180			goto err;
181		/*
182		 * Twiddle the last LSN so it points to the beginning of the
183		 * last file; we know there's no checkpoint after that, since
184		 * the log system already looked there.
185		 */
186		lsn.offset = 0;
187	}
188
189	/* Read backwards, looking for checkpoints. */
190	while ((ret = __logc_get(logc, &lsn, &dbt, DB_PREV)) == 0) {
191		if (dbt.size < sizeof(u_int32_t))
192			continue;
193		memcpy(&rectype, dbt.data, sizeof(u_int32_t));
194		if (rectype == DB___txn_ckp) {
195			*lsnp = lsn;
196			break;
197		}
198	}
199
200err:	if ((t_ret = __logc_close(logc)) != 0 && ret == 0)
201		ret = t_ret;
202
203	/*
204	 * Not finding a checkpoint is not an error;  there may not exist
205	 * one in the log.
206	 */
207	return ((ret == 0 || ret == DB_NOTFOUND) ? 0 : ret);
208}
209
210/*
211 * __txn_env_refresh --
212 *	Clean up after the transaction system on a close or failed open.
213 *
214 * PUBLIC: int __txn_env_refresh __P((ENV *));
215 */
216int
217__txn_env_refresh(env)
218	ENV *env;
219{
220	DB_TXN *txn;
221	DB_TXNMGR *mgr;
222	REGINFO *reginfo;
223	u_int32_t txnid;
224	int aborted, ret, t_ret;
225
226	ret = 0;
227	mgr = env->tx_handle;
228	reginfo = &mgr->reginfo;
229
230	/*
231	 * This function can only be called once per process (i.e., not
232	 * once per thread), so no synchronization is required.
233	 *
234	 * The caller is probably doing something wrong if close is called with
235	 * active transactions.  Try and abort any active transactions that are
236	 * not prepared, but it's quite likely the aborts will fail because
237	 * recovery won't find open files.  If we can't abort any of the
238	 * unprepared transaction, panic, we have to run recovery to get back
239	 * to a known state.
240	 */
241	aborted = 0;
242	if (TAILQ_FIRST(&mgr->txn_chain) != NULL) {
243		while ((txn = TAILQ_FIRST(&mgr->txn_chain)) != NULL) {
244			/* Prepared transactions are OK. */
245			txnid = txn->txnid;
246			if (((TXN_DETAIL *)txn->td)->status == TXN_PREPARED) {
247				if ((ret = __txn_discard_int(txn, 0)) != 0) {
248					__db_err(env, ret,
249					    "unable to discard txn %#lx",
250					    (u_long)txnid);
251					break;
252				}
253				continue;
254			}
255			aborted = 1;
256			if ((t_ret = __txn_abort(txn)) != 0) {
257				__db_err(env, t_ret,
258				    "unable to abort transaction %#lx",
259				    (u_long)txnid);
260				ret = __env_panic(env, t_ret);
261				break;
262			}
263		}
264		if (aborted) {
265			__db_errx(env,
266	"Error: closing the transaction region with active transactions");
267			if (ret == 0)
268				ret = EINVAL;
269		}
270	}
271
272	/* Discard the per-thread lock. */
273	if ((t_ret = __mutex_free(env, &mgr->mutex)) != 0 && ret == 0)
274		ret = t_ret;
275
276	/* Detach from the region. */
277	if ((t_ret = __env_region_detach(env, reginfo, 0)) != 0 && ret == 0)
278		ret = t_ret;
279
280	__os_free(env, mgr);
281
282	env->tx_handle = NULL;
283	return (ret);
284}
285
286/*
287 * __txn_region_mutex_count --
288 *	Return the number of mutexes the txn region will need.
289 *
290 * PUBLIC: u_int32_t __txn_region_mutex_count __P((ENV *));
291 */
292u_int32_t
293__txn_region_mutex_count(env)
294	ENV *env;
295{
296	DB_ENV *dbenv;
297
298	dbenv = env->dbenv;
299
300	/*
301	 * We need a MVCC mutex for each TXN_DETAIL structure, a mutex for
302	 * DB_TXNMGR structure, two mutexes for the DB_TXNREGION structure.
303	 */
304	return (dbenv->tx_max + 1 + 2);
305}
306
307/*
308 * __txn_region_size --
309 *	 Return the amount of space needed for the txn region.
310 */
311static size_t
312__txn_region_size(env)
313	ENV *env;
314{
315	DB_ENV *dbenv;
316	size_t s;
317
318	dbenv = env->dbenv;
319
320	/*
321	 * Make the region large enough to hold the primary transaction region
322	 * structure, txn_max transaction detail structures, txn_max chunks of
323	 * overhead required by the underlying shared region allocator for each
324	 * chunk of memory, txn_max transaction names, at an average of 20
325	 * bytes each, and 10KB for safety.
326	 */
327	s = sizeof(DB_TXNREGION) +
328	    dbenv->tx_max * (sizeof(TXN_DETAIL) + __env_alloc_overhead() + 20) +
329	    10 * 1024;
330	return (s);
331}
332
333/*
334 * __txn_id_set --
335 *	Set the current transaction ID and current maximum unused ID (for
336 *	testing purposes only).
337 *
338 * PUBLIC: int __txn_id_set __P((ENV *, u_int32_t, u_int32_t));
339 */
340int
341__txn_id_set(env, cur_txnid, max_txnid)
342	ENV *env;
343	u_int32_t cur_txnid, max_txnid;
344{
345	DB_TXNMGR *mgr;
346	DB_TXNREGION *region;
347	int ret;
348
349	ENV_REQUIRES_CONFIG(env, env->tx_handle, "txn_id_set", DB_INIT_TXN);
350
351	mgr = env->tx_handle;
352	region = mgr->reginfo.primary;
353	region->last_txnid = cur_txnid;
354	region->cur_maxid = max_txnid;
355
356	ret = 0;
357	if (cur_txnid < TXN_MINIMUM) {
358		__db_errx(env, "Current ID value %lu below minimum",
359		    (u_long)cur_txnid);
360		ret = EINVAL;
361	}
362	if (max_txnid < TXN_MINIMUM) {
363		__db_errx(env, "Maximum ID value %lu below minimum",
364		    (u_long)max_txnid);
365		ret = EINVAL;
366	}
367	return (ret);
368}
369
370/*
371 * __txn_oldest_reader --
372 *	 Find the oldest "read LSN" of any active transaction'
373 *	 MVCC changes older than this can safely be discarded from the cache.
374 *
375 * PUBLIC: int __txn_oldest_reader __P((ENV *, DB_LSN *));
376 */
377int
378__txn_oldest_reader(env, lsnp)
379	ENV *env;
380	DB_LSN *lsnp;
381{
382	DB_LSN old_lsn;
383	DB_TXNMGR *mgr;
384	DB_TXNREGION *region;
385	TXN_DETAIL *td;
386	int ret;
387
388	if ((mgr = env->tx_handle) == NULL)
389		return (0);
390	region = mgr->reginfo.primary;
391
392	if ((ret = __log_current_lsn(env, &old_lsn, NULL, NULL)) != 0)
393		return (ret);
394
395	TXN_SYSTEM_LOCK(env);
396	SH_TAILQ_FOREACH(td, &region->active_txn, links, __txn_detail)
397		if (LOG_COMPARE(&td->read_lsn, &old_lsn) < 0)
398			old_lsn = td->read_lsn;
399	TXN_SYSTEM_UNLOCK(env);
400
401	DB_ASSERT(env, LOG_COMPARE(&old_lsn, lsnp) >= 0);
402	*lsnp = old_lsn;
403
404	return (0);
405}
406
407/*
408 * __txn_add_buffer --
409 *	Add to the count of buffers created by the given transaction.
410 *
411 * PUBLIC: int __txn_add_buffer __P((ENV *, TXN_DETAIL *));
412 */
413int
414__txn_add_buffer(env, td)
415	ENV *env;
416	TXN_DETAIL *td;
417{
418	DB_ASSERT(env, td != NULL);
419
420	MUTEX_LOCK(env, td->mvcc_mtx);
421	DB_ASSERT(env, td->mvcc_ref < UINT32_MAX);
422	++td->mvcc_ref;
423	MUTEX_UNLOCK(env, td->mvcc_mtx);
424
425	COMPQUIET(env, NULL);
426	return (0);
427}
428
429/*
430 * __txn_remove_buffer --
431 *	Remove a buffer from a transaction -- free the transaction if necessary.
432 *
433 * PUBLIC: int __txn_remove_buffer __P((ENV *, TXN_DETAIL *, db_mutex_t));
434 */
435int
436__txn_remove_buffer(env, td, hash_mtx)
437	ENV *env;
438	TXN_DETAIL *td;
439	db_mutex_t hash_mtx;
440{
441	DB_TXNMGR *mgr;
442	DB_TXNREGION *region;
443	int need_free, ret;
444
445	DB_ASSERT(env, td != NULL);
446	ret = 0;
447	mgr = env->tx_handle;
448	region = mgr->reginfo.primary;
449
450	MUTEX_LOCK(env, td->mvcc_mtx);
451	DB_ASSERT(env, td->mvcc_ref > 0);
452	need_free = (--td->mvcc_ref == 0);
453	MUTEX_UNLOCK(env, td->mvcc_mtx);
454
455	if (need_free &&
456	    (td->status == TXN_COMMITTED || td->status == TXN_ABORTED)) {
457		MUTEX_UNLOCK(env, hash_mtx);
458
459		ret = __mutex_free(env, &td->mvcc_mtx);
460		td->mvcc_mtx = MUTEX_INVALID;
461
462		TXN_SYSTEM_LOCK(env);
463		SH_TAILQ_REMOVE(&region->mvcc_txn, td, links, __txn_detail);
464#ifdef HAVE_STATISTICS
465		--region->stat.st_nsnapshot;
466#endif
467		__env_alloc_free(&mgr->reginfo, td);
468		TXN_SYSTEM_UNLOCK(env);
469
470		MUTEX_LOCK(env, hash_mtx);
471	}
472
473	return (ret);
474}
475