1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 2001,2008 Oracle.  All rights reserved.
5 *
6 * $Id: txn_util.c,v 12.25 2008/01/31 18:40:48 bostic Exp $
7 */
8
9#include "db_config.h"
10
11#include "db_int.h"
12#include "dbinc/db_page.h"
13#include "dbinc/lock.h"
14#include "dbinc/mp.h"
15#include "dbinc/txn.h"
16#include "dbinc/log.h"
17#include "dbinc/db_am.h"
18
/*
 * TXN_EVENT --
 *	One deferred action queued on a transaction's event list (txn->events).
 * The op field says which member of the union is in use: "c" for TXN_CLOSE,
 * "r" for TXN_REMOVE and "t" for TXN_TRADE/TXN_TRADED.
 */
typedef struct __txn_event TXN_EVENT;
struct __txn_event {
	/* Event type; selects the union member below. */
	TXN_EVENT_T op;
	/* Linkage on the owning transaction's event queue. */
	TAILQ_ENTRY(__txn_event) links;
	union {
		struct {
			/* Delayed close: the handle to close. */
			DB *dbp;
		} c;
		struct {
			/*
			 * Delayed remove.  name is a private copy owned by
			 * the event; fileid is either NULL or a private
			 * DB_FILE_ID_LEN-byte copy, also owned by the event.
			 * inmem is passed through to __memp_nameop.
			 */
			char *name;
			u_int8_t *fileid;
			int inmem;
		} r;
		struct {
			/*
			 * Lock event: a copy of the lock to trade, the locker
			 * passed to the trade request, and the handle whose
			 * cur_locker/cur_txn fields are updated by the trade.
			 */
			DB_LOCK lock;
			DB_LOCKER *locker;
			DB *dbp;
		} t;
	} u;
};
42
43/*
44 * __txn_closeevent --
45 *
46 * Creates a close event that can be added to the [so-called] commit list, so
47 * that we can redo a failed DB handle close once we've aborted the transaction.
48 *
49 * PUBLIC: int __txn_closeevent __P((ENV *, DB_TXN *, DB *));
50 */
51int
52__txn_closeevent(env, txn, dbp)
53	ENV *env;
54	DB_TXN *txn;
55	DB *dbp;
56{
57	int ret;
58	TXN_EVENT *e;
59
60	e = NULL;
61	if ((ret = __os_calloc(env, 1, sizeof(TXN_EVENT), &e)) != 0)
62		return (ret);
63
64	e->u.c.dbp = dbp;
65	e->op = TXN_CLOSE;
66	TAILQ_INSERT_TAIL(&txn->events, e, links);
67
68	return (0);
69}
70
71/*
72 * __txn_remevent --
73 *
74 * Creates a remove event that can be added to the commit list.
75 *
76 * PUBLIC: int __txn_remevent __P((ENV *,
77 * PUBLIC:       DB_TXN *, const char *, u_int8_t *, int));
78 */
79int
80__txn_remevent(env, txn, name, fileid, inmem)
81	ENV *env;
82	DB_TXN *txn;
83	const char *name;
84	u_int8_t *fileid;
85	int inmem;
86{
87	int ret;
88	TXN_EVENT *e;
89
90	e = NULL;
91	if ((ret = __os_calloc(env, 1, sizeof(TXN_EVENT), &e)) != 0)
92		return (ret);
93
94	if ((ret = __os_strdup(env, name, &e->u.r.name)) != 0)
95		goto err;
96
97	if (fileid != NULL) {
98		if ((ret = __os_calloc(env,
99		    1, DB_FILE_ID_LEN, &e->u.r.fileid)) != 0)
100			return (ret);
101		memcpy(e->u.r.fileid, fileid, DB_FILE_ID_LEN);
102	}
103
104	e->u.r.inmem = inmem;
105	e->op = TXN_REMOVE;
106	TAILQ_INSERT_TAIL(&txn->events, e, links);
107
108	return (0);
109
110err:	if (e != NULL)
111		__os_free(env, e);
112
113	return (ret);
114}
115
116/*
117 * __txn_remrem --
118 *	Remove a remove event because the remove has been superceeded,
119 * by a create of the same name, for example.
120 *
121 * PUBLIC: void __txn_remrem __P((ENV *, DB_TXN *, const char *));
122 */
123void
124__txn_remrem(env, txn, name)
125	ENV *env;
126	DB_TXN *txn;
127	const char *name;
128{
129	TXN_EVENT *e, *next_e;
130
131	for (e = TAILQ_FIRST(&txn->events); e != NULL; e = next_e) {
132		next_e = TAILQ_NEXT(e, links);
133		if (e->op != TXN_REMOVE || strcmp(name, e->u.r.name) != 0)
134			continue;
135		TAILQ_REMOVE(&txn->events, e, links);
136		__os_free(env, e->u.r.name);
137		if (e->u.r.fileid != NULL)
138			__os_free(env, e->u.r.fileid);
139		__os_free(env, e);
140	}
141
142	return;
143}
144
145/*
146 * __txn_lockevent --
147 *
148 * Add a lockevent to the commit-queue.  The lock event indicates a locker
149 * trade.
150 *
151 * PUBLIC: int __txn_lockevent __P((ENV *,
152 * PUBLIC:     DB_TXN *, DB *, DB_LOCK *, DB_LOCKER *));
153 */
154int
155__txn_lockevent(env, txn, dbp, lock, locker)
156	ENV *env;
157	DB_TXN *txn;
158	DB *dbp;
159	DB_LOCK *lock;
160	DB_LOCKER *locker;
161{
162	int ret;
163	TXN_EVENT *e;
164
165	if (!LOCKING_ON(env))
166		return (0);
167
168	e = NULL;
169	if ((ret = __os_calloc(env, 1, sizeof(TXN_EVENT), &e)) != 0)
170		return (ret);
171
172	e->u.t.locker = locker;
173	e->u.t.lock = *lock;
174	e->u.t.dbp = dbp;
175	e->op = TXN_TRADE;
176	TAILQ_INSERT_TAIL(&txn->events, e, links);
177	dbp->cur_txn = txn;
178
179	return (0);
180}
181
182/*
183 * __txn_remlock --
184 *	Remove a lock event because the locker is going away.  We can remove
185 * by lock (using offset) or by locker_id (or by both).
186 *
187 * PUBLIC: void __txn_remlock __P((ENV *, DB_TXN *, DB_LOCK *, DB_LOCKER *));
188 */
189void
190__txn_remlock(env, txn, lock, locker)
191	ENV *env;
192	DB_TXN *txn;
193	DB_LOCK *lock;
194	DB_LOCKER *locker;
195{
196	TXN_EVENT *e, *next_e;
197
198	for (e = TAILQ_FIRST(&txn->events); e != NULL; e = next_e) {
199		next_e = TAILQ_NEXT(e, links);
200		if ((e->op != TXN_TRADE && e->op != TXN_TRADED) ||
201		    (e->u.t.lock.off != lock->off && e->u.t.locker != locker))
202			continue;
203		TAILQ_REMOVE(&txn->events, e, links);
204		__os_free(env, e);
205	}
206
207	return;
208}
209
/*
 * __txn_doevents --
 * Process the list of events associated with a transaction.  On commit,
 * apply the events; on abort, just toss the entries (except for close
 * events, which are performed on abort as well).
 *
 * opcode is the transaction operation being performed; it is compared
 * against TXN_ABORT and must not be TXN_PREPARE on the non-preprocess path.
 * If preprocess is non-zero, only pending trades of non-write locks are
 * done and every event is left on the list.
 *
 * Returns 0, or the first non-zero error encountered; processing continues
 * past errors.
 *
 * PUBLIC: int __txn_doevents __P((ENV *, DB_TXN *, int, int));
 */

/*
 * DO_TRADE --
 *	Issue a DB_LOCK_TRADE request for the current event's lock through
 * __lock_vec, using the event's locker.  On success the handle's cur_locker
 * is set to that locker and its cur_txn is cleared.  DB_NOTFOUND is treated
 * as success; any other error is saved in ret if ret is still 0.  The event
 * is marked TXN_TRADED in every case so the trade is not attempted again.
 *
 *	Relies on the caller's locals: env, e, req, ret and t_ret.
 */
#define	DO_TRADE do {							\
	memset(&req, 0, sizeof(req));					\
	req.lock = e->u.t.lock;						\
	req.op = DB_LOCK_TRADE;						\
	t_ret = __lock_vec(env, e->u.t.locker, 0, &req, 1, NULL);	\
	if (t_ret == 0)	{						\
		e->u.t.dbp->cur_locker = e->u.t.locker;			\
		e->u.t.dbp->cur_txn = NULL;				\
	} else if (t_ret == DB_NOTFOUND)				\
		t_ret = 0;						\
	if (t_ret != 0 && ret == 0)					\
		ret = t_ret;						\
	e->op = TXN_TRADED;						\
} while (0)

int
__txn_doevents(env, txn, opcode, preprocess)
	ENV *env;
	DB_TXN *txn;
	int opcode, preprocess;
{
	DB_LOCKREQ req;
	TXN_EVENT *e;
	int ret, t_ret;

	ret = 0;

	/*
	 * This phase only gets called if we have a phase where we
	 * release read locks.  Since not all paths will call this
	 * phase, we have to check for it below as well.  So, when
	 * we do the trade, we update the opcode of the entry so that
	 * we don't try the trade again.
	 */
	if (preprocess) {
		/* Events stay queued here; only their op may change. */
		for (e = TAILQ_FIRST(&txn->events);
		    e != NULL; e = TAILQ_NEXT(e, links)) {
			if (e->op != TXN_TRADE ||
			    IS_WRITELOCK(e->u.t.lock.mode))
				continue;
			DO_TRADE;
		}
		return (ret);
	}

	/*
	 * Prepare should only cause a preprocess, since the transaction
	 * isn't over.
	 */
	DB_ASSERT(env, opcode != TXN_PREPARE);
	/* Drain the queue: each event is unlinked, handled, then freed. */
	while ((e = TAILQ_FIRST(&txn->events)) != NULL) {
		TAILQ_REMOVE(&txn->events, e, links);
		/*
		 * Most deferred events should only happen on
		 * commits, not aborts or prepares.  The one exception
		 * is a close which gets done on commit and abort, but
		 * not prepare. If we're not doing operations, then we
		 * can just go free resources.
		 */
		if (opcode == TXN_ABORT && e->op != TXN_CLOSE)
			goto dofree;
		switch (e->op) {
		case TXN_CLOSE:
			if ((t_ret = __db_close(e->u.c.dbp,
			    NULL, DB_NOSYNC)) != 0 && ret == 0)
				ret = t_ret;
			break;
		case TXN_REMOVE:
			/*
			 * With a file ID the remove goes through
			 * __memp_nameop; without one the file is simply
			 * unlinked by name.
			 */
			if (e->u.r.fileid != NULL) {
				if ((t_ret = __memp_nameop(env,
				    e->u.r.fileid, NULL, e->u.r.name,
				    NULL, e->u.r.inmem)) != 0 && ret == 0)
					ret = t_ret;
			} else if ((t_ret =
			    __os_unlink(env, e->u.r.name, 0)) != 0 && ret == 0)
				ret = t_ret;
			break;
		case TXN_TRADE:
			DO_TRADE;
			/* Fall through */
		case TXN_TRADED:
			/* Downgrade the lock. */
			if ((t_ret = __lock_downgrade(env,
			    &e->u.t.lock, DB_LOCK_READ, 0)) != 0 && ret == 0)
				ret = t_ret;
			break;
		default:
			/* This had better never happen. */
			DB_ASSERT(env, 0);
		}
dofree:
		/* Free resources here. */
		switch (e->op) {
		case TXN_REMOVE:
			if (e->u.r.fileid != NULL)
				__os_free(env, e->u.r.fileid);
			__os_free(env, e->u.r.name);
			break;
		case TXN_TRADE:
			/*
			 * DO_TRADE always rewrites op to TXN_TRADED, so an
			 * event still marked TXN_TRADE skipped the switch
			 * above; on abort, clear the handle's pending
			 * transaction.
			 */
			if (opcode == TXN_ABORT)
				e->u.t.dbp->cur_txn = NULL;
			break;
		case TXN_CLOSE:
		case TXN_TRADED:
		default:
			break;
		}
		__os_free(env, e);
	}

	return (ret);
}
329
330/*
331 * PUBLIC: int __txn_record_fname __P((ENV *, DB_TXN *, FNAME *));
332 */
333int
334__txn_record_fname(env, txn, fname)
335	ENV *env;
336	DB_TXN *txn;
337	FNAME *fname;
338{
339	DB_LOG *dblp;
340	DB_TXNMGR *mgr;
341	TXN_DETAIL *td;
342	roff_t fname_off;
343	roff_t *np, *ldbs;
344	u_int32_t i;
345	int ret;
346
347	if ((td = txn->td) == NULL)
348		return (0);
349	mgr = env->tx_handle;
350	dblp = env->lg_handle;
351	fname_off = R_OFFSET(&dblp->reginfo, fname);
352
353	/* See if we already have a ref to this DB handle. */
354	ldbs = R_ADDR(&mgr->reginfo, td->log_dbs);
355	for (i = 0, np = ldbs; i < td->nlog_dbs; i++, np++)
356		if (*np == fname_off)
357			return (0);
358
359	if (td->nlog_slots <= td->nlog_dbs) {
360		TXN_SYSTEM_LOCK(env);
361		if ((ret = __env_alloc(&mgr->reginfo,
362		    sizeof(roff_t) * (td->nlog_slots << 1), &np)) != 0)
363			return (ret);
364		memcpy(np, ldbs, td->nlog_dbs * sizeof(roff_t));
365		if (td->nlog_slots > TXN_NSLOTS)
366			__env_alloc_free(&mgr->reginfo, ldbs);
367
368		TXN_SYSTEM_UNLOCK(env);
369		td->log_dbs = R_OFFSET(&mgr->reginfo, np);
370		ldbs = np;
371		td->nlog_slots = td->nlog_slots << 1;
372	}
373
374	ldbs[td->nlog_dbs] = fname_off;
375	td->nlog_dbs++;
376	fname->txn_ref++;
377
378	return (0);
379}
380
381/*
382 * __txn_dref_fnam --
383 *	Either pass the fname to our parent txn or decrement the refcount
384 * and close the fileid if it goes to zero.
385 *
386 * PUBLIC: int __txn_dref_fname __P((ENV *, DB_TXN *));
387 */
388int
389__txn_dref_fname(env, txn)
390	ENV *env;
391	DB_TXN *txn;
392{
393	DB_LOG *dblp;
394	DB_TXNMGR *mgr;
395	FNAME *fname;
396	roff_t *np;
397	TXN_DETAIL *ptd, *td;
398	u_int32_t i;
399	int ret;
400
401	td = txn->td;
402
403	if (td->nlog_dbs == 0)
404		return (0);
405
406	mgr = env->tx_handle;
407	dblp = env->lg_handle;
408	ret = 0;
409
410	ptd = txn->parent != NULL ? txn->parent->td : NULL;
411
412	np = R_ADDR(&mgr->reginfo, td->log_dbs);
413	for (i = 0; i < td->nlog_dbs; i++, np++) {
414		fname = R_ADDR(&dblp->reginfo, *np);
415		MUTEX_LOCK(env, fname->mutex);
416		if (ptd != NULL) {
417			ret = __txn_record_fname(env, txn->parent, fname);
418			fname->txn_ref--;
419			MUTEX_UNLOCK(env, fname->mutex);
420		} else if (fname->txn_ref == 1) {
421			MUTEX_UNLOCK(env, fname->mutex);
422			DB_ASSERT(env, fname->txn_ref != 0);
423			ret = __dbreg_close_id_int(
424			    env, fname, DBREG_CLOSE, 0);
425		} else {
426			fname->txn_ref--;
427			MUTEX_UNLOCK(env, fname->mutex);
428		}
429		if (ret != 0)
430			break;
431	}
432
433	return (ret);
434}
435