1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1998
5 *	Sleepycat Software.  All rights reserved.
6 */
7
8#pragma ident	"%Z%%M%	%I%	%E% SMI"
9
10/* XXX Remove the global transaction and hang it off the environment. */
11#include "config.h"
12
13#ifndef lint
14static const char sccsid[] = "@(#)xa.c	10.4 (Sleepycat) 10/11/98";
15#endif /* not lint */
16
17#ifndef NO_SYSTEM_INCLUDES
18#include <sys/types.h>
19
20#include <stdlib.h>
21#include <stdio.h>
22#include <string.h>
23#endif
24
25#include "db_int.h"
26#include "db_page.h"
27#include "shqueue.h"
28#include "log.h"
29#include "txn.h"
30#include "db_auto.h"
31#include "db_ext.h"
32#include "db_dispatch.h"
33
34static int  __db_xa_close __P((char *, int, long));
35static int  __db_xa_commit __P((XID *, int, long));
36static int  __db_xa_complete __P((int *, int *, int, long));
37static int  __db_xa_end __P((XID *, int, long));
38static int  __db_xa_forget __P((XID *, int, long));
39static int  __db_xa_open __P((char *, int, long));
40static int  __db_xa_prepare __P((XID *, int, long));
41static int  __db_xa_recover __P((XID *, long, int, long));
42static int  __db_xa_rollback __P((XID *, int, long));
43static int  __db_xa_start __P((XID *, int, long));
44static void __xa_txn_end __P((DB_ENV *));
45static void __xa_txn_init __P((DB_ENV *, TXN_DETAIL *, size_t));
46
47/*
48 * Possible flag values:
49 *	Dynamic registration	0 => no dynamic registration
50 *				TMREGISTER => dynamic registration
51 *	Asynchronous operation	0 => no support for asynchrony
52 *				TMUSEASYNC => async support
53 *	Migration support	0 => migration of transactions across
54 *				     threads is possible
55 *				TMNOMIGRATE => no migration across threads
56 */
57const struct xa_switch_t db_xa_switch = {
58	 "Berkeley DB",		/* name[RMNAMESZ] */
59	 TMNOMIGRATE,		/* flags */
60	 0,			/* version */
61	 __db_xa_open,		/* xa_open_entry */
62	 __db_xa_close,		/* xa_close_entry */
63	 __db_xa_start,		/* xa_start_entry */
64	 __db_xa_end,		/* xa_end_entry */
65	 __db_xa_rollback,	/* xa_rollback_entry */
66	 __db_xa_prepare,	/* xa_prepare_entry */
67	 __db_xa_commit,	/* xa_commit_entry */
68	 __db_xa_recover,	/* xa_recover_entry */
69	 __db_xa_forget,	/* xa_forget_entry */
70	 __db_xa_complete	/* xa_complete_entry */
71};
72
73/*
74 * __db_xa_open --
75 *	The open call in the XA protocol.  The rmid field is an id number
76 * that the TM assigned us and will pass us on every xa call.  We need to
77 * map that rmid number into a dbenv structure that we create during
78 * initialization.  Since this id number is thread specific, we do not
79 * need to store it in shared memory.  The file xa_map.c implements all
80 * such xa->db mappings.
81 *	The xa_info field is instance specific information.  We require
82 * that the value of DB_HOME be passed in xa_info.  Since xa_info is the
83 * only thing that we get to pass to db_appinit, any config information
84 * will have to be done via a config file instead of via the db_appinit
85 * call.
86 */
87static int
88__db_xa_open(xa_info, rmid, flags)
89	char *xa_info;
90	int rmid;
91	long flags;
92{
93	DB_ENV *env;
94
95	if (LF_ISSET(TMASYNC))
96		return (XAER_ASYNC);
97	if (flags != TMNOFLAGS)
98		return (XAER_INVAL);
99
100	/* Verify if we already have this environment open. */
101	if (__db_rmid_to_env(rmid, &env, 0) == 0)
102		return (XA_OK);
103
104	/*
105	 * Since we cannot tell whether the environment is OK or not,
106	 * we can't actually do the db_appinit in xa_open.  Instead,
107	 * we save the mapping between the rmid and the xa_info.  If
108	 * we next get a call to __xa_recover, we do the db_appinit
109	 * with DB_RECOVER set.  If we get any other call, then we
110	 * do the db_appinit.
111	 */
112	return (__db_map_rmid_name(rmid, xa_info));
113}
114
115/*
116 * __db_xa_close --
117 *	The close call of the XA protocol.  The only trickiness here
118 * is that if there are any active transactions, we must fail.  It is
119 * *not* an error to call close on an environment that has already been
120 * closed (I am interpreting that to mean it's OK to call close on an
121 * environment that has never been opened).
122 */
123static int
124__db_xa_close(xa_info, rmid, flags)
125	char *xa_info;
126	int rmid;
127	long flags;
128{
129	DB_ENV *env;
130	int ret, t_ret;
131
132	COMPQUIET(xa_info, NULL);
133
134	if (LF_ISSET(TMASYNC))
135		return (XAER_ASYNC);
136	if (flags != TMNOFLAGS)
137		return (XAER_INVAL);
138
139	/* If the environment is closed, then we're done. */
140	if (__db_rmid_to_env(rmid, &env, 0) != 0)
141		return (XA_OK);
142
143	/* Check if there are any pending transactions. */
144	if (env->xa_txn != NULL && env->xa_txn->txnid != TXN_INVALID)
145		return (XAER_PROTO);
146
147	/* Now, destroy the mapping and close the environment. */
148	ret = __db_unmap_rmid(rmid);
149	if ((t_ret = db_appexit(env)) != 0 && ret == 0)
150		ret = t_ret;
151
152	__os_free(env, sizeof(DB_ENV));
153
154	return (ret == 0 ? XA_OK : XAER_RMERR);
155}
156
157/*
158 * __db_xa_start --
159 *	Begin a transaction for the current resource manager.
160 */
161static int
162__db_xa_start(xid, rmid, flags)
163	XID *xid;
164	int rmid;
165	long flags;
166{
167	DB_ENV *env;
168	TXN_DETAIL *td;
169	size_t off;
170	int is_known;
171
172#define	OK_FLAGS	(TMJOIN | TMRESUME | TMNOWAIT | TMASYNC | TMNOFLAGS)
173	if (LF_ISSET(~OK_FLAGS))
174		return (XAER_INVAL);
175
176	if (LF_ISSET(TMJOIN) && LF_ISSET(TMRESUME))
177		return (XAER_INVAL);
178
179	if (LF_ISSET(TMASYNC))
180		return (XAER_ASYNC);
181
182	if (__db_rmid_to_env(rmid, &env, 1) != 0)
183		return (XAER_PROTO);
184
185	is_known = __db_xid_to_txn(env, xid, &off) == 0;
186
187	if (is_known && !LF_ISSET(TMRESUME) && !LF_ISSET(TMJOIN))
188		return (XAER_DUPID);
189
190	if (!is_known && LF_ISSET(TMRESUME | TMJOIN))
191		return (XAER_NOTA);
192
193	/*
194	 * This can't block, so we can ignore TMNOWAIT.
195	 *
196	 * Other error conditions: RMERR, RMFAIL, OUTSIDE, PROTO, RB*
197	 */
198	if (is_known) {
199		td = (TXN_DETAIL *)((u_int8_t *)env->tx_info->region + off);
200		if (td->xa_status == TXN_XA_SUSPENDED && !LF_ISSET(TMRESUME))
201			return (XAER_PROTO);
202		if (td->xa_status == TXN_XA_DEADLOCKED)
203			return (XA_RBDEADLOCK);
204		if (td->xa_status == TXN_XA_ABORTED)
205			return (XA_RBOTHER);
206
207		/* Now, fill in the global transaction structure. */
208		__xa_txn_init(env, td, off);
209		td->xa_status = TXN_XA_STARTED;
210	} else {
211		if (__txn_xa_begin(env, env->xa_txn) != 0)
212			return (XAER_RMERR);
213		(void)__db_map_xid(env, xid, env->xa_txn->off);
214		td = (TXN_DETAIL *)
215		    ((u_int8_t *)env->tx_info->region + env->xa_txn->off);
216		td->xa_status = TXN_XA_STARTED;
217	}
218	return (XA_OK);
219}
220
221/*
222 * __db_xa_end --
223 *	Disassociate the current transaction from the current process.
224 */
225static int
226__db_xa_end(xid, rmid, flags)
227	XID *xid;
228	int rmid;
229	long flags;
230{
231	DB_ENV *env;
232	DB_TXN *txn;
233	TXN_DETAIL *td;
234	size_t off;
235
236	if (flags != TMNOFLAGS && !LF_ISSET(TMSUSPEND | TMSUCCESS | TMFAIL))
237		return (XAER_INVAL);
238
239	if (__db_rmid_to_env(rmid, &env, 0) != 0)
240		return (XAER_PROTO);
241
242	if (__db_xid_to_txn(env, xid, &off) != 0)
243		return (XAER_NOTA);
244
245	txn = env->xa_txn;
246	if (off != txn->off)
247		return (XAER_PROTO);
248
249	td = (TXN_DETAIL *)((u_int8_t *)env->tx_info->region + off);
250	if (td->xa_status == TXN_XA_DEADLOCKED)
251		return (XA_RBDEADLOCK);
252
253	if (td->status == TXN_ABORTED)
254		return (XA_RBOTHER);
255
256	if (td->xa_status != TXN_XA_STARTED)
257		return (XAER_PROTO);
258
259	/* Update the shared memory last_lsn field */
260	td->last_lsn = txn->last_lsn;
261
262	/*
263	 * If we ever support XA migration, we cannot keep SUSPEND/END
264	 * status in the shared region; it would have to be process local.
265	 */
266	if (LF_ISSET(TMSUSPEND))
267		td->xa_status = TXN_XA_SUSPENDED;
268	else
269		td->xa_status = TXN_XA_ENDED;
270
271	txn->txnid = TXN_INVALID;
272	return (XA_OK);
273}
274
275/*
276 * __db_xa_prepare --
277 *	Sync the log to disk so we can guarantee recoverability.
278 */
279static int
280__db_xa_prepare(xid, rmid, flags)
281	XID *xid;
282	int rmid;
283	long flags;
284{
285	DB_ENV *env;
286	TXN_DETAIL *td;
287	size_t off;
288
289	if (LF_ISSET(TMASYNC))
290		return (XAER_ASYNC);
291	if (flags != TMNOFLAGS)
292		return (XAER_INVAL);
293
294	/*
295	 * We need to know if we've ever called prepare on this.
296	 * As part of the prepare, we set the xa_status field to
297	 * reflect that fact that prepare has been called, and if
298	 * it's ever called again, it's an error.
299	 */
300	if (__db_rmid_to_env(rmid, &env, 1) != 0)
301		return (XAER_PROTO);
302
303	if (__db_xid_to_txn(env, xid, &off) != 0)
304		return (XAER_NOTA);
305
306	td = (TXN_DETAIL *)((u_int8_t *)env->tx_info->region + off);
307
308	if (td->xa_status == TXN_XA_DEADLOCKED)
309		return (XA_RBDEADLOCK);
310
311	if (td->xa_status != TXN_XA_ENDED && td->xa_status != TXN_XA_SUSPENDED)
312		return (XAER_PROTO);
313
314	/* Now, fill in the global transaction structure. */
315	__xa_txn_init(env, td, off);
316
317	if (txn_prepare(env->xa_txn) != 0)
318		return (XAER_RMERR);
319
320	td->xa_status = TXN_XA_PREPARED;
321
322	/* No fatal value that would require an XAER_RMFAIL. */
323	__xa_txn_end(env);
324	return (XA_OK);
325}
326
327/*
328 * __db_xa_commit --
329 *	Commit the transaction
330 */
331static int
332__db_xa_commit(xid, rmid, flags)
333	XID *xid;
334	int rmid;
335	long flags;
336{
337	DB_ENV *env;
338	TXN_DETAIL *td;
339	size_t off;
340
341	if (LF_ISSET(TMASYNC))
342		return (XAER_ASYNC);
343#undef	OK_FLAGS
344#define	OK_FLAGS	(TMNOFLAGS | TMNOWAIT | TMONEPHASE)
345	if (LF_ISSET(~OK_FLAGS))
346		return (XAER_INVAL);
347
348	/*
349	 * We need to know if we've ever called prepare on this.
350	 * We can verify this by examining the xa_status field.
351	 */
352	if (__db_rmid_to_env(rmid, &env, 1) != 0)
353		return (XAER_PROTO);
354
355	if (__db_xid_to_txn(env, xid, &off) != 0)
356		return (XAER_NOTA);
357
358	td = (TXN_DETAIL *)((u_int8_t *)env->tx_info->region + off);
359
360	if (td->xa_status == TXN_XA_DEADLOCKED)
361		return (XA_RBDEADLOCK);
362
363	if (td->xa_status == TXN_XA_ABORTED)
364		return (XA_RBOTHER);
365
366	if (LF_ISSET(TMONEPHASE) &&
367	    td->xa_status != TXN_XA_ENDED && td->xa_status != TXN_XA_SUSPENDED)
368		return (XAER_PROTO);
369
370	if (!LF_ISSET(TMONEPHASE) && td->xa_status != TXN_XA_PREPARED)
371		return (XAER_PROTO);
372
373	/* Now, fill in the global transaction structure. */
374	__xa_txn_init(env, td, off);
375
376	if (txn_commit(env->xa_txn) != 0)
377		return (XAER_RMERR);
378
379	/* No fatal value that would require an XAER_RMFAIL. */
380	__xa_txn_end(env);
381	return (XA_OK);
382}
383
384/*
385 * __db_xa_recover --
386 *	Returns a list of prepared and heuristically completed transactions.
387 *
388 * The return value is the number of xids placed into the xid array (less
389 * than or equal to the count parameter).  The flags are going to indicate
390 * whether we are starting a scan or continuing one.
391 */
392static int
393__db_xa_recover(xids, count, rmid, flags)
394	XID *xids;
395	long count, flags;
396	int rmid;
397{
398	__txn_xa_regop_args *argp;
399	DBT data;
400	DB_ENV *env;
401	DB_LOG *log;
402	XID *xidp;
403	char *dbhome;
404	int err, ret;
405	u_int32_t rectype, txnid;
406
407	ret = 0;
408	xidp = xids;
409
410
411	/*
412	 * If we are starting a scan, then we need to open the environment
413	 * and run recovery.  This recovery puts us in a state where we can
414	 * either commit or abort any transactions that were prepared but not
415	 * yet committed.  Once we've done that, we need to figure out where
416	 * to begin checking for such transactions.  If we are not starting
417	 * a scan, then the environment had better have already been recovered
418	 * and we'll start from * wherever the log cursor is.  Since XA apps
419	 * cannot be threaded, we don't have to worry about someone else
420	 * having moved it.
421	 */
422	if (LF_ISSET(TMSTARTRSCAN)) {
423		/* If the environment is open, we have a problem. */
424		if (__db_rmid_to_env(rmid, &env, 0) == XA_OK)
425			return (XAER_PROTO);
426
427		if ((ret = __os_calloc(1, sizeof(DB_ENV), &env)) != 0)
428			return (XAER_RMERR);
429
430		if (__db_rmid_to_name(rmid, &dbhome) != 0)
431			goto err1;
432
433#undef XA_FLAGS
434#define	XA_FLAGS DB_RECOVER | \
435	DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN
436		if ((ret = db_appinit(dbhome, NULL, env, XA_FLAGS)) != 0)
437			goto err1;
438
439		if (__db_map_rmid(rmid, env) != 0)
440			goto err2;
441
442		/* Now figure out from where to begin scan. */
443		log = env->lg_info;
444		if ((err = __log_findckp(log, &log->xa_first)) == DB_NOTFOUND) {
445			/*
446			 * If there were no log files, then we have no
447			 * transactions to return, so we simply return 0.
448			 */
449			return (0);
450		}
451		if ((err = __db_txnlist_init(&log->xa_info)) != 0)
452			goto err3;
453	} else {
454		/* We had better already know about this rmid. */
455		if (__db_rmid_to_env(rmid, &env, 0) != 0)
456			return (XAER_PROTO);
457		/*
458		 * If we are not starting a scan, the log cursor had
459		 * better be set.
460		 */
461		log = env->lg_info;
462		if (IS_ZERO_LSN(log->xa_lsn))
463			return (XAER_PROTO);
464	}
465
466	/*
467	 * At this point log->xa_first contains the point in the log
468	 * to which we need to roll back.  If we are starting a scan,
469	 * we'll start at the last record; if we're continuing a scan,
470	 * we'll have to start at log->xa_lsn.
471	 */
472
473	memset(&data, 0, sizeof(data));
474	for (err = log_get(log, &log->xa_lsn, &data,
475	    LF_ISSET(TMSTARTRSCAN) ? DB_LAST : DB_SET);
476	    err == 0 && log_compare(&log->xa_lsn, &log->xa_first) > 0;
477	    err = log_get(log, &log->xa_lsn, &data, DB_PREV)) {
478		memcpy(&rectype, data.data, sizeof(rectype));
479
480		/*
481		 * The only record type we care about is an DB_txn_xa_regop.
482		 * If it's a commit, we have to add it to a txnlist.  If it's
483		 * a prepare, and we don't have a commit, then we return it.
484		 * We are redoing some of what's in the xa_regop_recovery
485		 * code, but we have to do it here so we can get at the xid
486		 * in the record.
487		 */
488		if (rectype != DB_txn_xa_regop && rectype != DB_txn_regop)
489			continue;
490
491		memcpy(&txnid, (u_int8_t *)data.data + sizeof(rectype),
492		    sizeof(txnid));
493		err = __db_txnlist_find(log->xa_info, txnid);
494		switch (rectype) {
495		case DB_txn_regop:
496			if (err == DB_NOTFOUND)
497				__db_txnlist_add(log->xa_info, txnid);
498			err = 0;
499			break;
500		case DB_txn_xa_regop:
501			/*
502			 * This transaction is commited, so we needn't read
503			 * the record and do anything.
504			 */
505			if (err == 0)
506				break;
507			if ((err =
508			    __txn_xa_regop_read(data.data, &argp)) != 0) {
509				ret = XAER_RMERR;
510				goto out;
511			}
512
513			xidp->formatID = argp->formatID;
514			xidp->gtrid_length = argp->gtrid;
515			xidp->bqual_length = argp->bqual;
516			memcpy(xidp->data, argp->xid.data, argp->xid.size);
517			ret++;
518			xidp++;
519			__os_free(argp, sizeof(*argp));
520			if (ret == count)
521				goto done;
522			break;
523		}
524	}
525
526	if (err != 0 && err != DB_NOTFOUND)
527		goto out;
528
529done:	if (LF_ISSET(TMENDRSCAN)) {
530		ZERO_LSN(log->xa_lsn);
531		ZERO_LSN(log->xa_first);
532
533out:		__db_txnlist_end(log->xa_info);
534		log->xa_info = NULL;
535	}
536	return (ret);
537
538err3:	(void)__db_unmap_rmid(rmid);
539err2:	(void)db_appexit(env);
540err1:	__os_free(env, sizeof(DB_ENV));
541	return (XAER_RMERR);
542}
543
544/*
545 * __db_xa_rollback
546 *	Abort an XA transaction.
547 */
548static int
549__db_xa_rollback(xid, rmid, flags)
550	XID *xid;
551	int rmid;
552	long flags;
553{
554	DB_ENV *env;
555	TXN_DETAIL *td;
556	size_t off;
557
558	if (LF_ISSET(TMASYNC))
559		return (XAER_ASYNC);
560	if (flags != TMNOFLAGS)
561		return (XAER_INVAL);
562
563	if (__db_rmid_to_env(rmid, &env, 1) != 0)
564		return (XAER_PROTO);
565
566	if (__db_xid_to_txn(env, xid, &off) != 0)
567		return (XAER_NOTA);
568
569	td = (TXN_DETAIL *)((u_int8_t *)env->tx_info->region + off);
570
571	if (td->xa_status == TXN_XA_DEADLOCKED)
572		return (XA_RBDEADLOCK);
573
574	if (td->xa_status == TXN_XA_ABORTED)
575		return (XA_RBOTHER);
576
577	if (LF_ISSET(TMONEPHASE) &&
578	    td->xa_status != TXN_XA_ENDED && td->xa_status != TXN_XA_SUSPENDED)
579		return (XAER_PROTO);
580
581	/* Now, fill in the global transaction structure. */
582	__xa_txn_init(env, td, off);
583	if (txn_abort(env->xa_txn) != 0)
584		return (XAER_RMERR);
585
586	/* No fatal value that would require an XAER_RMFAIL. */
587	__xa_txn_end(env);
588	return (XA_OK);
589}
590
591/*
592 * __db_xa_forget --
593 *	Forget about an XID for a transaction that was heuristically
594 * completed.  Since we do not heuristically complete anything, I
595 * don't think we have to do anything here, but we should make sure
596 * that we reclaim the slots in the txnid table.
597 */
598static int
599__db_xa_forget(xid, rmid, flags)
600	XID *xid;
601	int rmid;
602	long flags;
603{
604	DB_ENV *env;
605	size_t off;
606
607	if (LF_ISSET(TMASYNC))
608		return (XAER_ASYNC);
609	if (flags != TMNOFLAGS)
610		return (XAER_INVAL);
611
612	if (__db_rmid_to_env(rmid, &env, 1) != 0)
613		return (XAER_PROTO);
614
615	/*
616	 * If mapping is gone, then we're done.
617	 */
618	if (__db_xid_to_txn(env, xid, &off) != 0)
619		return (XA_OK);
620
621	__db_unmap_xid(env, xid, off);
622
623	/* No fatal value that would require an XAER_RMFAIL. */
624	return (XA_OK);
625}
626
627/*
628 * __db_xa_complete --
629 *	Used to wait for asynchronous operations to complete.  Since we're
630 *	not doing asynch, this is an invalid operation.
631 */
632static int
633__db_xa_complete(handle, retval, rmid, flags)
634	int *handle, *retval, rmid;
635	long flags;
636{
637	COMPQUIET(handle, NULL);
638	COMPQUIET(retval, NULL);
639	COMPQUIET(rmid, 0);
640	COMPQUIET(flags, 0);
641
642	return (XAER_INVAL);
643}
644
645/*
646 * __xa_txn_init --
647 * 	Fill in the fields of the local transaction structure given
648 *	the detail transaction structure.
649 */
650static void
651__xa_txn_init(env, td, off)
652	DB_ENV *env;
653	TXN_DETAIL *td;
654	size_t off;
655{
656	DB_TXN *txn;
657
658	txn = env->xa_txn;
659	txn->mgrp = env->tx_info;
660	txn->parent = NULL;
661	txn->last_lsn = td->last_lsn;
662	txn->txnid = td->txnid;
663	txn->off = off;
664	txn->flags = 0;
665}
666
667/*
668 * __xa_txn_end --
669 * 	Invalidate a transaction structure that was generated by xa_txn_init.
670 */
671static void
672__xa_txn_end(env)
673	DB_ENV *env;
674{
675	DB_TXN *txn;
676
677	txn = env->xa_txn;
678	if (txn != NULL)
679		txn->txnid = TXN_INVALID;
680}
681
682