1# See the file LICENSE for redistribution information.
2#
3# Copyright (c) 2001,2008 Oracle.  All rights reserved.
4#
5# $Id: rep075.tcl,v 12.7 2008/01/08 20:58:53 bostic Exp $
6#
7# TEST	rep075
8# TEST	Replication and prepared transactions.
9# TEST	Test having outstanding prepared transactions and simulating
10# TEST	crashing or upgrading or downgrading sites.
11# TEST
12#
# rep075 --
#	Driver for the prepared-transaction replication test.
#
#	method	Access method requested by the test harness; only btree
#		is actually exercised.
#	tnum	Test number used in messages and file names.
#	args	Extra database-open arguments, passed through convert_args.
#
#	When the harness is only collecting valid methods, returns the
#	list of supported methods instead of running anything.
proc rep075 { method { tnum "075" } args } {

	source ./include.tcl
	if { $is_windows9x_test == 1 } {
		puts "Skipping replication test on Win 9x platform."
		return
	}

	# This test is only run for btree.
	if { $checking_valid_methods } {
		set test_methods { btree }
		return $test_methods
	}
	if { [is_btree $method] == 0 } {
		puts "Rep075: Skipping for method $method"
		return
	}

	set args [convert_args $method $args]
	set logsets [create_logsets 2]
	#
	# Swapping the envs is the only thing that should
	# work for:
	#   HP: can't open two handles on same env.
	#   in-memory logs: prepared txns don't survive recovery
	#
	# (Win 9x has the same two-handle restriction, but we already
	# returned above for that platform, so it needs no check here.)
	#
	global mixed_mode_logging
	if { $is_hp_test == 1 || $mixed_mode_logging > 0 } {
		set prep {swap}
	} else {
		set prep {dbrecover swap resolve recover envrecover}
	}
	set ops {commit abort both}

	# Run the body of the test for every combination of log
	# configuration, reset scenario and resolution operation, once
	# closing the databases after the prepare-and-reset step and once
	# closing them immediately after the prepare.
	foreach l $logsets {
		foreach p $prep {
			foreach o $ops {
				puts "Rep$tnum ($method $p $o):\
				    Replication and prepared txns."
				puts "Rep$tnum: Master logs are [lindex $l 0]"
				puts "Rep$tnum: Client logs are [lindex $l 1]"
				puts "Rep$tnum: close DBs after prepare"
				rep075_sub $method $tnum $l $p $o 1 $args
				puts "Rep$tnum: close DBs before prepare"
				rep075_sub $method $tnum $l $p $o 0 $args
			}
		}
	}
}
64
# rep075_sub --
#	Run one pass of the prepared-transaction replication test: set up
#	a master and two clients, prepare two transactions on the master,
#	disturb the replication group according to $prep, resolve the
#	transactions according to $op, and verify the resulting data on
#	every site.
#
#	method	Access method under test.
#	tnum	Test number used in messages and file names.
#	logset	Two-element list: master log type, client log type.
#	prep	Scenario to run: swap, recover, resolve, envrecover or
#		dbrecover.  Any other value raises an error.
#	op	How to resolve the prepared txns: commit, abort or both
#		(one commit and one abort, chosen at random).
#	after	1: leave the database handles open past the prepare and
#		close them (where applicable) during the reset step.
#		0: close the database handles right after the prepare.
#	largs	Extra arguments for the database opens.
proc rep075_sub { method tnum logset prep op after largs } {
	global testdir
	global rep_verbose
	global verbose_type
	global util_path

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}
	env_cleanup $testdir

	replsetup $testdir/MSGQUEUEDIR

	set masterdir $testdir/MASTERDIR
	set clientdir $testdir/CLIENTDIR
	set clientdir2 $testdir/CLIENTDIR2
	file mkdir $masterdir
	file mkdir $clientdir
	file mkdir $clientdir2

	# Log size is small so we quickly create more than one.
	# The documentation says that the log file must be at least
	# four times the size of the in-memory log buffer.
	set pagesize 4096
	append largs " -pagesize $pagesize "
	set log_buf [expr $pagesize * 2]
	set log_max [expr $log_buf * 4]

	set m_logtype [lindex $logset 0]
	set c_logtype [lindex $logset 1]

	# In-memory logs require a large log buffer, and cannot
	# be used with -txn nosync.  The log arguments come entirely
	# from adjust_logargs; $log_buf is only used to size $log_max.
	set m_logargs [adjust_logargs $m_logtype]
	set c_logargs [adjust_logargs $c_logtype]
	set m_txnargs [adjust_txnargs $m_logtype]
	set c_txnargs [adjust_txnargs $c_logtype]

	# Open a master.
	repladd 1
	set ma_envcmd "berkdb_env_noerr -create $m_txnargs \
	    $m_logargs -errpfx ENV0 -log_max $log_max $verbargs \
	    -home $masterdir -rep_transport \[list 1 replsend\]"
	set env0 [eval $ma_envcmd -rep_master]
	set masterenv $env0
	error_check_good master_env [is_valid_env $env0] TRUE

	# Open a client.
	repladd 2
	set cl_envcmd "berkdb_env_noerr -create $c_txnargs \
	    $c_logargs -errpfx ENV1 -log_max $log_max $verbargs \
	    -home $clientdir -rep_transport \[list 2 replsend\]"
	set env1 [eval $cl_envcmd -rep_client]
	set clientenv $env1
	error_check_good client_env [is_valid_env $env1] TRUE

	# Open a second client.  It is never crashed or recovered; it
	# only has to follow whatever the other two sites do.
	repladd 3
	set cl2_envcmd "berkdb_env_noerr -create $c_txnargs \
	    $c_logargs -errpfx ENV2 -log_max $log_max $verbargs \
	    -home $clientdir2 -rep_transport \[list 3 replsend\]"
	set env2 [eval $cl2_envcmd -rep_client]
	set clientenv2 $env2
	error_check_good client2_env [is_valid_env $env2] TRUE

	set omethod [convert_method $method]

	# Bring the clients online by processing the startup messages.
	set envlist "{$env0 1} {$env1 2} {$env2 3}"
	process_msgs $envlist

	#
	# Run rep_test in a database with a sub database.
	#
	set testfile "test$tnum.db"
	set sub "subdb"
	set db1 [eval {berkdb_open_noerr -env $masterenv -auto_commit \
	    -create -mode 0644} $largs $omethod $testfile $sub]
	error_check_good dbopen [is_valid_db $db1] TRUE

	puts "\tRep$tnum.a: Running rep_test in replicated env."
	set niter 1
	eval rep_test $method $masterenv $db1 $niter 0 0 0 0 $largs
	process_msgs $envlist

	set testfile2 "test$tnum.2.db"
	set db [eval {berkdb_open_noerr -env $masterenv -auto_commit \
	    -create -mode 0644} $largs $omethod $testfile2]
	error_check_good dbopen [is_valid_db $db] TRUE

	#
	# Create and prepare 2 transactions:
	# One txn is for the first database and one txn for the
	# second database.  We want to test that we can detect
	# when the last restored txn has been resolved.  And we
	# want to test various files being open.
	#
	puts "\tRep$tnum.b: Prepare some txns."
	set pbnyc 2
	set key key
	set data some_data
	set txn1 [$masterenv txn]
	error_check_good txn [is_valid_txn $txn1 $masterenv] TRUE
	error_check_good put [$db1 put -txn $txn1 $key $data] 0

	set gid [make_gid rep075:$txn1]
	error_check_good prepare [$txn1 prepare $gid] 0

	set txn2 [$masterenv txn]
	error_check_good txn [is_valid_txn $txn2 $masterenv] TRUE
	error_check_good put [$db put -txn $txn2 $key $data] 0

	set gid [make_gid rep075:$txn2]
	error_check_good prepare [$txn2 prepare $gid] 0
	if { $after == 0 } {
		$db1 close
		$db close
	}
	process_msgs $envlist

	#
	# Now we have txns on a master that are PBNYC (prepared but
	# not yet committed).  Alter the replication system now
	# based on what we're testing this time through.
	#
	puts "\tRep$tnum.c: Reset replication ($prep)."

	# op1 resolves the txn on the subdatabase file, op2 the txn on
	# the second file; the verification step below relies on that.
	if { $op == "commit" } {
		set op1 commit
		set op2 commit
	} elseif { $op == "abort" } {
		set op1 abort
		set op2 abort
	} else {
		set i [berkdb random_int 0 1]
		if { $i == 0 } {
			set op1 commit
			set op2 abort
		} else {
			set op1 abort
			set op2 commit
		}
	}
	set oplist [list $op1 $op2]
	#
	# If we are doing a swap, swap roles between master and client
	# and then call txn recover.  Master should then commit.
	# This operation tests handling prepared txns in replication code.
	#
	# If we are doing a recover, each site stops using its old
	# env handle and then opens a new one, with recovery.
	# This operation tests handling prepared txns and then
	# starting replication.
	#
	# If we are doing an envrecover, each site stops using its old
	# env handle and then opens a new one, with recovery.
	# Each site then opens a 2nd dbenv handle to run txn_recover
	# and resolve each operation.
	# This operation tests handling prepared txns and then
	# starting replication.
	#
	# If we are doing a resolve, each site prepares the txns
	# and then resolves the txns and then stops using the old
	# env handle to cause a "crash".  We then open a new one
	# with recovery.  This operation tests handling prepared
	# txns and having them resolved.
	#
	if { $prep == "swap" } {
		puts "\tRep$tnum.c.0: Swap roles master->client."
		#
		# A downgrading master must resolve the txns.  So, commit
		# them here, but don't send the messages to the client that
		# is about to become master.
		#
		error_check_good commit [$txn1 commit] 0
		error_check_good commit [$txn2 commit] 0
		if { $after == 1 } {
			$db1 close
			$db close
		}
		replclear 2
		replclear 3
		set newclient $env0
		error_check_good downgrade [$newclient rep_start -client] 0
		set ctxnlist [$newclient txn_recover]
		set newmaster $env1
		puts "\tRep$tnum.c.1: Swap roles client->master."
		error_check_good upgrade [$newmaster rep_start -master] 0
		set txnlist [$newmaster txn_recover]

		puts "\tRep$tnum.c.2: Check status of prepared txn."
		error_check_good txnlist_len [llength $txnlist] $pbnyc
		error_check_good txnlist_len [llength $ctxnlist] 0

		#
		# Now commit that old prepared txn.
		#
		puts "\tRep$tnum.c.3: Resolve prepared txn ($op)."
		rep075_resolve $txnlist $oplist
	} elseif { $prep == "recover" } {
		#
		# To simulate a crash, simply stop using the old handles
		# and reopen new ones, with recovery.  First flush both
		# the log and mpool to disk.
		#
		set origenv0 $env0
		set origenv1 $env1
		set origtxn1 $txn1
		set origtxn2 $txn2
		puts "\tRep$tnum.c.0: Sync and recover master environment."
		error_check_good flush0 [$env0 log_flush] 0
		error_check_good sync0 [$env0 mpool_sync] 0
		if { $after == 1 } {
			$db1 close
			$db close
		}
		set env0 [eval $ma_envcmd -recover]
		error_check_good master_env [is_valid_env $env0] TRUE
		puts "\tRep$tnum.c.1: Run txn_recover on master env."
		set txnlist [$env0 txn_recover]
		error_check_good txnlist_len [llength $txnlist] $pbnyc
		puts "\tRep$tnum.c.2: Resolve txn ($op) on master env."
		rep075_resolve $txnlist $oplist

		puts "\tRep$tnum.c.3: Sync and recover client environment."
		error_check_good flush1 [$env1 log_flush] 0
		error_check_good sync1 [$env1 mpool_sync] 0
		set env1 [eval $cl_envcmd -recover]
		error_check_good client_env [is_valid_env $env1] TRUE
		puts "\tRep$tnum.c.4: Run txn_recover on client env."
		set txnlist [$env1 txn_recover]
		error_check_good txnlist_len [llength $txnlist] $pbnyc

		puts "\tRep$tnum.c.5: Resolve txn ($op) on client env."
		rep075_resolve $txnlist $oplist

		puts "\tRep$tnum.c.6: Restart replication on both envs."
		error_check_good master [$env0 rep_start -master] 0
		error_check_good client [$env1 rep_start -client] 0
		set newmaster $env0
		set envlist "{$env0 1} {$env1 2} {$env2 3}"
		#
		# Clean up old Tcl handles.  This is best effort: the
		# handles belong to the "crashed" environments, so any
		# error from these commands is deliberately ignored.
		#
		catch {$origenv0 close} res
		catch {$origenv1 close} res
		catch {$origtxn1 close} res
		catch {$origtxn2 close} res
	} elseif { $prep == "resolve" } {
		#
		# Check having prepared txns in the log, but they are
		# also resolved before we "crash".
		# To simulate a crash, simply stop using the old handles
		# and reopen new ones, with recovery.  First flush both
		# the log and mpool to disk.
		#
		set origenv0 $env0
		set origenv1 $env1
		set origdb1 $db1
		set origdb $db
		puts "\tRep$tnum.c.0: Resolve ($op1 $op2) and recover master."
		error_check_good resolve1 [$txn1 $op1] 0
		error_check_good resolve2 [$txn2 $op2] 0
		error_check_good flush0 [$env0 log_flush] 0
		error_check_good sync0 [$env0 mpool_sync] 0
		process_msgs $envlist
		set env0 [eval $ma_envcmd -recover]
		error_check_good master_env [is_valid_env $env0] TRUE
		puts "\tRep$tnum.c.1: Run txn_recover on master env."
		set txnlist [$env0 txn_recover]
		error_check_good txnlist_len [llength $txnlist] 0

		puts "\tRep$tnum.c.2: Sync and recover client environment."
		error_check_good flush1 [$env1 log_flush] 0
		error_check_good sync1 [$env1 mpool_sync] 0
		set env1 [eval $cl_envcmd -recover]
		error_check_good client_env [is_valid_env $env1] TRUE
		puts "\tRep$tnum.c.3: Run txn_recover on client env."
		set txnlist [$env1 txn_recover]
		error_check_good txnlist_len [llength $txnlist] 0

		puts "\tRep$tnum.c.4: Restart replication on both envs."
		error_check_good master [$env0 rep_start -master] 0
		error_check_good client [$env1 rep_start -client] 0
		set newmaster $env0
		set envlist "{$env0 1} {$env1 2} {$env2 3}"
		#
		# Clean up old Tcl handles; errors are deliberately
		# ignored (the db handles may already be closed when
		# $after is 0).
		#
		catch {$origenv0 close} res
		catch {$origenv1 close} res
		catch {$origdb close} res
		catch {$origdb1 close} res
	} elseif { $prep == "envrecover" || $prep == "dbrecover" } {
		#
		# To simulate a crash, simply stop using the old handles
		# and reopen new ones, with recovery.  First flush both
		# the log and mpool to disk.
		#
		set origenv0 $env0
		set origenv1 $env1
		set origtxn1 $txn1
		set origtxn2 $txn2
		puts "\tRep$tnum.c.0: Sync and recover master environment."
		error_check_good flush0 [$env0 log_flush] 0
		error_check_good sync0 [$env0 mpool_sync] 0
		# Remember the generation so we can check below that
		# restarting the master advanced it.
		set oldgen [stat_field $env0 rep_stat "Generation number"]
		error_check_good flush1 [$env1 log_flush] 0
		error_check_good sync1 [$env1 mpool_sync] 0
		if { $after == 1 } {
			$db1 close
			$db close
		}
		if { $prep == "dbrecover" } {
			set recargs "-h $masterdir -c "
			set stat [catch {eval exec $util_path/db_recover \
			    -e $recargs} result]
			if { $stat == 1 } {
				error "FAIL: Recovery error: $result."
			}
			set recargs "-h $clientdir -c "
			set stat [catch {eval exec $util_path/db_recover \
			    -e $recargs} result]
			if { $stat == 1 } {
				error "FAIL: Recovery error: $result."
			}
		}
		#
		# !!!
		# We still need to open with recovery, even if 'dbrecover'
		# because db_recover cannot open the env with replication
		# enabled.  But db_recover will be the real recovery that
		# needs to deal with the prepared txn.  This recovery below
		# for db_recover, should be a no-op essentially.
		#
		set recenv0 [eval $ma_envcmd -recover]
		error_check_good master_env [is_valid_env $recenv0] TRUE
		puts "\tRep$tnum.c.1: Run txn_recover on master env."
		# Second handle on the recovered env; the txns are
		# recovered and resolved through this one.
		set env0 [eval $ma_envcmd]
		error_check_good master_env [is_valid_env $env0] TRUE
		set txnlist [$env0 txn_recover]
		error_check_good txnlist_len [llength $txnlist] $pbnyc
		puts "\tRep$tnum.c.2: Resolve txn ($op) on master env."
		rep075_resolve $txnlist $oplist
		error_check_good recenv0_close [$recenv0 close] 0

		puts "\tRep$tnum.c.3: Recover client environment."
		set recenv1 [eval $cl_envcmd -recover -errpfx "ENV1REC"]
		error_check_good client_env [is_valid_env $recenv1] TRUE
		puts "\tRep$tnum.c.4: Run txn_recover on client env."
		set env1 [eval $cl_envcmd -errpfx "ENV1NEW"]
		error_check_good client_env [is_valid_env $env1] TRUE
		set txnlist [$env1 txn_recover]
		error_check_good txnlist_len [llength $txnlist] $pbnyc

		puts "\tRep$tnum.c.5: Resolve txns ($oplist) on client env."
		rep075_resolve $txnlist $oplist
		error_check_good recenv1_close [$recenv1 close] 0

		puts "\tRep$tnum.c.6: Restart replication on both envs."
		if { $prep == "dbrecover" } {
			#
			# XXX Since we ran db_recover, we lost the rep gen
			# and clientenv2 cannot detect the change.  Until
			# SR 15396 is fixed, we'll fake it by becoming
			# master, downgrading and then upgrading again to
			# advance the generation number.
			#
			error_check_good master [$env0 rep_start -master] 0
			error_check_good downgrade [$env0 rep_start -client] 0
			replclear 2
			replclear 3
		}
		error_check_good master [$env0 rep_start -master] 0
		set gen [stat_field $env0 rep_stat "Generation number"]
		error_check_bad gen $gen $oldgen
		error_check_good client [$env1 rep_start -client] 0
		set newmaster $env0
		set envlist "{$env0 1} {$env1 2} {$env2 3}"
		process_msgs $envlist
		#
		# Clean up old Tcl handles.  This is best effort: the
		# handles belong to the "crashed" environments, so any
		# error from these commands is deliberately ignored.
		#
		catch {$origenv0 close} res
		catch {$origenv1 close} res
		catch {$origtxn1 close} res
		catch {$origtxn2 close} res
	} else {
		#
		# Fail with a clear message rather than tripping over
		# the unset newmaster variable below.
		#
		error "FAIL: Rep$tnum: unknown prep mode '$prep'"
	}
	#
	# Run a standard rep_test creating test.db now.
	#
	eval rep_test $method $newmaster NULL $niter 0 0 0 0 $largs
	process_msgs $envlist

	#
	# Verify whether or not the key exists in the databases both
	# on the client and the master.
	#
	puts "\tRep$tnum.d: Verify prepared data."
	foreach e $envlist {
		set env [lindex $e 0]
		set db1 [eval {berkdb_open_noerr -env $env -auto_commit \
		    -create -mode 0644} $largs $omethod $testfile $sub]
		error_check_good dbopen [is_valid_db $db1] TRUE
		set db2 [eval {berkdb_open_noerr -env $env -auto_commit \
		    -create -mode 0644} $largs $omethod $testfile2]
		error_check_good dbopen [is_valid_db $db2] TRUE
		set k1 [$db1 get $key]
		set k2 [$db2 get $key]
		# A committed txn leaves exactly one record for the key;
		# an aborted one leaves none.
		if { $op1 == "commit" } {
			error_check_good key [llength $k1] 1
		} else {
			error_check_good key [llength $k1] 0
		}
		if { $op2 == "commit" } {
			error_check_good key [llength $k2] 1
		} else {
			error_check_good key [llength $k2] 0
		}

		error_check_good db_close [$db1 close] 0
		error_check_good db_close [$db2 close] 0
	}
	error_check_good env0_close [$env0 close] 0
	error_check_good env1_close [$env1 close] 0
	error_check_good env2_close [$env2 close] 0

	replclose $testdir/MSGQUEUEDIR
	return
}
494
# rep075_resolve --
#	Resolve each recovered transaction with the matching operation.
#
#	txnlist	List of records whose first element is a txn handle
#		(as returned by txn_recover in the callers).
#	ops	List of txn subcommands (e.g. commit, abort), one per
#		entry of txnlist and in the same order.
#
#	The two lists must be the same length, and every resolution
#	must return 0.
proc rep075_resolve { txnlist ops } {
	set ntxns [llength $txnlist]
	set nops [llength $ops]
	error_check_good resolve_lists $ntxns $nops

	# Walk both lists in step.  Going to the longer length, with
	# lindex yielding "" past the end, mirrors a multi-list foreach.
	set limit [expr {$ntxns > $nops ? $ntxns : $nops}]
	for { set idx 0 } { $idx < $limit } { incr idx } {
		set handle [lindex [lindex $txnlist $idx] 0]
		set action [lindex $ops $idx]
		error_check_good commit [$handle $action] 0
	}
}
502