1# See the file LICENSE for redistribution information.
2#
3# Copyright (c) 2002-2009 Oracle.  All rights reserved.
4#
5# $Id$
6#
7# TEST  rep005
8# TEST	Replication election test with error handling.
9# TEST
10# TEST	Run rep_test in a replicated master environment;
11# TEST  hold an election among a group of clients to make sure they select
12# TEST  a proper master from amongst themselves, forcing errors at various
13# TEST	locations in the election path.
14
proc rep005 { method args } {
	# Entry point for rep005: replication election test with
	# forced errors.
	#
	# method - access method under test; anything but btree is skipped.
	# args   - extra arguments, handed on as a single list to
	#          rep005_sub.
	#
	# When checking_valid_methods is set, nothing is run and the
	# list of supported methods is returned instead.  Otherwise
	# rep005_sub is run once per log configuration produced by
	# create_logsets.

	source ./include.tcl
	global databases_in_memory
	global repfiles_in_memory

	if { $is_windows9x_test == 1 } {
		puts "Skipping replication test on Win 9x platform."
		return
	}

	# Only btree is supported by this test.
	if { $checking_valid_methods } {
		return { btree }
	}
	if { [is_btree $method] == 0 } {
		puts "Rep005: Skipping for method $method."
		return
	}

	# Banner fragment describing where replication files live.
	if { $repfiles_in_memory } {
		set msg2 "and in-memory replication files"
	} else {
		set msg2 "and on-disk replication files"
	}

	set tnum "005"
	set niter 10
	set nclients 3

	# One log type for the master plus one for each client.
	set logsets [create_logsets [expr {$nclients + 1}]]

	# Banner fragment describing where the databases live.
	if { $databases_in_memory } {
		set msg "using named in-memory databases."
		if { [is_queueext $method] } {
			puts -nonewline "Skipping rep$tnum for method "
			puts "$method with named in-memory databases."
			return
		}
	} else {
		set msg "using on-disk databases"
	}

	# We don't want to run this with -recover - it takes too
	# long and doesn't cover any new ground.
	set recargs ""
	foreach logset $logsets {
		puts "Rep$tnum ($recargs): Replication election\
		    error test with $nclients clients $msg $msg2."
		puts -nonewline "Rep$tnum: Started at: "
		puts [clock format [clock seconds] -format "%H:%M %D"]
		puts "Rep$tnum: Master logs are [lindex $logset 0]"

		# Element 0 is the master; client N uses element N + 1.
		set cnum 0
		while { $cnum < $nclients } {
			puts "Rep$tnum: Client $cnum logs are\
			    [lindex $logset [expr {$cnum + 1}]]"
			incr cnum
		}

		rep005_sub $method $tnum \
		    $niter $nclients $logset $recargs $args
	}
}
72
proc rep005_sub { method tnum niter nclients logset recargs largs } {
	# Run one pass of the election-with-errors test for a single
	# log configuration.
	#
	# method   - access method passed to rep_test.
	# tnum     - test number string used in progress messages.
	# niter    - iteration count passed to rep_test.
	# nclients - number of client environments to create.
	# logset   - list of log types: element 0 is for the master,
	#            element N + 1 is for client N.
	# recargs  - extra arguments appended to every env open command.
	# largs    - extra arguments expanded into the rep_test call.
	#
	# Steps: build a master and nclients clients, run rep_test on
	# the master, close the master, then call rep005_elect once
	# for every combination of per-client error locations.

	source ./include.tcl
	global rand_init
	global databases_in_memory
	global repfiles_in_memory
	global rep_verbose
	global verbose_type

	# Seed the generator from rand_init before any random choice.
	error_check_good set_random_seed [berkdb srand $rand_init] 0

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}

	set repmemargs ""
	if { $repfiles_in_memory } {
		set repmemargs "-rep_inmem_files "
	}

	env_cleanup $testdir

	set qdir $testdir/MSGQUEUEDIR
	replsetup $qdir

	set masterdir $testdir/MASTERDIR
	file mkdir $masterdir
	set m_logtype [lindex $logset 0]
	set m_logargs [adjust_logargs $m_logtype]
	set m_txnargs [adjust_txnargs $m_logtype]

	# Create each client's home directory and log/txn arguments.
	for { set i 0 } { $i < $nclients } { incr i } {
		set clientdir($i) $testdir/CLIENTDIR.$i
		file mkdir $clientdir($i)
		set c_logtype($i) [lindex $logset [expr {$i + 1}]]
		set c_logargs($i) [adjust_logargs $c_logtype($i)]
		set c_txnargs($i) [adjust_txnargs $c_logtype($i)]
	}

	# Open a master.  It is replication site 1.
	repladd 1
	set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \
	    -event rep_event $repmemargs \
	    -home $masterdir $m_logargs -errpfx MASTER $verbargs \
	    $m_txnargs -rep_master -rep_transport \[list 1 replsend\]"
	set masterenv [eval $env_cmd(M) $recargs]

	set envlist {}
	lappend envlist "$masterenv 1"

	# Open the clients.  Client N is replication site N + 2.
	for { set i 0 } { $i < $nclients } { incr i } {
		set envid [expr {$i + 2}]
		repladd $envid
		set env_cmd($i) "berkdb_env_noerr -create \
		    -event rep_event $repmemargs \
		    -home $clientdir($i) $c_logargs($i) \
		    $c_txnargs($i) -rep_client $verbargs \
		    -errpfx CLIENT$i \
		    -rep_transport \[list $envid replsend\]"
		set clientenv($i) [eval $env_cmd($i) $recargs]
		lappend envlist "$clientenv($i) $envid"
	}

	# Process startup messages
	process_msgs $envlist

	# Run rep_test in the master.
	puts "\tRep$tnum.a: Running rep_test in replicated env."
	eval rep_test $method $masterenv NULL $niter 0 0 0 $largs

	# Process all the messages before the master goes away.
	process_msgs $envlist

	# Check that databases are in-memory or on-disk as expected.
	check_db_location $masterenv
	for { set i 0 } { $i < $nclients } { incr i } {
		check_db_location $clientenv($i)
	}

	# Close the master and drop it from the message-processing
	# list.  After this, envlist index N holds client N.
	error_check_good masterenv_close [$masterenv close] 0
	set envlist [lreplace $envlist 0 0]

	# Discard anything still queued for the clients.
	for { set i 0 } { $i < $nclients } { incr i } {
		replclear [expr {$i + 2}]
	}

	# Set up the error list for each client.  Every client gets
	# the same list of error locations, and every combination of
	# them across the three clients is run as its own election.
	set m "Rep$tnum"
	set c0err { none electinit }
	set c1err $c0err
	set c2err $c0err
	set numtests [expr {[llength $c0err] * [llength $c1err] * \
	    [llength $c2err]}]
	puts "\t$m.b: Starting $numtests election with error tests"

	# State carried across elections; rep005_elect updates win and
	# last_win through upvar.  -1 means "none".
	set count 0
	set last_win -1
	set win -1
	foreach c0 $c0err {
		foreach c1 $c1err {
			foreach c2 $c2err {
				set elist [list $c0 $c1 $c2]
				rep005_elect env_cmd envlist $qdir \
				    $m $count win last_win $elist $logset
				incr count
			}
		}
	}

	# Close whatever client environments are still open.
	foreach pair $envlist {
		set cenv [lindex $pair 0]
		error_check_good cenv_close [$cenv close] 0
	}

	replclose $qdir
	puts -nonewline \
	    "Rep$tnum: Completed at: "
	puts [clock format [clock seconds] -format "%H:%M %D"]
}
194
proc rep005_elect { ecmd celist qdir msg count \
    winner lsn_lose elist logset} {
	# Hold one election among the clients, then randomly decide
	# how to set up the environment for the next one.
	#
	# ecmd     - name of the caller's array of env open commands,
	#            indexed by client number.
	# celist   - name of the caller's list of "envhandle envid"
	#            pairs; index N is expected to hold client N.
	# qdir     - message queue directory, passed to run_election.
	# msg      - prefix for progress messages.
	# count    - sequence number of this election, for messages.
	# winner   - name of the caller's variable holding the client
	#            expected to win, or -1 if one must be chosen.
	#            Updated for the next election.
	# lsn_lose - name of the caller's variable holding the client
	#            that must not win because its log was lost, or
	#            -1.  Updated for the next election.
	# elist    - per-client error locations; its length is the
	#            number of clients.
	# logset   - log types; element N + 1 belongs to client N.

	global elect_timeout elect_serial
	global timeout_ok
	global databases_in_memory
	upvar $ecmd env_cmd
	upvar $celist envlist
	upvar $winner win
	upvar $lsn_lose last_win

	# Set the proper value for the first time through the
	# loop.  On subsequent passes, timeout_ok will already
	# be set.
	if { [info exists timeout_ok] == 0 } {
		set timeout_ok 0
	}

	set nclients [llength $elist]

	# Rebuild the per-client state from the env list.  Env ids
	# start at 2 for client 0.
	set cl_list {}
	foreach pair $envlist {
		set id [lindex $pair 1]
		set i [expr {$id - 2}]
		set clientenv($i) [lindex $pair 0]
		set err_cmd($i) [lindex $elist $i]
		set elect_pipe($i) INVALID
		replclear $id
		lappend cl_list $i
	}

	# Select winner.  We want to test biggest LSN wins, and secondarily
	# highest priority wins.  If we already have a master, make sure
	# we don't start a client in that master.
	set el 0
	if { $win == -1 } {
		if { $last_win != -1 } {
			# The client that lost its log cannot be the
			# winner; have it call the election instead.
			set cl_list [lreplace $cl_list $last_win $last_win]
			set el $last_win
		}
		set windex [berkdb random_int 0 [expr {[llength $cl_list] - 1}]]
		set win [lindex $cl_list $windex]
	} else {
		# Easy case, if we have a master, the winner must be the
		# same one as last time, just use $win.
		# If client0 is the current existing master, start the
		# election in client 1.
		if {$win == 0} {
			set el 1
		}
	}

	# Winner has priority 100.  If we are testing LSN winning, then
	# make sure the lowest LSN client (last_win) has the highest
	# priority, so that it can only lose on LSN.
	# Everyone else has priority 10.
	for { set i 0 } { $i < $nclients } { incr i } {
		set crash($i) 0
		if { $i == $win } {
			set pri($i) 100
		} elseif { $i == $last_win } {
			set pri($i) 200
		} else {
			set pri($i) 10
		}
	}

	puts "\t$msg.b.$count: Start election (win=client$win) $elist"
	set msg $msg.c.$count

	# The original master is closed, so only the clients are
	# counted as sites, and all of them must vote.
	set nsites $nclients
	set nvotes $nsites
	if { $databases_in_memory } {
		set dbname { "" test.db }
	} else {
		set dbname test.db
	}
	run_election env_cmd envlist err_cmd pri crash \
	    $qdir $msg $el $nsites $nvotes $nclients $win \
	    0 $dbname 0 $timeout_ok

	#
	# Sometimes test elections with an existing master.
	# Other times test elections without master by closing the
	# master we just elected and creating a new client.
	# We want to weight it to close the new master.  So, use
	# a list to cause closing about 70% of the time.
	#
	set close_list { 0 0 0 1 1 1 1 1 1 1}
	set close_len [expr {[llength $close_list] - 1}]
	set close_index [berkdb random_int 0 $close_len]

	# Unless we close the master, the next election will time out.
	set timeout_ok 1

	if { [lindex $close_list $close_index] == 1 } {
		# Declare that we expect the next election to succeed.
		set timeout_ok 0
		puts -nonewline "\t\t$msg: Closing "
		error_check_good log_flush [$clientenv($win) log_flush] 0
		error_check_good newmaster_close [$clientenv($win) close] 0
		#
		# If the next test should win via LSN then remove the
		# env before starting the new client so that we
		# can guarantee this client doesn't win the next one.
		# This branch is taken about 60% of the time.
		set lsn_win { 0 0 0 0 1 1 1 1 1 1 }
		set lsn_len [expr {[llength $lsn_win] - 1}]
		set lsn_index [berkdb random_int 0 $lsn_len]
		set rec_arg ""

		# True when the winner's logs are in-memory.
		set win_inmem [expr {[string compare [lindex $logset \
		    [expr {$win + 1}]] in-memory] == 0}]
		if { [lindex $lsn_win $lsn_index] == 1 } {
			set last_win $win

			# The home directory is the word that follows
			# -home in the env open command.
			set dirindex [lsearch -exact $env_cmd($win) "-home"]
			incr dirindex
			set lsn_dir [lindex $env_cmd($win) $dirindex]
			env_cleanup $lsn_dir
			puts -nonewline "and cleaning "
		} else {
			#
			# If we're not cleaning the env, decide if we should
			# run recovery upon reopening the env.  This causes
			# two things:
			# 1. Removal of region files which forces the env
			# to read its __db.rep.egen file.
			# 2. Adding a couple log records, so this client must
			# be the next winner as well since it'll have the
			# biggest LSN.
			#
			set rec_win { 0 0 0 0 0 0 1 1 1 1 }
			set rec_len [expr {[llength $rec_win] - 1}]
			set rec_index [berkdb random_int 0 $rec_len]
			if { [lindex $rec_win $rec_index] == 1 } {
				puts -nonewline "and recovering "
				set rec_arg "-recover"
				#
				# If we're in memory and about to run
				# recovery, we force ourselves not to win
				# the next election because recovery will
				# blow away the entire log in memory.
				# However, we don't skip this entirely
				# because we still want to force reading
				# of __db.rep.egen.
				#
				if { $win_inmem } {
					set last_win $win
				} else {
					set last_win -1
				}
			} else {
				set last_win -1
			}
		}
		puts "new master, new client $win"
		set clientenv($win) [eval $env_cmd($win) $rec_arg]
		error_check_good cl($win) [is_valid_env $clientenv($win)] TRUE
		#
		# Since we started a new client, we need to replace it
		# in the message processing list so that we get the
		# new Tcl handle name in there.
		set newel "$clientenv($win) [expr {$win + 2}]"
		set envlist [lreplace $envlist $win $win $newel]

		# Keep the same expected winner only when it was
		# reopened with on-disk recovery; otherwise the next
		# election picks a new one.
		if { $rec_arg == "" || $win_inmem } {
			set win -1
		}
		#
		# Since we started a new client we want to give them
		# all a chance to process everything outstanding before
		# the election on the next iteration.
		#
		process_msgs $envlist
	}
}
365