1# See the file LICENSE for redistribution information.
2#
3# Copyright (c) 2002,2008 Oracle.  All rights reserved.
4#
5# $Id: rep005.tcl,v 12.21 2008/01/08 20:58:53 bostic Exp $
6#
7# TEST  rep005
8# TEST	Replication election test with error handling.
9# TEST
10# TEST	Run a modified version of test001 in a replicated master environment;
11# TEST  hold an election among a group of clients to make sure they select
12# TEST  a proper master from amongst themselves, forcing errors at various
13# TEST	locations in the election path.
14
# rep005 --
#	Driver for the replication election test with error handling.
#	Only btree is exercised; one pass of rep005_sub is run for every
#	logset returned by create_logsets.
#
#	method	access method under test
#	args	extra arguments forwarded to rep005_sub
proc rep005 { method args } {

	source ./include.tcl

	# Replication tests are not run on Windows 9x.
	if { $is_windows9x_test == 1 } {
		puts "Skipping replication test on Win 9x platform."
		return
	}

	# When the harness is only asking which methods are valid,
	# answer with btree and do nothing else.
	if { $checking_valid_methods } {
		set test_methods { btree }
		return $test_methods
	}

	# Every other access method is skipped.
	if { [is_btree $method] == 0 } {
		puts "Rep005: Skipping for method $method."
		return
	}

	set tnum "005"
	set niter 10
	set nclients 3

	# One log type for the master plus one for each client.
	set logsets [create_logsets [expr {$nclients + 1}]]

	# Running with -recover is deliberately left out: it takes too
	# long and doesn't cover any new ground.
	set recargs ""
	foreach logset $logsets {
		puts "Rep$tnum ($recargs): Replication election\
		    error test with $nclients clients."
		set started [clock format [clock seconds] -format "%H:%M %D"]
		puts "Rep$tnum: Started at: $started"
		puts "Rep$tnum: Master logs are [lindex $logset 0]"

		# Element 0 of the logset is the master; client N is at N + 1.
		set client 0
		while { $client < $nclients } {
			set logtype [lindex $logset [expr {$client + 1}]]
			puts "Rep$tnum: Client $client logs are $logtype"
			incr client
		}

		rep005_sub $method $tnum \
		    $niter $nclients $logset $recargs $args
	}
}
55
# rep005_sub --
#	Run one pass of the election-with-errors test for a single logset:
#	build a master and nclients clients, load data through the master,
#	close the master, then call rep005_elect once for every combination
#	of per-client error locations.
#
#	method		access method handed to rep_test
#	tnum		test number string used in progress messages
#	niter		iteration count handed to rep_test
#	nclients	number of client environments to create
#	logset		list of log types; element 0 is the master's and
#			element i + 1 is client i's
#	recargs		extra arguments appended when each env is opened
#	largs		extra arguments appended to the rep_test call
proc rep005_sub { method tnum niter nclients logset recargs largs } {
	source ./include.tcl
	global rand_init
	error_check_good set_random_seed [berkdb srand $rand_init] 0
	global rep_verbose
	global verbose_type

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}

	env_cleanup $testdir

	set qdir $testdir/MSGQUEUEDIR
	replsetup $qdir

	set masterdir $testdir/MASTERDIR
	file mkdir $masterdir
	set m_logtype [lindex $logset 0]
	set m_logargs [adjust_logargs $m_logtype]
	set m_txnargs [adjust_txnargs $m_logtype]

	# Create a home directory and derive the log/txn arguments for
	# every client from its entry in the logset.
	for { set i 0 } { $i < $nclients } { incr i } {
		set clientdir($i) $testdir/CLIENTDIR.$i
		file mkdir $clientdir($i)
		set c_logtype($i) [lindex $logset [expr {$i + 1}]]
		set c_logargs($i) [adjust_logargs $c_logtype($i)]
		set c_txnargs($i) [adjust_txnargs $c_logtype($i)]
	}

	# Open a master.  It uses environment id 1.
	repladd 1
	set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \
	    -event rep_event \
	    -home $masterdir $m_logargs -errpfx MASTER $verbargs \
	    $m_txnargs -rep_master -rep_transport \[list 1 replsend\]"
	set masterenv [eval $env_cmd(M) $recargs]

	# envlist holds "envhandle envid" pairs for message processing.
	set envlist {}
	lappend envlist "$masterenv 1"

	# Open the clients.  Client i uses environment id i + 2.
	for { set i 0 } { $i < $nclients } { incr i } {
		set envid [expr {$i + 2}]
		repladd $envid
		set env_cmd($i) "berkdb_env_noerr -create \
		    -event rep_event \
		    -home $clientdir($i) $c_logargs($i) \
		    $c_txnargs($i) -rep_client $verbargs \
		    -errpfx CLIENT$i \
		    -rep_transport \[list $envid replsend\]"
		set clientenv($i) [eval $env_cmd($i) $recargs]
		lappend envlist "$clientenv($i) $envid"
	}

	# Process startup messages
	process_msgs $envlist
	# Run a modified test001 in the master.
	puts "\tRep$tnum.a: Running test001 in replicated env."
	eval rep_test $method $masterenv NULL $niter 0 0 0 0 $largs

	# Process all the messages and close the master.
	process_msgs $envlist
	error_check_good masterenv_close [$masterenv close] 0

	# Drop the master's pair; from here on, the position of a client
	# in envlist equals its client number.
	set envlist [lreplace $envlist 0 0]

	for { set i 0 } { $i < $nclients } { incr i } {
		replclear [expr {$i + 2}]
	}

	#
	# We set up the error list for each client.
	#
	# A full test can take a long time to run.  For normal testing
	# the list is pared down a lot so that it runs in a shorter time;
	# note that the pared-down list below does not include the
	# message-send location (electsend).
	#
	set m "Rep$tnum"
	set count 0
	set c0err { none electinit none }
	set c1err $c0err
	set c2err $c0err
	set numtests [expr {[llength $c0err] * [llength $c1err] * \
	    [llength $c2err]}]
	puts "\t$m.b: Starting $numtests election with error tests"

	# win and last_win are handed to rep005_elect by name and are
	# updated there from one election to the next; -1 means "none".
	set last_win -1
	set win -1
	foreach c0 $c0err {
		foreach c1 $c1err {
			foreach c2 $c2err {
				set elist [list $c0 $c1 $c2]
				rep005_elect env_cmd envlist $qdir \
				    $m $count win last_win $elist $logset
				incr count
			}
		}
	}

	# Close whatever client environments are left in the list.
	foreach pair $envlist {
		set cenv [lindex $pair 0]
		error_check_good cenv_close [$cenv close] 0
	}

	# Shut down the same queue directory that replsetup was given.
	replclose $qdir
	puts -nonewline \
	    "Rep$tnum: Completed at: "
	puts [clock format [clock seconds] -format "%H:%M %D"]
}
166
# rep005_elect --
#	Hold one election among the clients in the caller's envlist and
#	then, most of the time, close the winner and reopen it as a fresh
#	client so that the next election starts without a master.
#
#	ecmd		name of the caller's env_cmd array (env open commands)
#	celist		name of the caller's list of "envhandle envid" pairs
#	qdir		message queue directory, passed to run_election
#	msg		message prefix for progress output
#	count		sequence number of this election, used in output
#	winner		name of the caller's variable holding the expected
#			winner (client number), or -1 if there is none yet
#	lsn_lose	name of the caller's variable holding the client that
#			must not win the next election, or -1
#	elist		list of error locations, one per client
#	logset		list of log types; element i + 1 belongs to client i
proc rep005_elect { ecmd celist qdir msg count \
    winner lsn_lose elist logset} {
	global elect_timeout elect_serial
	global timeout_ok
	upvar $ecmd env_cmd
	upvar $celist envlist
	upvar $winner win
	upvar $lsn_lose last_win

	# Set the proper value for the first time through the
	# loop.  On subsequent passes, timeout_ok will already
	# be set.
	if { [info exists timeout_ok] == 0 } {
		set timeout_ok 0
	}

	set nclients [llength $elist]

	# Environment ids start at 2 for client 0, so id - 2 is the
	# client number.  Record each client's handle and error location
	# and discard any messages still queued for it.
	set cl_list {}
	foreach pair $envlist {
		set id [lindex $pair 1]
		set i [expr {$id - 2}]
		set clientenv($i) [lindex $pair 0]
		set err_cmd($i) [lindex $elist $i]
		set elect_pipe($i) INVALID
		replclear $id
		lappend cl_list $i
	}

	# Select winner.  We want to test biggest LSN wins, and secondarily
	# highest priority wins.  If we already have a master, make sure
	# we don't start a client in that master.
	set el 0
	if { $win == -1 } {
		if { $last_win != -1 } {
			# last_win may not win again: remove it from the
			# candidates and start the election from it.
			set cl_list [lreplace $cl_list $last_win $last_win]
			set el $last_win
		}
		set windex [berkdb random_int 0 [expr {[llength $cl_list] - 1}]]
		set win [lindex $cl_list $windex]
	} else {
		# Easy case, if we have a master, the winner must be the
		# same one as last time, just use $win.
		# If client0 is the current existing master, start the
		# election in client 1.
		if {$win == 0} {
			set el 1
		}
	}
	# Winner has priority 100.  If we are testing LSN winning, then
	# make sure the lowest LSN client has the highest priority.
	# Everyone else has priority 10.
	for { set i 0 } { $i < $nclients } { incr i } {
		set crash($i) 0
		if { $i == $win } {
			set pri($i) 100
		} elseif { $i == $last_win } {
			set pri($i) 200
		} else {
			set pri($i) 10
		}
	}

	puts "\t$msg.b.$count: Start election (win=client$win) $elist"
	set msg $msg.c.$count

	# The number of sites and the number of votes passed to
	# run_election both equal the number of clients.
	set nsites $nclients
	set nvotes $nsites
	run_election env_cmd envlist err_cmd pri crash \
	    $qdir $msg $el $nsites $nvotes $nclients $win \
	    0 "test.db" 0 $timeout_ok

	#
	# Sometimes test elections with an existing master.
	# Other times test elections without master by closing the
	# master we just elected and creating a new client.
	# We want to weight it to close the new master.  So, use
	# a list to cause closing about 70% of the time.
	#
	set close_list { 0 0 0 1 1 1 1 1 1 1}
	set close_len [expr {[llength $close_list] - 1}]
	set close_index [berkdb random_int 0 $close_len]

	# Unless we close the master, the next election will time out.
	set timeout_ok 1

	if { [lindex $close_list $close_index] == 1 } {
		# Declare that we expect the next election to succeed.
		set timeout_ok 0
		puts -nonewline "\t\t$msg: Closing "
		error_check_good log_flush [$clientenv($win) log_flush] 0
		error_check_good newmaster_close [$clientenv($win) close] 0
		#
		# If the next test should win via LSN then remove the
		# env before starting the new client so that we
		# can guarantee this client doesn't win the next one.
		set lsn_win { 0 0 0 0 1 1 1 1 1 1 }
		set lsn_len [expr {[llength $lsn_win] - 1}]
		set lsn_index [berkdb random_int 0 $lsn_len]
		set rec_arg ""

		# True when the winner's entry in the logset is "in-memory".
		set win_inmem [expr {[string compare [lindex $logset \
		    [expr {$win + 1}]] in-memory] == 0}]
		if { [lindex $lsn_win $lsn_index] == 1 } {
			set last_win $win
			# The home directory is the word following -home
			# in the winner's env open command.
			set dirindex [lsearch -exact $env_cmd($win) "-home"]
			incr dirindex
			set lsn_dir [lindex $env_cmd($win) $dirindex]
			env_cleanup $lsn_dir
			puts -nonewline "and cleaning "
		} else {
			#
			# If we're not cleaning the env, decide if we should
			# run recovery upon reopening the env.  This causes
			# two things:
			# 1. Removal of region files which forces the env
			# to read its __db.rep.egen file.
			# 2. Adding a couple log records, so this client must
			# be the next winner as well since it'll have the
			# biggest LSN.
			#
			set rec_win { 0 0 0 0 0 0 1 1 1 1 }
			set rec_len [expr {[llength $rec_win] - 1}]
			set rec_index [berkdb random_int 0 $rec_len]
			if { [lindex $rec_win $rec_index] == 1 } {
				puts -nonewline "and recovering "
				set rec_arg "-recover"
				#
				# If we're in memory and about to run
				# recovery, we force ourselves not to win
				# the next election because recovery will
				# blow away the entire log in memory.
				# However, we don't skip this entirely
				# because we still want to force reading
				# of __db.rep.egen.
				#
				if { $win_inmem } {
					set last_win $win
				} else {
					set last_win -1
				}
			} else {
				set last_win -1
			}
		}
		puts "new master, new client $win"
		set clientenv($win) [eval $env_cmd($win) $rec_arg]
		error_check_good cl($win) [is_valid_env $clientenv($win)] TRUE
		#
		# Since we started a new client, we need to replace it
		# in the message processing list so that we get the
		# new Tcl handle name in there.
		set newel "$clientenv($win) [expr {$win + 2}]"
		set envlist [lreplace $envlist $win $win $newel]

		# Keep $win as the expected next winner only when the env
		# was reopened with -recover on non-in-memory logs;
		# otherwise the next election picks a new winner.
		if { $rec_arg == "" || $win_inmem } {
			set win -1
		}
		#
		# Since we started a new client we want to give them
		# all a chance to process everything outstanding before
		# the election on the next iteration.
		#
		process_msgs $envlist
	}
}
331