# See the file LICENSE for redistribution information.
#
# Copyright (c) 2002,2008 Oracle.  All rights reserved.
#
# $Id: rep005.tcl,v 12.21 2008/01/08 20:58:53 bostic Exp $
#
# TEST	rep005
# TEST	Replication election test with error handling.
# TEST
# TEST	Run a modified version of test001 in a replicated master environment;
# TEST	hold an election among a group of clients to make sure they select
# TEST	a proper master from amongst themselves, forcing errors at various
# TEST	locations in the election path.

# rep005 --
#	Test driver.  Runs rep005_sub once for every log configuration
#	returned by create_logsets for one master plus $nclients clients.
#
#	method	access method under test; everything except btree is skipped.
#	args	extra arguments, forwarded untouched to rep005_sub.
#
#	When $checking_valid_methods is true the proc only reports the list
#	of methods it supports and returns without running anything.
proc rep005 { method args } {

	source ./include.tcl
	if { $is_windows9x_test == 1 } {
		puts "Skipping replication test on Win 9x platform."
		return
	}

	# Skip for all methods except btree.
	if { $checking_valid_methods } {
		set test_methods { btree }
		return $test_methods
	}
	if { [is_btree $method] == 0 } {
		puts "Rep005: Skipping for method $method."
		return
	}

	set tnum "005"
	set niter 10
	set nclients 3
	set logsets [create_logsets [expr {$nclients + 1}]]

	# We don't want to run this with -recover - it takes too
	# long and doesn't cover any new ground.
	set recargs ""
	foreach l $logsets {
		puts "Rep$tnum ($recargs): Replication election\
		    error test with $nclients clients."
		puts -nonewline "Rep$tnum: Started at: "
		puts [clock format [clock seconds] -format "%H:%M %D"]
		# Element 0 of a logset describes the master, element i+1
		# describes client i.
		puts "Rep$tnum: Master logs are [lindex $l 0]"
		for { set i 0 } { $i < $nclients } { incr i } {
			puts "Rep$tnum: Client $i logs are\
			    [lindex $l [expr {$i + 1}]]"
		}
		rep005_sub $method $tnum \
		    $niter $nclients $l $recargs $args
	}
}

# rep005_sub --
#	Run one pass of the test for a single log configuration.
#
#	method		access method passed on to rep_test.
#	tnum		test number string used in progress messages.
#	niter		iteration count passed on to rep_test.
#	nclients	number of client environments to create.
#	logset		list of log types: element 0 for the master,
#			element i+1 for client i.
#	recargs		extra arguments appended when opening each env.
#	largs		extra arguments appended to the rep_test call.
#
#	Opens a master and the clients, loads data through the master,
#	closes the master, and then calls rep005_elect once for every
#	combination of per-client error locations.
proc rep005_sub { method tnum niter nclients logset recargs largs } {
	source ./include.tcl
	global rand_init
	error_check_good set_random_seed [berkdb srand $rand_init] 0
	global rep_verbose
	global verbose_type

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}

	env_cleanup $testdir

	set qdir $testdir/MSGQUEUEDIR
	replsetup $qdir

	set masterdir $testdir/MASTERDIR
	file mkdir $masterdir
	set m_logtype [lindex $logset 0]
	set m_logargs [adjust_logargs $m_logtype]
	set m_txnargs [adjust_txnargs $m_logtype]

	for { set i 0 } { $i < $nclients } { incr i } {
		set clientdir($i) $testdir/CLIENTDIR.$i
		file mkdir $clientdir($i)
		set c_logtype($i) [lindex $logset [expr {$i + 1}]]
		set c_logargs($i) [adjust_logargs $c_logtype($i)]
		set c_txnargs($i) [adjust_txnargs $c_logtype($i)]
	}

	# Open a master.
	repladd 1
	set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \
	    -event rep_event \
	    -home $masterdir $m_logargs -errpfx MASTER $verbargs \
	    $m_txnargs -rep_master -rep_transport \[list 1 replsend\]"
	set masterenv [eval $env_cmd(M) $recargs]

	# envlist holds "envhandle envid" pairs; the master is envid 1.
	set envlist {}
	lappend envlist "$masterenv 1"

	# Open the clients.  Client i gets envid i+2.
	for { set i 0 } { $i < $nclients } { incr i } {
		set envid [expr {$i + 2}]
		repladd $envid
		set env_cmd($i) "berkdb_env_noerr -create \
		    -event rep_event \
		    -home $clientdir($i) $c_logargs($i) \
		    $c_txnargs($i) -rep_client $verbargs \
		    -errpfx CLIENT$i \
		    -rep_transport \[list $envid replsend\]"
		set clientenv($i) [eval $env_cmd($i) $recargs]
		lappend envlist "$clientenv($i) $envid"
	}

	# Process startup messages
	process_msgs $envlist
	# Run a modified test001 in the master.
	puts "\tRep$tnum.a: Running test001 in replicated env."
	eval rep_test $method $masterenv NULL $niter 0 0 0 0 $largs

	# Process all the messages and close the master.
	process_msgs $envlist
	error_check_good masterenv_close [$masterenv close] 0
	# Drop the master's pair so that position i in envlist is client i.
	set envlist [lreplace $envlist 0 0]

	for { set i 0 } { $i < $nclients } { incr i } {
		replclear [expr {$i + 2}]
	}
	#
	# We set up the error list for each client.  We know that the
	# first client is the one calling the election, therefore, add
	# the error location on sending the message (electsend) for that one.
	set m "Rep$tnum"
	set count 0
	#
	# A full test can take a long time to run.  For normal testing
	# pare it down a lot so that it runs in a shorter time.
	#
	set c0err { none electinit none }
	set c1err $c0err
	set c2err $c0err
	set numtests [expr {[llength $c0err] * [llength $c1err] * \
	    [llength $c2err]}]
	puts "\t$m.b: Starting $numtests election with error tests"
	# win and last_win are handed to rep005_elect by name and carry
	# state from one election to the next; -1 means "none".
	set last_win -1
	set win -1
	foreach c0 $c0err {
		foreach c1 $c1err {
			foreach c2 $c2err {
				set elist [list $c0 $c1 $c2]
				rep005_elect env_cmd envlist $qdir \
				    $m $count win last_win $elist $logset
				incr count
			}
		}
	}

	foreach pair $envlist {
		set cenv [lindex $pair 0]
		error_check_good cenv_close [$cenv close] 0
	}

	replclose $qdir
	puts -nonewline "Rep$tnum: Completed at: "
	puts [clock format [clock seconds] -format "%H:%M %D"]
}

# rep005_elect --
#	Hold one election among the clients and then randomly decide
#	whether to close and reopen the new master.
#
#	ecmd		name of the caller's env_cmd array (env open commands).
#	celist		name of the caller's list of "envhandle envid" pairs.
#	qdir		message queue directory, passed on to run_election.
#	msg		message prefix for progress output.
#	count		sequence number of this election, used in messages.
#	winner		name of the caller's variable holding the expected
#			winner's client index, or -1 if one must be picked.
#	lsn_lose	name of the caller's variable holding the index of a
#			client that must not be picked as winner, or -1.
#	elist		list of error locations, one per client.
#	logset		list of log types: element 0 for the master,
#			element i+1 for client i.
#
#	Updates the caller's envlist, winner and lsn_lose variables and
#	the global timeout_ok.
proc rep005_elect { ecmd celist qdir msg count \
    winner lsn_lose elist logset} {
	# NOTE(review): elect_timeout and elect_serial are not referenced
	# in this proc; the declaration is kept as-is -- confirm whether it
	# can be dropped.
	global elect_timeout elect_serial
	global timeout_ok
	upvar $ecmd env_cmd
	upvar $celist envlist
	upvar $winner win
	upvar $lsn_lose last_win

	# Set the proper value for the first time through the
	# loop.  On subsequent passes, timeout_ok will already
	# be set.
	if { [info exists timeout_ok] == 0 } {
		set timeout_ok 0
	}

	set nclients [llength $elist]

	# Build per-client state.  Client index is envid - 2.
	set cl_list {}
	foreach pair $envlist {
		set id [lindex $pair 1]
		set i [expr {$id - 2}]
		set clientenv($i) [lindex $pair 0]
		set err_cmd($i) [lindex $elist $i]
		set elect_pipe($i) INVALID
		replclear $id
		lappend cl_list $i
	}

	# Select winner.  We want to test biggest LSN wins, and secondarily
	# highest priority wins.  If we already have a master, make sure
	# we don't start the election in that master.
	set el 0
	if { $win == -1 } {
		if { $last_win != -1 } {
			# Removing by position works because cl_list holds
			# the client indices in order, so position == index.
			set cl_list [lreplace $cl_list $last_win $last_win]
			set el $last_win
		}
		set windex [berkdb random_int 0 [expr {[llength $cl_list] - 1}]]
		set win [lindex $cl_list $windex]
	} else {
		# Easy case, if we have a master, the winner must be the
		# same one as last time, just use $win.
		# If client0 is the current existing master, start the
		# election in client 1.
		if {$win == 0} {
			set el 1
		}
	}
	# Winner has priority 100.  If we are testing LSN winning, then
	# make sure the lowest LSN client has the highest priority.
	# Everyone else has priority 10.
	for { set i 0 } { $i < $nclients } { incr i } {
		set crash($i) 0
		if { $i == $win } {
			set pri($i) 100
		} elseif { $i == $last_win } {
			set pri($i) 200
		} else {
			set pri($i) 10
		}
	}

	puts "\t$msg.b.$count: Start election (win=client$win) $elist"
	set msg $msg.c.$count
	# The master has been closed, so only the clients count as sites.
	set nsites $nclients
	set nvotes $nsites
	run_election env_cmd envlist err_cmd pri crash \
	    $qdir $msg $el $nsites $nvotes $nclients $win \
	    0 "test.db" 0 $timeout_ok

	#
	# Sometimes test elections with an existing master.
	# Other times test elections without master by closing the
	# master we just elected and creating a new client.
	# We want to weight it to close the new master.  So, use
	# a list to cause closing about 70% of the time.
	#
	set close_list { 0 0 0 1 1 1 1 1 1 1}
	set close_len [expr {[llength $close_list] - 1}]
	set close_index [berkdb random_int 0 $close_len]

	# Unless we close the master, the next election will time out.
	set timeout_ok 1

	if { [lindex $close_list $close_index] == 1 } {
		# Declare that we expect the next election to succeed.
		set timeout_ok 0
		puts -nonewline "\t\t$msg: Closing "
		error_check_good log_flush [$clientenv($win) log_flush] 0
		error_check_good newmaster_close [$clientenv($win) close] 0
		#
		# If the next test should win via LSN then remove the
		# env before starting the new client so that we
		# can guarantee this client doesn't win the next one.
		set lsn_win { 0 0 0 0 1 1 1 1 1 1 }
		set lsn_len [expr {[llength $lsn_win] - 1}]
		set lsn_index [berkdb random_int 0 $lsn_len]
		set rec_arg ""
		# True (1) when the winner's logs are configured in-memory.
		set win_inmem [expr {[string compare [lindex $logset \
		    [expr {$win + 1}]] in-memory] == 0}]
		if { [lindex $lsn_win $lsn_index] == 1 } {
			set last_win $win
			# Pull the directory following "-home" out of the
			# winner's env open command.
			set dirindex [lsearch -exact $env_cmd($win) "-home"]
			incr dirindex
			set lsn_dir [lindex $env_cmd($win) $dirindex]
			env_cleanup $lsn_dir
			puts -nonewline "and cleaning "
		} else {
			#
			# If we're not cleaning the env, decide if we should
			# run recovery upon reopening the env.  This causes
			# two things:
			# 1. Removal of region files which forces the env
			# to read its __db.rep.egen file.
			# 2. Adding a couple log records, so this client must
			# be the next winner as well since it'll have the
			# biggest LSN.
			#
			set rec_win { 0 0 0 0 0 0 1 1 1 1 }
			set rec_len [expr {[llength $rec_win] - 1}]
			set rec_index [berkdb random_int 0 $rec_len]
			if { [lindex $rec_win $rec_index] == 1 } {
				puts -nonewline "and recovering "
				set rec_arg "-recover"
				#
				# If we're in memory and about to run
				# recovery, we force ourselves not to win
				# the next election because recovery will
				# blow away the entire log in memory.
				# However, we don't skip this entirely
				# because we still want to force reading
				# of __db.rep.egen.
				#
				if { $win_inmem } {
					set last_win $win
				} else {
					set last_win -1
				}
			} else {
				set last_win -1
			}
		}
		puts "new master, new client $win"
		set clientenv($win) [eval $env_cmd($win) $rec_arg]
		error_check_good cl($win) [is_valid_env $clientenv($win)] TRUE
		#
		# Since we started a new client, we need to replace it
		# in the message processing list so that we get the
		# new Tcl handle name in there.
		set newel "$clientenv($win) [expr {$win + 2}]"
		set envlist [lreplace $envlist $win $win $newel]
		# Keep $win as the expected winner only when the client was
		# reopened with recovery and its logs are on disk; otherwise
		# a fresh winner is chosen on the next call.
		if { $rec_arg == "" || $win_inmem } {
			set win -1
		}
		#
		# Since we started a new client we want to give them
		# all a chance to process everything outstanding before
		# the election on the next iteration.
		#
		process_msgs $envlist
	}
}