# See the file LICENSE for redistribution information.
#
# Copyright (c) 2002,2008 Oracle. All rights reserved.
#
# $Id: rep067.tcl,v 1.11 2008/01/08 20:58:53 bostic Exp $
#
# TEST rep067
# TEST Replication election test with large timeouts.
# TEST
# TEST Test replication elections among clients with widely varying
# TEST timeouts. This test is used to simulate a customer that
# TEST wants to force full participation in an election, but only
# TEST if all sites are present (i.e. if all sites are restarted
# TEST together). If any site has already been part of the group,
# TEST then we want to be able to elect a master based on majority.
# TEST Using varied timeouts, we can force full participation if
# TEST all sites are present with "long_timeout" amount of time and
# TEST then revert to majority.
# TEST
# TEST A long_timeout would be several minutes whereas a normal
# TEST short timeout would be a few seconds.
#
# rep067 --
#	Test entry point.  Skips on Win 9x and for any access method
#	other than btree, then runs rep067_sub once for every log
#	configuration returned by create_logsets.
#
# Arguments:
#	method	Access method under test; only btree is run.
#	args	Extra arguments, passed through to rep067_sub as one list.
#
# Results:
#	Returns the list of valid methods ({ btree }) when
#	$checking_valid_methods is true; otherwise returns nothing.
proc rep067 { method args } {

    source ./include.tcl
    if { $is_windows9x_test == 1 } {
        puts "Skipping replication test on Win 9x platform."
        return
    }

    # Skip for all methods except btree.
    if { $checking_valid_methods } {
        set test_methods { btree }
        return $test_methods
    }
    if { [is_btree $method] == 0 } {
        puts "Rep067: Skipping for method $method."
        return
    }

    set tnum "067"
    set niter 10
    set nclients 3
    # One log configuration per site: the master plus each client.
    set logsets [create_logsets [expr {$nclients + 1}]]

    # We don't want to run this with -recover - it takes too
    # long and doesn't cover any new ground.
    set recargs ""
    foreach l $logsets {
        puts "Rep$tnum ($recargs): Replication election\
            mixed long timeouts with $nclients clients."
        puts -nonewline "Rep$tnum: Started at: "
        puts [clock format [clock seconds] -format "%H:%M %D"]
        puts "Rep$tnum: Master logs are [lindex $l 0]"
        for { set i 0 } { $i < $nclients } { incr i } {
            puts "Rep$tnum: Client $i logs are\
                [lindex $l [expr {$i + 1}]]"
        }
        rep067_sub $method $tnum \
            $niter $nclients $l $recargs $args
    }
}

# rep067_sub --
#	Run one pass of the test for a single log configuration: build a
#	master and $nclients clients, load data through the master, close
#	the master, then hold an election for every combination of client
#	timeouts under each quorum mode.
#
# Arguments:
#	method		Access method handed to rep_test.
#	tnum		Test number string used in progress messages.
#	niter		Iteration count handed to rep_test.
#	nclients	Number of client environments to create.
#	logset		List of log types: element 0 is the master's,
#			element i+1 is client i's.
#	recargs		Extra arguments appended when opening each env.
#	largs		Extra arguments appended to the rep_test call.
proc rep067_sub { method tnum niter nclients logset recargs largs } {
    source ./include.tcl
    global rand_init
    error_check_good set_random_seed [berkdb srand $rand_init] 0
    global rep_verbose
    global verbose_type

    set verbargs ""
    if { $rep_verbose == 1 } {
        set verbargs " -verbose {$verbose_type on} "
    }

    env_cleanup $testdir

    set qdir $testdir/MSGQUEUEDIR
    replsetup $qdir

    set masterdir $testdir/MASTERDIR
    file mkdir $masterdir
    set m_logtype [lindex $logset 0]
    set m_logargs [adjust_logargs $m_logtype]
    set m_txnargs [adjust_txnargs $m_logtype]

    for { set i 0 } { $i < $nclients } { incr i } {
        set clientdir($i) $testdir/CLIENTDIR.$i
        file mkdir $clientdir($i)
        set c_logtype($i) [lindex $logset [expr {$i + 1}]]
        set c_logargs($i) [adjust_logargs $c_logtype($i)]
        set c_txnargs($i) [adjust_txnargs $c_logtype($i)]
    }

    # Open a master.
    repladd 1
    set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \
        -event rep_event \
        -home $masterdir $m_logargs $verbargs -errpfx MASTER \
        $m_txnargs -rep_master -rep_transport \[list 1 replsend\]"
    set masterenv [eval $env_cmd(M) $recargs]

    set envlist {}
    lappend envlist "$masterenv 1"

    # Open the clients.  Client i uses environment id i + 2 because
    # id 1 belongs to the master.
    for { set i 0 } { $i < $nclients } { incr i } {
        set envid [expr {$i + 2}]
        repladd $envid
        set env_cmd($i) "berkdb_env_noerr -create \
            -event rep_event -home $clientdir($i) \
            $c_logargs($i) $c_txnargs($i) -rep_client $verbargs \
            -errpfx CLIENT.$i -rep_transport \[list $envid replsend\]"
        set clientenv($i) [eval $env_cmd($i) $recargs]
        lappend envlist "$clientenv($i) $envid"
    }

    # Process startup messages
    process_msgs $envlist
    # Run a modified test001 in the master.
    puts "\tRep$tnum.a: Running test001 in replicated env."
    eval rep_test $method $masterenv NULL $niter 0 0 0 0 $largs

    # Process all the messages and close the master.
    process_msgs $envlist
    error_check_good masterenv_close [$masterenv close] 0
    # Drop the master's entry so envlist index i is client i from here on.
    set envlist [lreplace $envlist 0 0]

    #
    # Make sure all clients are starting with no pending messages.
    #
    for { set i 0 } { $i < $nclients } { incr i } {
        replclear [expr {$i + 2}]
    }

    #
    # Run the test for all different timeout combinations.
    #
    set c0to { long medium }
    set c1to { medium short }
    set c2to { short long }
    set numtests [expr {[llength $c0to] * [llength $c1to] * \
        [llength $c2to]}]
    set m "Rep$tnum"
    set count 0
    set last_win -1
    set win -1
    set quorum { majority all }
    foreach q $quorum {
        puts "\t$m.b: Starting $numtests election with\
            timeout tests: $q must participate"
        foreach c0 $c0to {
            foreach c1 $c1to {
                foreach c2 $c2to {
                    set elist [list $c0 $c1 $c2]
                    rep067_elect env_cmd envlist $qdir \
                        $m $count win last_win $elist \
                        $q $logset
                    incr count
                }
            }
        }
    }

    foreach pair $envlist {
        set cenv [lindex $pair 0]
        error_check_good cenv_close [$cenv close] 0
    }

    replclose $qdir
    puts -nonewline \
        "Rep$tnum: Completed at: "
    puts [clock format [clock seconds] -format "%H:%M %D"]
}

# rep067_elect --
#	Hold one election among the clients with the requested mix of
#	timeouts, then randomly decide whether to close (and possibly
#	clean or recover) the elected master before the next election.
#
# Arguments:
#	ecmd		Name of the caller's array of env-open commands,
#			indexed by client number.
#	celist		Name of the caller's list of {env envid} pairs; it is
#			updated in place when a client env is reopened.
#	qdir		Message queue directory, handed to run_election.
#	msg		Prefix for progress messages.
#	count		Sequence number of this election, used in messages.
#	winner		Name of the caller's variable holding the client
#			expected to win (-1 if one must be chosen here).
#	lsn_lose	Name of the caller's variable holding the client that
#			must not win this election (-1 if none).
#	elist		One timeout label per client: long, medium or short.
#	quorum		"all" to require every client to participate; any
#			other value counts the closed master as a site.
#	logset		List of log types; element i+1 is client i's.
#
# Side effects:
#	Sets the global elect_timeout array and the global timeout_ok
#	flag, which persists from one call to the next.
proc rep067_elect { ecmd celist qdir msg count \
    winner lsn_lose elist quorum logset} {
    global elect_timeout elect_serial
    global timeout_ok
    upvar $ecmd env_cmd
    upvar $celist envlist
    upvar $winner win
    upvar $lsn_lose last_win

    # Set the proper value for the first time through the
    # loop.  On subsequent passes, timeout_ok will already
    # be set.
    if { [info exists timeout_ok] == 0 } {
        set timeout_ok 0
    }

    set nclients [llength $elist]

    #
    # Set long timeout to 3 minutes (180 sec).
    # Set medium timeout to half the long timeout.
    # Set short timeout to 10 seconds.
    set long_timeout 180000000
    set med_timeout [expr {$long_timeout / 2}]
    set short_timeout 10000000
    set cl_list {}
    foreach pair $envlist {
        set id [lindex $pair 1]
        # Environment ids start at 2 for client 0.
        set i [expr {$id - 2}]
        set clientenv($i) [lindex $pair 0]
        set to [lindex $elist $i]
        # elect_timeout is global, so an unrecognized label would
        # otherwise silently reuse the value left by an earlier call.
        switch -exact -- $to {
            long {
                set elect_timeout($i) $long_timeout
            }
            medium {
                set elect_timeout($i) $med_timeout
            }
            short {
                set elect_timeout($i) $short_timeout
            }
            default {
                error "rep067_elect: unknown timeout \"$to\" for client $i"
            }
        }
        set elect_pipe($i) INVALID
        set err_cmd($i) "none"
        replclear $id
        lappend cl_list $i
    }

    # Select winner.  We want to test biggest LSN wins, and secondarily
    # highest priority wins.  If we already have a master, make sure
    # we don't start a client in that master.
    set elector 0
    if { $win == -1 } {
        if { $last_win != -1 } {
            set cl_list [lreplace $cl_list $last_win $last_win]
            set elector $last_win
        }
        set windex [berkdb random_int 0 [expr {[llength $cl_list] - 1}]]
        set win [lindex $cl_list $windex]
    } else {
        # Easy case, if we have a master, the winner must be the
        # same one as last time, just use $win.
        # If client0 is the current existing master, start the
        # election in client 1.
        if {$win == 0} {
            set elector 1
        }
    }
    # Winner has priority 100.  If we are testing LSN winning, then
    # make sure the lowest LSN client has the highest priority.
    # Everyone else has priority 10.
    for { set i 0 } { $i < $nclients } { incr i } {
        set crash($i) 0
        if { $i == $win } {
            set pri($i) 100
        } elseif { $i == $last_win } {
            set pri($i) 200
        } else {
            set pri($i) 10
        }
    }

    puts "\t$msg.b.$count: Start election (win=client$win) $elist"
    set msg $msg.c.$count
    #
    # If we want all sites, then set nsites and nvotes the same.
    # otherwise, we need to increase nsites to account
    # for the master that is "down".
    #
    if { $quorum == "all" } {
        set nsites $nclients
    } else {
        set nsites [expr {$nclients + 1}]
    }
    set nvotes $nclients
    run_election env_cmd envlist err_cmd pri crash \
        $qdir $msg $elector $nsites $nvotes $nclients $win \
        0 "test.db" 0 $timeout_ok
    #
    # Sometimes test elections with an existing master.
    # Other times test elections without master by closing the
    # master we just elected and creating a new client.
    # We want to weight it to close the new master.  So, use
    # a list to cause closing about 70% of the time.
    #
    set close_list { 0 0 0 1 1 1 1 1 1 1}
    set close_len [expr {[llength $close_list] - 1}]
    set close_index [berkdb random_int 0 $close_len]

    # Unless we close the master, the next election will time out.
    set timeout_ok 1

    if { [lindex $close_list $close_index] == 1 } {
        # Declare that we expect the next election to succeed.
        set timeout_ok 0
        puts -nonewline "\t\t$msg: Closing "
        error_check_good newmaster_flush [$clientenv($win) log_flush] 0
        error_check_good newmaster_close [$clientenv($win) close] 0
        #
        # If the next test should win via LSN then remove the
        # env before starting the new client so that we
        # can guarantee this client doesn't win the next one.
        set lsn_win { 0 0 0 0 1 1 1 1 1 1 }
        set lsn_len [expr {[llength $lsn_win] - 1}]
        set lsn_index [berkdb random_int 0 $lsn_len]
        set rec_arg ""
        set win_logtype [lindex $logset [expr {$win + 1}]]
        set win_inmem [expr {[string compare $win_logtype in-memory] == 0}]
        if { [lindex $lsn_win $lsn_index] == 1 } {
            set last_win $win
            # Pull the home directory out of the env-open command:
            # it is the word that follows "-home".
            set dirindex [lsearch -exact $env_cmd($win) "-home"]
            incr dirindex
            set lsn_dir [lindex $env_cmd($win) $dirindex]
            env_cleanup $lsn_dir
            puts -nonewline "and cleaning "
        } else {
            #
            # If we're not cleaning the env, decide if we should
            # run recovery upon reopening the env.  This causes
            # two things:
            # 1. Removal of region files which forces the env
            # to read its __db.rep.egen file.
            # 2. Adding a couple log records, so this client must
            # be the next winner as well since it'll have the
            # biggest LSN.
            #
            set rec_win { 0 0 0 0 0 0 1 1 1 1 }
            set rec_len [expr {[llength $rec_win] - 1}]
            set rec_index [berkdb random_int 0 $rec_len]
            if { [lindex $rec_win $rec_index] == 1 } {
                puts -nonewline "and recovering "
                set rec_arg "-recover"
                #
                # If we're in memory and about to run
                # recovery, we force ourselves not to win
                # the next election because recovery will
                # blow away the entire log in memory.
                # However, we don't skip this entirely
                # because we still want to force reading
                # of __db.rep.egen.
                #
                if { $win_inmem } {
                    set last_win $win
                } else {
                    set last_win -1
                }
            } else {
                set last_win -1
            }
        }
        puts "new master, new client $win"
        set clientenv($win) [eval $env_cmd($win) $rec_arg]
        error_check_good cl($win) [is_valid_env $clientenv($win)] TRUE
        #
        # Since we started a new client, we need to replace it
        # in the message processing list so that we get the
        # new Tcl handle name in there.
        set newelector "$clientenv($win) [expr {$win + 2}]"
        set envlist [lreplace $envlist $win $win $newelector]
        if { $rec_arg == "" || $win_inmem } {
            set win -1
        }
        #
        # Since we started a new client we want to give them
        # all a chance to process everything outstanding before
        # the election on the next iteration.
        #
        process_msgs $envlist
    }
}