# See the file LICENSE for redistribution information.
#
# Copyright (c) 2002-2009 Oracle.  All rights reserved.
#
# $Id$
#
# TEST	rep067
# TEST	Replication election test with large timeouts.
# TEST
# TEST	Test replication elections among clients with widely varying
# TEST	timeouts.  This test is used to simulate a customer that
# TEST	wants to force full participation in an election, but only
# TEST	if all sites are present (i.e. if all sites are restarted
# TEST	together).  If any site has already been part of the group,
# TEST	then we want to be able to elect a master based on majority.
# TEST	Using varied timeouts, we can force full participation if
# TEST	all sites are present with "long_timeout" amount of time and
# TEST	then revert to majority.
# TEST
# TEST	A long_timeout would be several minutes whereas a normal
# TEST	short timeout would be a few seconds.
#
# rep067 --
#	Test entry point.  Validates that the test applies to the current
#	platform and access method, builds the log-type combinations for
#	one master plus $nclients clients, and runs rep067_sub once per
#	combination.
#
#	method	Access method under test; only btree is run.
#	args	Extra arguments, passed through to rep067_sub as a list.
#
#	Returns the list of valid methods when $checking_valid_methods is
#	set; otherwise returns nothing.
#
proc rep067 { method args } {

	source ./include.tcl
	global databases_in_memory
	global repfiles_in_memory

	if { $is_windows9x_test == 1 } {
		puts "Skipping replication test on Win 9x platform."
		return
	}

	# Skip for all methods except btree.
	if { $checking_valid_methods } {
		set test_methods { btree }
		return $test_methods
	}
	if { [is_btree $method] == 0 } {
		puts "Rep067: Skipping for method $method."
		return
	}

	set tnum "067"
	set niter 10
	set nclients 3
	# One log-type entry for the master plus one per client.
	set logsets [create_logsets [expr {$nclients + 1}]]

	# Set up for on-disk or in-memory databases.
	set msg "using on-disk databases"
	if { $databases_in_memory } {
		set msg "using named in-memory databases"
		if { [is_queueext $method] } {
			puts -nonewline "Skipping rep$tnum for method "
			puts "$method with named in-memory databases."
			return
		}
	}

	set msg2 "and on-disk replication files"
	if { $repfiles_in_memory } {
		set msg2 "and in-memory replication files"
	}

	# We don't want to run this with -recover - it takes too
	# long and doesn't cover any new ground.
	set recargs ""
	foreach l $logsets {
		puts "Rep$tnum ($recargs): Replication election mixed\
		    long timeouts with $nclients clients $msg $msg2."
		puts -nonewline "Rep$tnum: Started at: "
		puts [clock format [clock seconds] -format "%H:%M %D"]
		puts "Rep$tnum: Master logs are [lindex $l 0]"
		for { set i 0 } { $i < $nclients } { incr i } {
			puts "Rep$tnum: Client $i logs are\
			    [lindex $l [expr {$i + 1}]]"
		}
		rep067_sub $method $tnum \
		    $niter $nclients $l $recargs $args
	}
}

# rep067_sub --
#	Run the test for a single log-type combination.  Opens a master and
#	$nclients clients, populates the master with rep_test, closes the
#	master, and then calls rep067_elect once for every combination of
#	quorum setting and per-client timeout choice.
#
#	method		Access method passed to rep_test.
#	tnum		Test number string used in progress messages.
#	niter		Iteration count passed to rep_test.
#	nclients	Number of client environments to open.
#	logset		List of log types: index 0 is the master, index i+1
#			is client i.
#	recargs		Extra arguments appended to every env open command.
#	largs		Extra arguments appended to the rep_test call.
#
proc rep067_sub { method tnum niter nclients logset recargs largs } {
	source ./include.tcl
	global rand_init
	error_check_good set_random_seed [berkdb srand $rand_init] 0
	global repfiles_in_memory
	global rep_verbose
	global verbose_type

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}

	set repmemargs ""
	if { $repfiles_in_memory } {
		set repmemargs "-rep_inmem_files "
	}

	env_cleanup $testdir

	set qdir $testdir/MSGQUEUEDIR
	replsetup $qdir

	set masterdir $testdir/MASTERDIR
	file mkdir $masterdir
	set m_logtype [lindex $logset 0]
	set m_logargs [adjust_logargs $m_logtype]
	set m_txnargs [adjust_txnargs $m_logtype]

	for { set i 0 } { $i < $nclients } { incr i } {
		set clientdir($i) $testdir/CLIENTDIR.$i
		file mkdir $clientdir($i)
		set c_logtype($i) [lindex $logset [expr {$i + 1}]]
		set c_logargs($i) [adjust_logargs $c_logtype($i)]
		set c_txnargs($i) [adjust_txnargs $c_logtype($i)]
	}

	# Open a master.  The master uses environment id 1.
	repladd 1
	set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \
	    -event rep_event $repmemargs \
	    -home $masterdir $m_logargs $verbargs -errpfx MASTER \
	    $m_txnargs -rep_master -rep_transport \[list 1 replsend\]"
	set masterenv [eval $env_cmd(M) $recargs]

	set envlist {}
	lappend envlist "$masterenv 1"

	# Open the clients.  Client i uses environment id i + 2.
	for { set i 0 } { $i < $nclients } { incr i } {
		set envid [expr {$i + 2}]
		repladd $envid
		set env_cmd($i) "berkdb_env_noerr -create \
		    -event rep_event $repmemargs -home $clientdir($i) \
		    $c_logargs($i) $c_txnargs($i) -rep_client $verbargs \
		    -errpfx CLIENT.$i -rep_transport \[list $envid replsend\]"
		set clientenv($i) [eval $env_cmd($i) $recargs]
		lappend envlist "$clientenv($i) $envid"
	}

	# Process startup messages
	process_msgs $envlist

	# Run a modified test001 in the master.
	puts "\tRep$tnum.a: Running test001 in replicated env."
	eval rep_test $method $masterenv NULL $niter 0 0 0 $largs

	# Process all the messages and close the master.
	process_msgs $envlist
	error_check_good masterenv_close [$masterenv close] 0
	# Drop the master's entry so that position i in envlist is client i;
	# rep067_elect relies on that when it replaces an entry by index.
	set envlist [lreplace $envlist 0 0]

	#
	# Make sure all clients are starting with no pending messages.
	#
	for { set i 0 } { $i < $nclients } { incr i } {
		replclear [expr {$i + 2}]
	}

	#
	# Run the test for all different timeout combinations.
	# Each list holds the timeout choices tried for one client.
	#
	set c0to { long medium }
	set c1to { medium short }
	set c2to { short long }
	set numtests [expr {[llength $c0to] * [llength $c1to] * \
	    [llength $c2to]}]
	set m "Rep$tnum"
	set count 0
	# win and last_win are updated by rep067_elect through upvar and
	# carry state from one election to the next.
	set last_win -1
	set win -1
	set quorum { majority all }
	foreach q $quorum {
		puts "\t$m.b: Starting $numtests election with\
		    timeout tests: $q must participate"
		foreach c0 $c0to {
			foreach c1 $c1to {
				foreach c2 $c2to {
					set elist [list $c0 $c1 $c2]
					rep067_elect env_cmd envlist $qdir \
					    $m $count win last_win $elist \
					    $q $logset
					incr count
				}
			}
		}
	}

	foreach pair $envlist {
		set cenv [lindex $pair 0]
		error_check_good cenv_close [$cenv close] 0
	}

	# qdir is the same $testdir/MSGQUEUEDIR that was given to replsetup.
	replclose $qdir
	puts -nonewline \
	    "Rep$tnum: Completed at: "
	puts [clock format [clock seconds] -format "%H:%M %D"]
}

# rep067_elect --
#	Run one election among the clients with the given per-client
#	timeouts, then randomly decide whether to close (and possibly
#	clean or recover) the newly elected master before the next call.
#
#	ecmd		Name of the caller's array of env open commands,
#			indexed by client number.
#	celist		Name of the caller's list of {env envid} pairs;
#			updated when a client is reopened.
#	qdir		Message queue directory, passed to run_election.
#	msg		Prefix for progress messages.
#	count		Sequence number of this election, used in messages.
#	winner		Name of the caller's variable holding the index of
#			the expected winner, or -1 if one must be chosen.
#	lsn_lose	Name of the caller's variable holding the index of
#			the client that must not win this election, or -1.
#	elist		List of timeout names ("long", "medium" or "short"),
#			one per client.
#	quorum		"all" or "majority".
#	logset		List of log types; index i+1 belongs to client i.
#
proc rep067_elect { ecmd celist qdir msg count \
    winner lsn_lose elist quorum logset} {
	global elect_timeout elect_serial
	global timeout_ok
	global databases_in_memory
	upvar $ecmd env_cmd
	upvar $celist envlist
	upvar $winner win
	upvar $lsn_lose last_win

	# Set the proper value for the first time through the
	# loop.  On subsequent passes, timeout_ok will already
	# be set.
	if { [info exists timeout_ok] == 0 } {
		set timeout_ok 0
	}

	set nclients [llength $elist]

	#
	# Set long timeout to 3 minutes (180 sec).
	# Set medium timeout to half the long timeout.
	# Set short timeout to 10 seconds.
	set long_timeout 180000000
	set med_timeout [expr {$long_timeout / 2}]
	set short_timeout 10000000
	set cl_list {}
	foreach pair $envlist {
		set id [lindex $pair 1]
		# Environment ids start at 2 for client 0.
		set i [expr {$id - 2}]
		set clientenv($i) [lindex $pair 0]
		set to [lindex $elist $i]
		if { $to == "long" } {
			set elect_timeout($i) $long_timeout
		} elseif { $to == "medium" } {
			set elect_timeout($i) $med_timeout
		} elseif { $to == "short" } {
			set elect_timeout($i) $short_timeout
		}
		set elect_pipe($i) INVALID
		set err_cmd($i) "none"
		replclear $id
		lappend cl_list $i
	}

	# Select winner.  We want to test biggest LSN wins, and secondarily
	# highest priority wins.  If we already have a master, make sure
	# we don't start a client in that master.
	set elector 0
	if { $win == -1 } {
		if { $last_win != -1 } {
			# Exclude the previous winner from the candidates and
			# have it start the election instead.
			set cl_list [lreplace $cl_list $last_win $last_win]
			set elector $last_win
		}
		set windex [berkdb random_int 0 [expr {[llength $cl_list] - 1}]]
		set win [lindex $cl_list $windex]
	} else {
		# Easy case, if we have a master, the winner must be the
		# same one as last time, just use $win.
		# If client0 is the current existing master, start the
		# election in client 1.
		if {$win == 0} {
			set elector 1
		}
	}
	# Winner has priority 100.  If we are testing LSN winning, then
	# make sure the lowest LSN client has the highest priority.
	# Everyone else has priority 10.
	for { set i 0 } { $i < $nclients } { incr i } {
		set crash($i) 0
		if { $i == $win } {
			set pri($i) 100
		} elseif { $i == $last_win } {
			set pri($i) 200
		} else {
			set pri($i) 10
		}
	}

	puts "\t$msg.b.$count: Start election (win=client$win) $elist"
	set msg $msg.c.$count
	#
	# If we want all sites, then set nsites and nvotes the same.
	# otherwise, we need to increase nsites to account
	# for the master that is "down".
	#
	if { $quorum == "all" } {
		set nsites $nclients
	} else {
		set nsites [expr {$nclients + 1}]
	}
	set nvotes $nclients
	if { $databases_in_memory } {
		set dbname { "" "test.db" }
	} else {
		set dbname "test.db"
	}

	run_election env_cmd envlist err_cmd pri crash \
	    $qdir $msg $elector $nsites $nvotes $nclients $win \
	    0 $dbname 0 $timeout_ok
	#
	# Sometimes test elections with an existing master.
	# Other times test elections without master by closing the
	# master we just elected and creating a new client.
	# We want to weight it to close the new master.  So, use
	# a list to cause closing about 70% of the time.
	#
	set close_list { 0 0 0 1 1 1 1 1 1 1}
	set close_len [expr {[llength $close_list] - 1}]
	set close_index [berkdb random_int 0 $close_len]

	# Unless we close the master, the next election will time out.
	set timeout_ok 1

	if { [lindex $close_list $close_index] == 1 } {
		# Declare that we expect the next election to succeed.
		set timeout_ok 0
		puts -nonewline "\t\t$msg: Closing "
		error_check_good newmaster_flush [$clientenv($win) log_flush] 0
		error_check_good newmaster_close [$clientenv($win) close] 0
		#
		# If the next test should win via LSN then remove the
		# env before starting the new client so that we
		# can guarantee this client doesn't win the next one.
		set lsn_win { 0 0 0 0 1 1 1 1 1 1 }
		set lsn_len [expr {[llength $lsn_win] - 1}]
		set lsn_index [berkdb random_int 0 $lsn_len]
		set rec_arg ""
		# True when the closed master's log type is in-memory.
		set win_inmem [expr {[string compare [lindex $logset \
		    [expr {$win + 1}]] in-memory] == 0}]
		if { [lindex $lsn_win $lsn_index] == 1 } {
			set last_win $win
			# Pull the home directory out of the env open command.
			set dirindex [lsearch -exact $env_cmd($win) "-home"]
			incr dirindex
			set lsn_dir [lindex $env_cmd($win) $dirindex]
			env_cleanup $lsn_dir
			puts -nonewline "and cleaning "
		} else {
			#
			# If we're not cleaning the env, decide if we should
			# run recovery upon reopening the env.  This causes
			# two things:
			# 1. Removal of region files which forces the env
			# to read its __db.rep.egen file.
			# 2. Adding a couple log records, so this client must
			# be the next winner as well since it'll have the
			# biggest LSN.
			#
			set rec_win { 0 0 0 0 0 0 1 1 1 1 }
			set rec_len [expr {[llength $rec_win] - 1}]
			set rec_index [berkdb random_int 0 $rec_len]
			if { [lindex $rec_win $rec_index] == 1 } {
				puts -nonewline "and recovering "
				set rec_arg "-recover"
				#
				# If we're in memory and about to run
				# recovery, we force ourselves not to win
				# the next election because recovery will
				# blow away the entire log in memory.
				# However, we don't skip this entirely
				# because we still want to force reading
				# of __db.rep.egen.
				#
				if { $win_inmem } {
					set last_win $win
				} else {
					set last_win -1
				}
			} else {
				set last_win -1
			}
		}
		puts "new master, new client $win"
		set clientenv($win) [eval $env_cmd($win) $rec_arg]
		error_check_good cl($win) [is_valid_env $clientenv($win)] TRUE
		#
		# Since we started a new client, we need to replace it
		# in the message processing list so that we get the
		# new Tcl handle name in there.
		set newelector "$clientenv($win) [expr {$win + 2}]"
		set envlist [lreplace $envlist $win $win $newelector]
		# Keep $win as the forced winner only when we recovered an
		# on-disk log; in every other case the next call picks a
		# new winner.
		if { $rec_arg == "" || $win_inmem } {
			set win -1
		}
		#
		# Since we started a new client we want to give them
		# all a chance to process everything outstanding before
		# the election on the next iteration.
		#
		process_msgs $envlist
	}
}