# See the file LICENSE for redistribution information.
#
# Copyright (c) 2002-2009 Oracle. All rights reserved.
#
# $Id$
#
# TEST	rep005
# TEST	Replication election test with error handling.
# TEST
# TEST	Run rep_test in a replicated master environment;
# TEST	hold an election among a group of clients to make sure they select
# TEST	a proper master from amongst themselves, forcing errors at various
# TEST	locations in the election path.

# rep005 --
#	Test entry point.  Skips unsupported platforms/methods, prints a
#	banner, and runs rep005_sub once for each log configuration
#	returned by create_logsets.
#
#	method	access method under test; only btree is run.
#	args	extra arguments forwarded (as one list) to rep005_sub.
#
#	When $checking_valid_methods is set, returns the list of methods
#	this test supports instead of running anything.
proc rep005 { method args } {

	source ./include.tcl
	global databases_in_memory
	global repfiles_in_memory

	if { $is_windows9x_test == 1 } {
		puts "Skipping replication test on Win 9x platform."
		return
	}

	# Skip for all methods except btree.
	if { $checking_valid_methods } {
		set test_methods { btree }
		return $test_methods
	}
	if { [is_btree $method] == 0 } {
		puts "Rep005: Skipping for method $method."
		return
	}

	set msg2 "and on-disk replication files"
	if { $repfiles_in_memory } {
		set msg2 "and in-memory replication files"
	}

	set tnum "005"
	set niter 10
	set nclients 3
	# One log configuration entry for the master plus one per client.
	set logsets [create_logsets [expr {$nclients + 1}]]
	set msg "using on-disk databases"
	if { $databases_in_memory } {
		# No trailing period here: $msg is embedded mid-sentence in
		# the banner printed below.
		set msg "using named in-memory databases"
		# NOTE(review): only btree gets this far, so this check looks
		# unreachable; kept for parity with the other rep tests.
		if { [is_queueext $method] } {
			puts -nonewline "Skipping rep$tnum for method "
			puts "$method with named in-memory databases."
			return
		}
	}

	# We don't want to run this with -recover - it takes too
	# long and doesn't cover any new ground.
	set recargs ""
	foreach l $logsets {
		puts "Rep$tnum ($recargs): Replication election\
		    error test with $nclients clients $msg $msg2."
		puts -nonewline "Rep$tnum: Started at: "
		puts [clock format [clock seconds] -format "%H:%M %D"]
		puts "Rep$tnum: Master logs are [lindex $l 0]"
		for { set i 0 } { $i < $nclients } { incr i } {
			puts "Rep$tnum: Client $i logs are\
			    [lindex $l [expr {$i + 1}]]"
		}
		rep005_sub $method $tnum \
		    $niter $nclients $l $recargs $args
	}
}

# rep005_sub --
#	Run one pass of the test for a single log configuration: build a
#	master and $nclients clients, load data through the master, close
#	the master, then run an election for every combination of the
#	per-client error locations.
#
#	method		access method passed to rep_test.
#	tnum		test number string used in messages.
#	niter		iteration count passed to rep_test.
#	nclients	number of client environments to create.
#	logset		list of log types: element 0 is the master's,
#			element i+1 is client i's.
#	recargs		extra arguments appended to every env open command.
#	largs		extra arguments appended to the rep_test call.
proc rep005_sub { method tnum niter nclients logset recargs largs } {
	source ./include.tcl
	global rand_init
	error_check_good set_random_seed [berkdb srand $rand_init] 0
	global databases_in_memory
	global repfiles_in_memory
	global rep_verbose
	global verbose_type

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}

	set repmemargs ""
	if { $repfiles_in_memory } {
		set repmemargs "-rep_inmem_files "
	}

	env_cleanup $testdir

	set qdir $testdir/MSGQUEUEDIR
	replsetup $qdir

	set masterdir $testdir/MASTERDIR
	file mkdir $masterdir
	set m_logtype [lindex $logset 0]
	set m_logargs [adjust_logargs $m_logtype]
	set m_txnargs [adjust_txnargs $m_logtype]

	for { set i 0 } { $i < $nclients } { incr i } {
		set clientdir($i) $testdir/CLIENTDIR.$i
		file mkdir $clientdir($i)
		set c_logtype($i) [lindex $logset [expr {$i + 1}]]
		set c_logargs($i) [adjust_logargs $c_logtype($i)]
		set c_txnargs($i) [adjust_txnargs $c_logtype($i)]
	}

	# Open a master.
	repladd 1
	set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \
	    -event rep_event $repmemargs \
	    -home $masterdir $m_logargs -errpfx MASTER $verbargs \
	    $m_txnargs -rep_master -rep_transport \[list 1 replsend\]"
	set masterenv [eval $env_cmd(M) $recargs]

	# envlist holds "envhandle envid" pairs; the master uses id 1.
	set envlist {}
	lappend envlist "$masterenv 1"

	# Open the clients.  Client i uses environment id i+2.
	for { set i 0 } { $i < $nclients } { incr i } {
		set envid [expr {$i + 2}]
		repladd $envid
		set env_cmd($i) "berkdb_env_noerr -create \
		    -event rep_event $repmemargs \
		    -home $clientdir($i) $c_logargs($i) \
		    $c_txnargs($i) -rep_client $verbargs \
		    -errpfx CLIENT$i \
		    -rep_transport \[list $envid replsend\]"
		set clientenv($i) [eval $env_cmd($i) $recargs]
		lappend envlist "$clientenv($i) $envid"
	}

	# Process startup messages
	process_msgs $envlist
	# Run rep_test in the master.
	puts "\tRep$tnum.a: Running rep_test in replicated env."
	eval rep_test $method $masterenv NULL $niter 0 0 0 $largs

	# Process all the messages and close the master.
	process_msgs $envlist

	# Check that databases are in-memory or on-disk as expected.
	check_db_location $masterenv
	for { set i 0 } { $i < $nclients } { incr i } {
		check_db_location $clientenv($i)
	}

	error_check_good masterenv_close [$masterenv close] 0
	# Drop the master's pair so that, from here on, client i sits at
	# index i of envlist.
	set envlist [lreplace $envlist 0 0]

	for { set i 0 } { $i < $nclients } { incr i } {
		replclear [expr {$i + 2}]
	}

	# Set up the error list for each client.  Every client currently
	# uses the same list, so each election runs with either "none" or
	# "electinit" as that client's error location.
	set m "Rep$tnum"
	set count 0
	#
	set c0err { none electinit }
	set c1err $c0err
	set c2err $c0err
	set numtests [expr {[llength $c0err] * [llength $c1err] * \
	    [llength $c2err]}]
	puts "\t$m.b: Starting $numtests election with error tests"
	# win/last_win are passed by name and carried from one election to
	# the next by rep005_elect; -1 means "not set".
	set last_win -1
	set win -1
	foreach c0 $c0err {
		foreach c1 $c1err {
			foreach c2 $c2err {
				set elist [list $c0 $c1 $c2]
				rep005_elect env_cmd envlist $qdir \
				    $m $count win last_win $elist $logset
				incr count
			}
		}
	}

	foreach pair $envlist {
		set cenv [lindex $pair 0]
		error_check_good cenv_close [$cenv close] 0
	}

	replclose $qdir
	puts -nonewline \
	    "Rep$tnum: Completed at: "
	puts [clock format [clock seconds] -format "%H:%M %D"]
}

# rep005_elect --
#	Run a single election among the clients in the caller's envlist,
#	then randomly decide whether to close (and possibly clean or
#	recover) the newly elected master to set up the next election.
#
#	ecmd		name of the caller's array of env open commands,
#			indexed by client number.
#	celist		name of the caller's list of "envhandle envid" pairs;
#			updated when a client env is reopened.
#	qdir		message queue directory, passed to run_election.
#	msg		message prefix for output.
#	count		election number, used in output only.
#	winner		name of the caller's variable holding the index of
#			the client expected to win, or -1 to pick one here.
#	lsn_lose	name of the caller's variable holding the index of a
#			client that must not win the next election, or -1.
#	elist		list of error locations, one per client.
#	logset		list of log types (element i+1 belongs to client i).
proc rep005_elect { ecmd celist qdir msg count \
    winner lsn_lose elist logset} {
	global elect_timeout elect_serial
	global timeout_ok
	global databases_in_memory
	upvar $ecmd env_cmd
	upvar $celist envlist
	upvar $winner win
	upvar $lsn_lose last_win

	# Set the proper value for the first time through the
	# loop.  On subsequent passes, timeout_ok will already
	# be set.
	if { [info exists timeout_ok] == 0 } {
		set timeout_ok 0
	}

	set nclients [llength $elist]

	# Build per-client state.  Env id N corresponds to client N-2.
	set cl_list {}
	foreach pair $envlist {
		set id [lindex $pair 1]
		set i [expr {$id - 2}]
		set clientenv($i) [lindex $pair 0]
		set err_cmd($i) [lindex $elist $i]
		set elect_pipe($i) INVALID
		replclear $id
		lappend cl_list $i
	}

	# Select winner.  We want to test biggest LSN wins, and secondarily
	# highest priority wins.  If we already have a master, make sure
	# we don't start a client in that master.
	set el 0
	if { $win == -1 } {
		if { $last_win != -1 } {
			# The previous winner may not win again: remove it
			# from the candidates and let it start the election.
			set cl_list [lreplace $cl_list $last_win $last_win]
			set el $last_win
		}
		set windex [berkdb random_int 0 \
		    [expr {[llength $cl_list] - 1}]]
		set win [lindex $cl_list $windex]
	} else {
		# Easy case, if we have a master, the winner must be the
		# same one as last time, just use $win.
		# If client0 is the current existing master, start the
		# election in client 1.
		if {$win == 0} {
			set el 1
		}
	}
	# Winner has priority 100.  If we are testing LSN winning, then
	# make sure the lowest LSN client has the highest priority.
	# Everyone else has priority 10.
	for { set i 0 } { $i < $nclients } { incr i } {
		set crash($i) 0
		if { $i == $win } {
			set pri($i) 100
		} elseif { $i == $last_win } {
			set pri($i) 200
		} else {
			set pri($i) 10
		}
	}

	puts "\t$msg.b.$count: Start election (win=client$win) $elist"
	set msg $msg.c.$count
	# The election is run with the clients as the only sites, and
	# requires a vote from every one of them.
	set nsites $nclients
	set nvotes $nsites
	if { $databases_in_memory } {
		set dbname { "" test.db }
	} else {
		set dbname test.db
	}
	run_election env_cmd envlist err_cmd pri crash \
	    $qdir $msg $el $nsites $nvotes $nclients $win \
	    0 $dbname 0 $timeout_ok

	#
	# Sometimes test elections with an existing master.
	# Other times test elections without master by closing the
	# master we just elected and creating a new client.
	# We want to weight it to close the new master.  So, use
	# a list to cause closing about 70% of the time.
	#
	set close_list { 0 0 0 1 1 1 1 1 1 1}
	set close_len [expr {[llength $close_list] - 1}]
	set close_index [berkdb random_int 0 $close_len]

	# Unless we close the master, the next election will time out.
	set timeout_ok 1

	if { [lindex $close_list $close_index] == 1 } {
		# Declare that we expect the next election to succeed.
		set timeout_ok 0
		puts -nonewline "\t\t$msg: Closing "
		error_check_good log_flush [$clientenv($win) log_flush] 0
		error_check_good newmaster_close [$clientenv($win) close] 0
		#
		# If the next test should win via LSN then remove the
		# env before starting the new client so that we
		# can guarantee this client doesn't win the next one.
		# (About 60% of the time, per the list below.)
		set lsn_win { 0 0 0 0 1 1 1 1 1 1 }
		set lsn_len [expr {[llength $lsn_win] - 1}]
		set lsn_index [berkdb random_int 0 $lsn_len]
		set rec_arg ""
		# True when the winner's logs are configured as in-memory.
		set win_inmem [string equal \
		    [lindex $logset [expr {$win + 1}]] in-memory]
		if { [lindex $lsn_win $lsn_index] == 1 } {
			set last_win $win
			# Pull the env home directory out of the stored open
			# command: it is the word following "-home".
			set dirindex [lsearch -exact $env_cmd($win) "-home"]
			incr dirindex
			set lsn_dir [lindex $env_cmd($win) $dirindex]
			env_cleanup $lsn_dir
			puts -nonewline "and cleaning "
		} else {
			#
			# If we're not cleaning the env, decide if we should
			# run recovery upon reopening the env.  This causes
			# two things:
			# 1. Removal of region files which forces the env
			# to read its __db.rep.egen file.
			# 2. Adding a couple log records, so this client must
			# be the next winner as well since it'll have the
			# biggest LSN.
			#
			set rec_win { 0 0 0 0 0 0 1 1 1 1 }
			set rec_len [expr {[llength $rec_win] - 1}]
			set rec_index [berkdb random_int 0 $rec_len]
			if { [lindex $rec_win $rec_index] == 1 } {
				puts -nonewline "and recovering "
				set rec_arg "-recover"
				#
				# If we're in memory and about to run
				# recovery, we force ourselves not to win
				# the next election because recovery will
				# blow away the entire log in memory.
				# However, we don't skip this entirely
				# because we still want to force reading
				# of __db.rep.egen.
				#
				if { $win_inmem } {
					set last_win $win
				} else {
					set last_win -1
				}
			} else {
				set last_win -1
			}
		}
		puts "new master, new client $win"
		set clientenv($win) [eval $env_cmd($win) $rec_arg]
		error_check_good cl($win) [is_valid_env $clientenv($win)] TRUE
		#
		# Since we started a new client, we need to replace it
		# in the message processing list so that we get the
		# new Tcl handle name in there.
		set newel "$clientenv($win) [expr {$win + 2}]"
		set envlist [lreplace $envlist $win $win $newel]
		# Keep $win as the forced next winner only when the env was
		# reopened with recovery on on-disk logs; otherwise let the
		# next call choose a winner.
		if { $rec_arg == "" || $win_inmem } {
			set win -1
		}
		#
		# Since we started a new client we want to give them
		# all a chance to process everything outstanding before
		# the election on the next iteration.
		#
		process_msgs $envlist
	}
}