# See the file LICENSE for redistribution information.
#
# Copyright (c) 2004,2008 Oracle. All rights reserved.
#
# $Id: rep039.tcl,v 1.31 2008/04/10 17:19:47 carol Exp $
#
# TEST rep039
# TEST Test of interrupted internal initialization changes. The
# TEST interruption is due to a changed master, or the client crashing,
# TEST or both.
# TEST
# TEST One master, two clients.
# TEST Generate several log files. Remove old master log files.
# TEST Restart client, optionally having "cleaned" client env dir. Either
# TEST way, this has the effect of forcing an internal init.
# TEST Interrupt the internal init.
# TEST Vary the number of times we process messages to make sure
# TEST the interruption occurs at varying stages of the first internal
# TEST initialization.
# TEST
# TEST Run for btree and queue only because of the number of permutations.
# TEST

# rep039 --
#	Driver: runs rep039_sub once for every combination of recovery
#	option, crash type, clean/noclean, archive/noarchive, client log
#	type and number of message-processing iterations.
#
# Arguments:
#	method	access method under test; only btree and queue are run
#	niter	number of entries handed to rep_test per batch (default 200)
#	tnum	test number used in progress messages (default "039")
#	args	extra arguments; passed through convert_args and on to
#		rep039_sub.  The test is skipped if -pagesize is present.
#
# Returns:
#	The list of btree/queue methods when $checking_valid_methods is
#	set; otherwise nothing.
proc rep039 { method { niter 200 } { tnum "039" } args } {

	source ./include.tcl

	# Run for btree and queue methods only.
	if { $checking_valid_methods } {
		set test_methods {}
		# Use a dedicated loop variable so the "method" parameter
		# is not overwritten.
		foreach m $valid_methods {
			if { [is_btree $m] == 1 || [is_queue $m] == 1 } {
				lappend test_methods $m
			}
		}
		return $test_methods
	}
	if { [is_btree $method] == 0 && [is_queue $method] == 0 } {
		puts "Rep$tnum: skipping for non-btree, non-queue method."
		return
	}

	# Skip for mixed-mode logging -- this test has a very large
	# set of iterations already.
	global mixed_mode_logging
	if { $mixed_mode_logging > 0 } {
		puts "Rep$tnum: Skipping for mixed mode logging."
		return
	}

	# This test needs to set its own pagesize.
	set pgindex [lsearch -exact $args "-pagesize"]
	if { $pgindex != -1 } {
		puts "Rep$tnum: skipping for specific pagesizes"
		return
	}

	set args [convert_args $method $args]

	# Run the body of the test with and without recovery,
	# and with and without cleaning.
	set cleanopts { noclean clean }
	set archopts { archive noarchive }
	set nummsgs 4
	# The announcement is a script evaluated inside the innermost loop,
	# so it picks up the current values of all loop variables.
	set announce {puts "Rep$tnum ($method $r $clean $a $crash $l $args):\
	    Test of internal init. $i message iters. \
	    Test $cnt of $maxtest tests $with recovery."}
	foreach r $test_recopts {
		# Client crashes are only simulated with recovery, and never
		# on Windows or HP-UX (see rep039_sub for the reasons).
		if { $r == "-recover" && ! $is_windows_test && ! $is_hp_test } {
			set crashopts { master_change client_crash both }
		} else {
			set crashopts { master_change }
		}
		# Only one of the three sites in the replication group needs to
		# be tested with in-memory logs: the "client under test".
		#
		if { $r == "-recover" } {
			set cl_logopts { on-disk }
			set with "with"
		} else {
			set cl_logopts { on-disk in-memory }
			set with "without"
		}
		set maxtest [expr {[llength $crashopts] * \
		    [llength $cleanopts] * \
		    [llength $archopts] * \
		    [llength $cl_logopts] * \
		    $nummsgs}]
		set cnt 1
		foreach crash $crashopts {
			foreach clean $cleanopts {
				foreach a $archopts {
					foreach l $cl_logopts {
						for { set i 1 } \
						    { $i <= $nummsgs } \
						    { incr i } {
							eval $announce
							rep039_sub $method \
							    $niter $tnum $r \
							    $clean $a $crash \
							    $l $i $args
							incr cnt
						}
					}
				}
			}
		}
	}
}

# rep039_sub --
#	Run one permutation of the interrupted-internal-init test.
#
# Arguments:
#	method		access method passed to rep_test
#	niter		number of entries per rep_test batch while the test
#			client is being left behind
#	tnum		test number used in progress messages
#	recargs		extra env-open arguments (e.g. "-recover"); appended
#			to every environment open command
#	clean		"clean" to wipe the test client's env dir before it
#			is reopened, "noclean" to leave it
#	archive		"archive" to also run db_archive -d on the other
#			client; anything else skips that step
#	crash		one of "master_change", "client_crash" or "both"
#	cl_logopt	"on-disk" or "in-memory": log type of the test client
#	pmsgs		number of single message-processing passes performed
#			(in total) before the interruption is injected
#	largs		extra arguments appended to each rep_test call
#
# NOTE(review): the step labels printed below are not strictly sequential;
# ".d" and ".g" are each used for more than one step.  The strings are left
# unchanged so existing test output stays comparable.
proc rep039_sub \
    { method niter tnum recargs clean archive crash cl_logopt pmsgs largs } {
	global testdir
	global util_path
	global rep_verbose
	global verbose_type

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}

	# Decode the crash type into two independent flags.
	set master_change false
	set client_crash false
	switch -exact -- $crash {
		master_change {
			set master_change true
		}
		client_crash {
			set client_crash true
		}
		both {
			set master_change true
			set client_crash true
		}
		default {
			error "FAIL:[timestamp] '$crash' is an unrecognized crash type"
		}
	}

	env_cleanup $testdir

	replsetup $testdir/MSGQUEUEDIR

	# This test has three replication sites: a master, a client whose
	# behavior is under test, and another client.  We'll call them
	# "A", "B" and "C".  At one point during the test, we may (depending on
	# the setting of $master_change) switch roles between the master and the
	# other client.
	#
	# The initial site/role assignments are as follows:
	#
	#     A = master
	#     B = client under test
	#     C = other client
	#
	# In the case where we do switch roles, the roles become:
	#
	#     A = other client
	#     B = client under test (no change here)
	#     C = master
	#
	# Although the real names are A, B, and C, we'll use mnemonic names
	# whenever possible.  In particular, this means that we'll have to
	# re-jigger the mnemonic names after the role switch.

	file mkdir [set dirs(A) $testdir/SITE_A]
	file mkdir [set dirs(B) $testdir/SITE_B]
	file mkdir [set dirs(C) $testdir/SITE_C]

	# Log size is small so we quickly create more than one.
	# The documentation says that the log file must be at least
	# four times the size of the in-memory log buffer.
	set pagesize 4096
	append largs " -pagesize $pagesize "
	set log_buf [expr {$pagesize * 2}]
	set log_max [expr {$log_buf * 4}]

	# Set up the three sites: A, B, and C will correspond to EID's
	# 1, 2, and 3 in the obvious way.  As we start out, site A is always the
	# master.
	#
	repladd 1
	set env_A_cmd "berkdb_env_noerr -create -txn nosync $verbargs \
	    -log_buffer $log_buf -log_max $log_max -errpfx SITE_A \
	    -home $dirs(A) -rep_transport \[list 1 replsend\]"
	set envs(A) [eval $env_A_cmd $recargs -rep_master]

	# Open a client
	repladd 2
	set txn_arg [adjust_txnargs $cl_logopt]
	set log_arg [adjust_logargs $cl_logopt]
	if { $cl_logopt == "on-disk" } {
		# Override in this case, because we want to specify log_buffer.
		set log_arg "-log_buffer $log_buf"
	}
	set env_B_cmd "berkdb_env_noerr -create $txn_arg $verbargs \
	    $log_arg -log_max $log_max -errpfx SITE_B \
	    -home $dirs(B) -rep_transport \[list 2 replsend\]"
	set envs(B) [eval $env_B_cmd $recargs -rep_client]

	# Open 2nd client
	repladd 3
	set env_C_cmd "berkdb_env_noerr -create -txn nosync $verbargs \
	    -log_buffer $log_buf -log_max $log_max -errpfx SITE_C \
	    -home $dirs(C) -rep_transport \[list 3 replsend\]"
	set envs(C) [eval $env_C_cmd $recargs -rep_client]

	# Turn off throttling for this test.
	foreach site [array names envs] {
		$envs($site) rep_limit 0 0
	}

	# Bring the clients online by processing the startup messages.
	set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}"
	process_msgs $envlist

	# Set up the (indirect) mnemonic role names for the first part of the
	# test.
	set master A
	set test_client B
	set other C

	# Clobber replication's 30-second anti-archive timer, which will have
	# been started by client sync-up internal init, so that we can do a
	# log_archive in a moment.
	#
	$envs($master) test force noarchive_timeout

	# Run rep_test in the master (and update client).
	puts "\tRep$tnum.a: Running rep_test in replicated env."
	eval rep_test $method $envs($master) NULL $niter 0 0 0 0 $largs
	process_msgs $envlist

	puts "\tRep$tnum.b: Close client."
	error_check_good client_close [$envs($test_client) close] 0

	# Remember the newest log file the test client has; the master keeps
	# generating and archiving logs until that file is gone from the
	# master.  (exec is called directly so that paths are passed as
	# single words and are not re-parsed by eval.)
	set res [exec $util_path/db_archive -l -h $dirs($test_client)]
	set last_client_log [lindex [lsort $res] end]

	set stop 0
	while { $stop == 0 } {
		# Run rep_test in the master (don't update client).
		puts "\tRep$tnum.c: Running rep_test in replicated env."
		eval rep_test $method $envs($master) NULL $niter 0 0 0 0 $largs
		#
		# Clear messages for first client.  We want that site
		# to get far behind.
		#
		replclear 2
		puts "\tRep$tnum.d: Run db_archive on master."
		set res [exec $util_path/db_archive -d -h $dirs($master)]
		set res [exec $util_path/db_archive -l -h $dirs($master)]
		if { [lsearch -exact $res $last_client_log] == -1 } {
			set stop 1
		}
	}

	# Bring only the master and the other client up to date.
	set envlist "{$envs($master) 1} {$envs($other) 3}"
	process_msgs $envlist

	if { $archive == "archive" } {
		puts "\tRep$tnum.d: Run db_archive on other client."
		set res [exec $util_path/db_archive -l -h $dirs($other)]
		error_check_bad \
		    log.1.present [lsearch -exact $res log.0000000001] -1
		set res [exec $util_path/db_archive -d -h $dirs($other)]
		set res [exec $util_path/db_archive -l -h $dirs($other)]
		error_check_good \
		    log.1.gone [lsearch -exact $res log.0000000001] -1
	} else {
		puts "\tRep$tnum.d: Skipping db_archive on other client."
	}

	puts "\tRep$tnum.e: Reopen test client ($clean)."
	if { $clean == "clean" } {
		env_cleanup $dirs($test_client)
	}

	# (The test client is always site B, EID 2.)
	#
	set envs(B) [eval $env_B_cmd $recargs -rep_client]
	error_check_good client_env [is_valid_env $envs(B)] TRUE
	$envs(B) rep_limit 0 0

	# Hold an open database handle while doing internal init, to make sure
	# no bad lock interactions are happening.  But only do so some of the
	# time.
	#
	if { $clean == "noclean" && [berkdb random_int 0 1] == 1 } {
		puts "\tRep$tnum.g: Hold open db handle from client app."
		set cdb [berkdb_open_noerr -env $envs($test_client) "test.db"]
		error_check_good dbopen [is_valid_db $cdb] TRUE
		set ccur [$cdb cursor]
		error_check_good curs [is_valid_cursor $ccur $cdb] TRUE
		# Read the first record through the cursor; the value itself
		# is not needed.
		$ccur get -first
		error_check_good cclose [$ccur close] 0
	} else {
		puts "\tRep$tnum.g: (No client app handle will be held.)"
		set cdb "NONE"
	}

	set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}"
	proc_msgs_once $envlist

	#
	# We want to simulate a master continually getting new
	# records while an update is going on.
	#
	set entries 10
	eval rep_test $method $envs($master) NULL $entries $niter 0 0 0 $largs
	#
	# We call proc_msgs_once N times to get us into page recovery:
	# 1. Send master messages and client finds master.
	# 2. Master replies and client does verify.
	# 3. Master gives verify_fail and client does update_req.
	# 4. Master sends update info and client does page_req.
	#
	# We vary the number of times we call proc_msgs_once (via pmsgs)
	# so that we test switching master at each point in the
	# internal initialization processing.
	#
	# One pass was already made above, so only pmsgs - 1 more are needed.
	puts "\tRep$tnum.f: Get partially through initialization ($pmsgs iters)"
	for { set i 1 } { $i < $pmsgs } { incr i } {
		proc_msgs_once $envlist
	}

	if { [string is true $master_change] } {
		replclear 1
		replclear 3
		puts "\tRep$tnum.g: Downgrade/upgrade master."

		# Downgrade the existing master to a client, switch around the
		# roles, and then upgrade the newly appointed master.
		error_check_good downgrade [$envs($master) rep_start -client] 0

		set master C
		set other A

		error_check_good upgrade [$envs($master) rep_start -master] 0
	}

	# Simulate a client crash: simply abandon the handle without closing it.
	# Note that this doesn't work on Windows, because there you can't remove
	# a file if anyone (including yourself) has it open.  This also does not
	# work on HP-UX, because there you are not allowed to open a second
	# handle on an env.
	#
	# Note that crashing only makes sense with "-recover".
	#
	if { [string is true $client_crash] } {
		error_check_good assert [string compare $recargs "-recover"] 0

		set abandoned_env $envs($test_client)
		set abandoned true

		set envs($test_client) [eval $env_B_cmd $recargs -rep_client]
		$envs($test_client) rep_limit 0 0

		# Again, remember: whatever the current roles, a site and its EID
		# stay linked always.
		#
		set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}"
	} else {
		set abandoned false
	}

	process_msgs $envlist
	#
	# Now simulate continual updates to the new master.  Each
	# time through we just process messages once before
	# generating more updates.
	#
	set nupdates 10
	for { set i 0 } { $i < $nupdates } { incr i } {
		set start [expr {$i * $entries}]
		eval rep_test $method $envs($master) NULL $entries $start \
		    $start 0 0 $largs
		# Every pass must have processed at least one message.
		set nproced [proc_msgs_once $envlist]
		error_check_bad nproced $nproced 0
	}
	# Starting key for the final batch: one past the last batch above.
	set start [expr {$nupdates * $entries}]
	process_msgs $envlist

	puts "\tRep$tnum.h: Verify logs and databases"
	# Whether or not we've switched roles, it's always site A that may have
	# had its logs archived away.  When the $init_test flag is turned on,
	# rep_verify allows the site in the second position to have
	# (more-)archived logs, so we have to abuse the calling signature a bit
	# here to get this to work.  (I.e., even when A is still master and C is
	# still the other client, we have to pass things in this order so that
	# the $init_test different-sized-logs trick can work.)
	#
	set init_test 1
	rep_verify $dirs(C) $envs(C) $dirs(A) $envs(A) $init_test

	# Process messages again in case we are running with debug_rop.
	process_msgs $envlist
	rep_verify $dirs($master) $envs($master) \
	    $dirs($test_client) $envs($test_client) $init_test

	# Add records to the master and update client.
	puts "\tRep$tnum.i: Add more records and check again."
	set entries 10
	eval rep_test $method $envs($master) NULL $entries $start \
	    $start 0 0 $largs
	process_msgs $envlist 0 NONE err

	# Check again that everyone is identical.
	rep_verify $dirs(C) $envs(C) $dirs(A) $envs(A) $init_test
	process_msgs $envlist
	rep_verify $dirs($master) $envs($master) \
	    $dirs($test_client) $envs($test_client) $init_test

	if { $cdb != "NONE" } {
		if { $abandoned } {
			# The $cdb was opened in an env which was then
			# abandoned, recovered, marked panic'ed.  We don't
			# really care; we're just trying to clean up resources.
			#
			catch {$cdb close}
		} else {
			error_check_good clientdb_close [$cdb close] 0
		}
	}
	error_check_good masterenv_close [$envs($master) close] 0
	error_check_good clientenv_close [$envs($test_client) close] 0
	error_check_good clientenv2_close [$envs($other) close] 0
	if { $abandoned } {
		# Best-effort close of the handle abandoned by the simulated
		# crash; failure here is expected and ignored.
		catch {$abandoned_env close}
	}
	replclose $testdir/MSGQUEUEDIR
}