# See the file LICENSE for redistribution information.
#
# Copyright (c) 2004-2009 Oracle.  All rights reserved.
#
# $Id$
#
# TEST	rep039
# TEST	Test of interrupted internal initialization.  The
# TEST	interruption is due to a changed master, or the client crashing,
# TEST	or both.
# TEST
# TEST	One master, two clients.
# TEST	Generate several log files. Remove old master log files.
# TEST	Restart client, optionally having "cleaned" client env dir.  Either
# TEST	way, this has the effect of forcing an internal init.
# TEST	Interrupt the internal init.
# TEST	Vary the number of times we process messages to make sure
# TEST	the interruption occurs at varying stages of the first internal
# TEST	initialization.
# TEST
# TEST	Run for btree and queue only because of the number of permutations.
# TEST

# rep039 --
#	Driver: iterates over every combination of recovery option, crash
#	type, clean/noclean, archive/noarchive, client log location and
#	number of message-processing iterations, calling rep039_sub once
#	per combination.
#
# Arguments:
#	method	Access method under test; only btree and queue are run.
#	niter	Number of entries passed to rep_test (default 200).
#	tnum	Test number used in the progress messages (default "039").
#	args	Extra database-open arguments; "-pagesize" causes a skip
#		because the test sets its own page size.
#
# Results:
#	When $checking_valid_methods is set, returns the subset of
#	$valid_methods that are btree or queue.  Otherwise returns nothing.
proc rep039 { method { niter 200 } { tnum "039" } args } {

    source ./include.tcl
    global databases_in_memory
    global repfiles_in_memory

    # Run for btree and queue methods only.
    if { $checking_valid_methods } {
        set test_methods {}
        foreach method $valid_methods {
            if { [is_btree $method] == 1 || \
                [is_queue $method] == 1 } {
                lappend test_methods $method
            }
        }
        return $test_methods
    }
    if { [is_btree $method] == 0 && [is_queue $method] == 0 } {
        puts "Rep$tnum: skipping for non-btree, non-queue method."
        return
    }

    # Skip for mixed-mode logging -- this test has a very large
    # set of iterations already.
    global mixed_mode_logging
    if { $mixed_mode_logging > 0 } {
        puts "Rep$tnum: Skipping for mixed mode logging."
        return
    }

    # This test needs to set its own pagesize.
    set pgindex [lsearch -exact $args "-pagesize"]
    if { $pgindex != -1 } {
        puts "Rep$tnum: skipping for specific pagesizes"
        return
    }

    set args [convert_args $method $args]

    # Set up for on-disk or in-memory databases.
    set msg "using on-disk databases"
    if { $databases_in_memory } {
        set msg "using named in-memory databases"
        if { [is_queueext $method] } {
            puts -nonewline "Skipping rep$tnum for method "
            puts "$method with named in-memory databases."
            return
        }
    }

    set msg2 "and on-disk replication files"
    if { $repfiles_in_memory } {
        set msg2 "and in-memory replication files"
    }

    # Run the body of the test with and without recovery,
    # and with and without cleaning.
    set cleanopts { noclean clean }
    set archopts { archive noarchive }
    set nummsgs 4
    # The announcement is kept as a script and eval'ed inside the innermost
    # loop so that it picks up the current values of the loop variables.
    set announce {puts "Rep$tnum ($method $r $clean $a $crash $l $args):\
        Test of internal init. $i message iters. \
        Test $cnt of $maxtest tests $with recovery $msg $msg2."}
    foreach r $test_recopts {
        # Client crashes are only simulated with recovery, and never on
        # Windows or HP-UX (see the explanation in rep039_sub).
        if { $r == "-recover" && ! $is_windows_test && ! $is_hp_test } {
            set crashopts { master_change client_crash both }
        } else {
            set crashopts { master_change }
        }
        # Only one of the three sites in the replication group needs to
        # be tested with in-memory logs: the "client under test".
        #
        if { $r == "-recover" } {
            set cl_logopts { on-disk }
            set with "with"
        } else {
            set cl_logopts { on-disk in-memory }
            set with "without"
        }
        set maxtest [expr {[llength $crashopts] * \
            [llength $cleanopts] * \
            [llength $archopts] * \
            [llength $cl_logopts] * \
            $nummsgs}]
        set cnt 1
        foreach crash $crashopts {
            foreach clean $cleanopts {
                foreach a $archopts {
                    foreach l $cl_logopts {
                        for { set i 1 } \
                            { $i <= $nummsgs } \
                            { incr i } {
                            eval $announce
                            rep039_sub $method \
                                $niter $tnum $r \
                                $clean $a $crash \
                                $l $i $args
                            incr cnt
                        }
                    }
                }
            }
        }
    }
}

# rep039_sub --
#	Runs one permutation of the interrupted-internal-init test with
#	three sites (A, B, C).
#
# Arguments:
#	method		Access method passed through to rep_test.
#	niter		Number of entries for the bulk rep_test runs.
#	tnum		Test number used in the progress messages.
#	recargs		Extra env-open arguments; must be "-recover" when a
#			client crash is requested (asserted below).
#	clean		"clean" to wipe the test client's env dir before it
#			is reopened; anything else leaves it in place.
#	archive		"archive" to run db_archive -d on the other client.
#	crash		One of "master_change", "client_crash" or "both".
#	cl_logopt	"on-disk" or "in-memory": log location for the client
#			under test.
#	pmsgs		Controls how far into internal init the client gets
#			before the interruption (pmsgs - 1 extra calls to
#			proc_msgs_once).
#	largs		Extra arguments appended to each rep_test call.
#
# NOTE(review): the step labels printed below are not strictly sequential
# (".d" and ".g" are each used twice and ".f" follows ".g").  They are left
# untouched so the test's output is unchanged.
proc rep039_sub \
    { method niter tnum recargs clean archive crash cl_logopt pmsgs largs } {
    global testdir
    global util_path
    global databases_in_memory
    global repfiles_in_memory
    global rep_verbose
    global verbose_type

    set verbargs ""
    if { $rep_verbose == 1 } {
        set verbargs " -verbose {$verbose_type on} "
    }

    set repmemargs ""
    if { $repfiles_in_memory } {
        set repmemargs "-rep_inmem_files "
    }

    # Decode the crash type into two independent flags.
    set master_change false
    set client_crash false
    if { $crash == "master_change" } {
        set master_change true
    } elseif { $crash == "client_crash" } {
        set client_crash true
    } elseif { $crash == "both" } {
        set master_change true
        set client_crash true
    } else {
        error "FAIL:[timestamp] '$crash' is an unrecognized crash type"
    }

    env_cleanup $testdir

    replsetup $testdir/MSGQUEUEDIR

    # This test has three replication sites: a master, a client whose
    # behavior is under test, and another client.  We'll call them
    # "A", "B" and "C".  At one point during the test, we may (depending on
    # the setting of $master_change) switch roles between the master and the
    # other client.
    #
    # The initial site/role assignments are as follows:
    #
    #     A = master
    #     B = client under test
    #     C = other client
    #
    # In the case where we do switch roles, the roles become:
    #
    #     A = other client
    #     B = client under test (no change here)
    #     C = master
    #
    # Although the real names are A, B, and C, we'll use mnemonic names
    # whenever possible.  In particular, this means that we'll have to
    # re-jigger the mnemonic names after the role switch.

    file mkdir [set dirs(A) $testdir/SITE_A]
    file mkdir [set dirs(B) $testdir/SITE_B]
    file mkdir [set dirs(C) $testdir/SITE_C]

    # Log size is small so we quickly create more than one.
    # The documentation says that the log file must be at least
    # four times the size of the in-memory log buffer.
    set pagesize 4096
    append largs " -pagesize $pagesize "
    set log_buf [expr {$pagesize * 2}]
    set log_max [expr {$log_buf * 4}]

    # Set up the three sites: A, B, and C will correspond to EID's
    # 1, 2, and 3 in the obvious way.  As we start out, site A is always the
    # master.
    #
    repladd 1
    set env_A_cmd "berkdb_env_noerr -create -txn nosync \
        $verbargs $repmemargs \
        -log_buffer $log_buf -log_max $log_max -errpfx SITE_A \
        -home $dirs(A) -rep_transport \[list 1 replsend\]"
    set envs(A) [eval $env_A_cmd $recargs -rep_master]

    # Open a client
    repladd 2
    set txn_arg [adjust_txnargs $cl_logopt]
    set log_arg [adjust_logargs $cl_logopt]
    if { $cl_logopt == "on-disk" } {
        # Override in this case, because we want to specify log_buffer.
        set log_arg "-log_buffer $log_buf"
    }
    set env_B_cmd "berkdb_env_noerr -create $txn_arg \
        $verbargs $repmemargs \
        $log_arg -log_max $log_max -errpfx SITE_B \
        -home $dirs(B) -rep_transport \[list 2 replsend\]"
    set envs(B) [eval $env_B_cmd $recargs -rep_client]

    # Open 2nd client
    repladd 3
    set env_C_cmd "berkdb_env_noerr -create -txn nosync \
        $verbargs $repmemargs \
        -log_buffer $log_buf -log_max $log_max -errpfx SITE_C \
        -home $dirs(C) -rep_transport \[list 3 replsend\]"
    set envs(C) [eval $env_C_cmd $recargs -rep_client]

    # Turn off throttling for this test.
    foreach site [array names envs] {
        $envs($site) rep_limit 0 0
    }

    # Bring the clients online by processing the startup messages.
    set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}"
    process_msgs $envlist

    # Set up the (indirect) mnemonic role names for the first part of the
    # test.
    set master A
    set test_client B
    set other C

    # Clobber replication's 30-second anti-archive timer, which will have
    # been started by client sync-up internal init, so that we can do a
    # log_archive in a moment.
    #
    $envs($master) test force noarchive_timeout

    # Run rep_test in the master (and update client).
    puts "\tRep$tnum.a: Running rep_test in replicated env."
    eval rep_test $method $envs($master) NULL $niter 0 0 0 $largs
    process_msgs $envlist

    puts "\tRep$tnum.b: Close client."
    error_check_good client_close [$envs($test_client) close] 0

    # Remember the newest log file the test client has; the loop below
    # runs until the master no longer has that file.
    set res [eval exec $util_path/db_archive -l -h $dirs($test_client)]
    set last_client_log [lindex [lsort $res] end]

    set stop 0
    while { $stop == 0 } {
        # Run rep_test in the master (don't update client).
        puts "\tRep$tnum.c: Running rep_test in replicated env."
        eval rep_test $method $envs($master) NULL $niter 0 0 0 $largs
        #
        # Clear messages for first client.  We want that site
        # to get far behind.
        #
        replclear 2
        puts "\tRep$tnum.d: Run db_archive on master."
        set res [eval exec $util_path/db_archive -d -h $dirs($master)]
        set res [eval exec $util_path/db_archive -l -h $dirs($master)]
        if { [lsearch -exact $res $last_client_log] == -1 } {
            set stop 1
        }
    }

    # Bring only the master and the other client up to date.
    set envlist "{$envs($master) 1} {$envs($other) 3}"
    process_msgs $envlist

    if { $archive == "archive" } {
        puts "\tRep$tnum.d: Run db_archive on other client."
        set res [eval exec $util_path/db_archive -l -h $dirs($other)]
        error_check_bad \
            log.1.present [lsearch -exact $res log.0000000001] -1
        set res [eval exec $util_path/db_archive -d -h $dirs($other)]
        set res [eval exec $util_path/db_archive -l -h $dirs($other)]
        error_check_good \
            log.1.gone [lsearch -exact $res log.0000000001] -1
    } else {
        puts "\tRep$tnum.d: Skipping db_archive on other client."
    }

    puts "\tRep$tnum.e: Reopen test client ($clean)."
    if { $clean == "clean" } {
        env_cleanup $dirs($test_client)
    }

    # (The test client is always site B, EID 2.)
    #
    set envs(B) [eval $env_B_cmd $recargs -rep_client]
    error_check_good client_env [is_valid_env $envs(B)] TRUE
    $envs(B) rep_limit 0 0

    # Hold an open database handle while doing internal init, to make sure
    # no bad lock interactions are happening.  But only do so some of the
    # time, and of course only if it's reasonable to expect the database to
    # exist at this point.  (It won't, if we're using in-memory databases
    # and we've just started the client with recovery, since recovery blows
    # away the mpool.)  Set up database as in-memory or on-disk first.
    #
    if { $databases_in_memory } {
        set dbname { "" "test.db" }
        set have_db [expr {$recargs != "-recover"}]
    } else {
        set dbname "test.db"
        set have_db true
    }

    if {$clean == "noclean" && $have_db && [berkdb random_int 0 1] == 1} {
        puts "\tRep$tnum.g: Hold open db handle from client app."
        set cdb [eval \
            {berkdb_open_noerr -env} $envs($test_client) $dbname]
        error_check_good dbopen [is_valid_db $cdb] TRUE
        set ccur [$cdb cursor]
        error_check_good curs [is_valid_cursor $ccur $cdb] TRUE
        # Read the first record through the cursor; the value itself is
        # not needed afterwards.
        $ccur get -first
        error_check_good cclose [$ccur close] 0
    } else {
        puts "\tRep$tnum.g: (No client app handle will be held.)"
        set cdb "NONE"
    }

    set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}"
    proc_msgs_once $envlist

    #
    # We want to simulate a master continually getting new
    # records while an update is going on.
    #
    set entries 10
    eval rep_test $method $envs($master) NULL $entries $niter 0 0 $largs
    #
    # We call proc_msgs_once N times to get us into page recovery:
    # 1.  Send master messages and client finds master.
    # 2.  Master replies and client does verify.
    # 3.  Master gives verify_fail and client does update_req.
    # 4.  Master send update info and client does page_req.
    #
    # We vary the number of times we call proc_msgs_once (via pmsgs)
    # so that we test switching master at each point in the
    # internal initialization processing.
    #
    set nproced 0
    puts "\tRep$tnum.f: Get partially through initialization ($pmsgs iters)"
    for { set i 1 } { $i < $pmsgs } { incr i } {
        incr nproced [proc_msgs_once $envlist]
    }

    if { [string is true $master_change] } {
        replclear 1
        replclear 3
        puts "\tRep$tnum.g: Downgrade/upgrade master."

        # Downgrade the existing master to a client, switch around the
        # roles, and then upgrade the newly appointed master.
        error_check_good downgrade [$envs($master) rep_start -client] 0

        set master C
        set other A

        error_check_good upgrade [$envs($master) rep_start -master] 0
    }

    # Simulate a client crash: simply abandon the handle without closing it.
    # Note that this doesn't work on Windows, because there you can't remove
    # a file if anyone (including yourself) has it open.  This also does not
    # work on HP-UX, because there you are not allowed to open a second
    # handle on an env.
    #
    # Note that crashing only makes sense with "-recover".
    #
    if { [string is true $client_crash] } {
        error_check_good assert [string compare $recargs "-recover"] 0

        set abandoned_env $envs($test_client)
        set abandoned true

        set envs($test_client) [eval $env_B_cmd $recargs -rep_client]
        $envs($test_client) rep_limit 0 0

        # Again, remember: whatever the current roles, a site and its EID
        # stay linked always.
        #
        set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}"
    } else {
        set abandoned false
    }

    process_msgs $envlist
    #
    # Now simulate continual updates to the new master.  Each
    # time through we just process messages once before
    # generating more updates.
    #
    set niter 10
    for { set i 0 } { $i < $niter } { incr i } {
        set nproced 0
        set start [expr {$i * $entries}]
        eval rep_test $method $envs($master) NULL $entries $start \
            $start 0 $largs
        incr nproced [proc_msgs_once $envlist]
        error_check_bad nproced $nproced 0
    }
    # $i is now $niter, so $start points just past the last batch added
    # above; it is reused for the final rep_test call below.
    set start [expr {$i * $entries}]
    process_msgs $envlist

    puts "\tRep$tnum.h: Verify logs and databases"
    # Whether or not we've switched roles, it's always site A that may have
    # had its logs archived away.  When the $init_test flag is turned on,
    # rep_verify allows the site in the second position to have
    # (more-)archived logs, so we have to abuse the calling signature a bit
    # here to get this to work.  (I.e., even when A is still master and C is
    # still the other client, we have to pass things in this order so that
    # the $init_test different-sized-logs trick can work.)
    #
    set init_test 1
    rep_verify $dirs(C) $envs(C) $dirs(A) $envs(A) $init_test

    # Process messages again in case we are running with debug_rop.
    process_msgs $envlist
    rep_verify $dirs($master) $envs($master) \
        $dirs($test_client) $envs($test_client) $init_test

    # Add records to the master and update client.
    puts "\tRep$tnum.i: Add more records and check again."
    set entries 10
    eval rep_test $method $envs($master) NULL $entries $start \
        $start 0 $largs
    process_msgs $envlist 0 NONE err

    # Check again that everyone is identical.
    rep_verify $dirs(C) $envs(C) $dirs(A) $envs(A) $init_test
    process_msgs $envlist
    rep_verify $dirs($master) $envs($master) \
        $dirs($test_client) $envs($test_client) $init_test

    if {$cdb != "NONE"} {
        if {$abandoned} {
            # The $cdb was opened in an env which was then
            # abandoned, recovered, marked panic'ed.  We don't
            # really care; we're just trying to clean up resources.
            #
            catch {$cdb close}
        } else {
            error_check_good clientdb_close [$cdb close] 0
        }
    }
    error_check_good masterenv_close [$envs($master) close] 0
    error_check_good clientenv_close [$envs($test_client) close] 0
    error_check_good clientenv2_close [$envs($other) close] 0
    if { $abandoned } {
        catch {$abandoned_env close}
    }
    replclose $testdir/MSGQUEUEDIR
}