# See the file LICENSE for redistribution information.
#
# Copyright (c) 2006-2009 Oracle. All rights reserved.
#
# $Id$
#
# TEST rep065
# TEST Tests replication running with different versions.
# TEST This capability is introduced with 4.5.
# TEST
# TEST Start a replication group of 1 master and N sites, all
# TEST running some historical version greater than or equal to 4.4.
# TEST Take down a client and bring it up again running current.
# TEST Run some upgrades, make sure everything works.
# TEST
# TEST Each site runs the tcllib of its own version, but uses
# TEST the current tcl code (e.g. test.tcl).

# rep065 --
#	Entry point.  Runs rep065_sub once for every {method version}
#	pair returned by method_version.
#
# Arguments:
#	method	Access method requested by the caller.  Anything other
#		than btree is skipped, since the test builds its own
#		method list.
#	nsites	Number of sites in the replication group (default 3).
#	args	Accepted but not referenced in this proc.
#
# Results:
#	Returns the list {btree} when $checking_valid_methods is set.
#	Any error raised by an iteration is re-raised after the
#	noenv_messaging flag has been reset.
proc rep065 { method { nsites 3 } args } {
	source ./include.tcl
	global repfiles_in_memory
	global noenv_messaging

	if { $is_windows9x_test == 1 } {
		puts "Skipping replication test on Win 9x platform."
		return
	}
	#
	# Skip all methods but btree - we don't use the method, as we
	# run over all of them with varying versions.
	#
	if { $checking_valid_methods } {
		set test_methods { btree }
		return $test_methods
	}

	if { [is_btree $method] == 0 } {
		puts "Rep065: Skipping for method $method."
		return
	}

	set msg2 "and on-disk replication files"
	if { $repfiles_in_memory } {
		set msg2 "and in-memory replication files"
	}

	# Make the list of {method version} pairs to test.
	set mvlist [method_version]
	set total [llength $mvlist]
	puts "Rep065: Testing the following $total method/version pairs:"
	puts "Rep065: $mvlist"
	puts "Rep065: $msg2"
	set slist [setup_sites $nsites]

	# Turn the flag on only once we know the test will really run, so
	# that none of the skip paths above leaves it set.
	set noenv_messaging 1
	set count 1
	set status [catch {
		foreach mv $mvlist {
			puts "Rep065: Test iteration $count of $total: $mv"
			rep065_sub $count $mv $nsites $slist
			incr count
		}
	} result]
	if { $status == 1 } {
		# Reset the flag before passing the failure on unchanged.
		set noenv_messaging 0
		error $result $::errorInfo $::errorCode
	}
	set noenv_messaging 0
}

# rep065_sub --
#	Run one {method version} pair through every site-version
#	configuration in slist: start the site processes, wait for them,
#	then verify each client against the master.
#
# Arguments:
#	iter	Iteration number, used in progress messages.
#	mv	Two-element list: access method and historical version name.
#	nsites	Number of sites.
#	slist	List of per-site version lists as built by setup_sites
#		(0 = run the historical version, 1 = run upgraded).
#
# Results:
#	None.  Returns early with a message when the historical build
#	directory or its db_verify executable cannot be found.
proc rep065_sub { iter mv nsites slist } {
	source ./include.tcl
	global machids
	# util_path is not read in this proc.  The declaration is kept
	# because include.tcl is sourced in this scope and may assign it.
	# NOTE(review): confirm before removing.
	global util_path
	set machids {}
	set method [lindex $mv 0]
	set vers [lindex $mv 1]

	puts "\tRep065.$iter.a: Set up."
	# Whatever directory we started this process from is referred
	# to as the controlling directory.  It will contain the message
	# queue and start all the child processes.
	set controldir [pwd]
	env_cleanup $controldir/$testdir
	replsetup_noenv $controldir/$testdir/MSGQUEUEDIR

	# Set up the historical build directory.  The master will start
	# running with historical code.
	#
	# This test presumes we are running in the current build
	# directory and that the expected historical builds are
	# set up in a similar fashion.  If they are not, quit gracefully.
	set homedir [file dirname [file dirname $controldir]]
	set reputils_path $controldir/../test
	set histdir $homedir/$vers/build_unix
	if { [file exists $histdir] == 0 } {
		puts -nonewline "Skipping iteration $iter: cannot find"
		puts " historical version $vers."
		return
	}
	if { [file exists $histdir/db_verify] == 0 } {
		puts -nonewline "Skipping iteration $iter: historical version"
		puts " $vers is missing some executables. Is it built?"
		return
	}

	set histtestdir $histdir/TESTDIR

	env_cleanup $histtestdir
	set markerdir $controldir/$testdir/MARKER
	file delete -force $markerdir

	# Create site directories.  Each site gets one directory under the
	# historical TESTDIR and one under the current TESTDIR; sites start
	# in the historical one and move to the other when upgraded.
	set allids {}
	for { set i 0 } { $i < $nsites } { incr i } {
		set siteid($i) [expr {$i + 1}]
		set sid $siteid($i)
		lappend allids $sid
		set histdirs($sid) $histtestdir/SITE.$i
		set upgdir($sid) $controldir/$testdir/SITE.$i
		file mkdir $histdirs($sid)
		file mkdir $upgdir($sid)
	}

	#
	# We know that slist has all sites starting in the histdir.
	# So if we encounter an upgrade value, we upgrade that client
	# from the hist dir.
	#
	set count 1
	foreach sitevers $slist {
		puts "\tRep065.b.$iter.$count: Run with sitelist $sitevers."
		#
		# Delete the marker directory each iteration so that
		# we don't find old data in there.
		#
		file delete -force $markerdir
		file mkdir $markerdir
		#
		# Get the chosen master index from the list of sites.
		# Site ids are index + 1.
		#
		set mindex [get_master $nsites $sitevers]
		set meid [expr {$mindex + 1}]

		#
		# Kick off the test processes.  We need 1 test process
		# per site and 1 message process per site.
		#
		set pids {}
		for { set i 0 } { $i < $nsites } { incr i } {
			set upg [lindex $sitevers $i]
			set sid $siteid($i)
			#
			# If we are running "old" set up an array
			# saying if this site has run old/new yet.
			# The reason is that we want to "upgrade"
			# only the first time we go from old to new,
			# not every iteration through this loop.
			#
			if { $upg == 0 } {
				puts -nonewline "\t\tRep065.b: Test: Old site $i"
				set sitedir($i) $histdirs($sid)
				set already_upgraded($i) 0
			} else {
				puts -nonewline "\t\tRep065.b: Test: Upgraded site $i"
				set sitedir($i) $upgdir($sid)
				if { $already_upgraded($i) == 0 } {
					upg_repdir $histdirs($sid) $sitedir($i)
				}
				set already_upgraded($i) 1
			}
			if { $sid == $meid } {
				set state MASTER
				set runtest [list REPTEST $method 15 10]
				puts " (MASTER)"
			} else {
				set state CLIENT
				set runtest {REPTEST_GET}
				puts " (CLIENT)"
			}
			lappend pids [exec $tclsh_path $test_path/wrap.tcl \
			    rep065script.tcl \
			    $controldir/$testdir/$count.S$i.log \
			    SKIP \
			    START $state \
			    $runtest \
			    $sid $allids $controldir \
			    $sitedir($i) $reputils_path &]
			lappend pids [exec $tclsh_path $test_path/wrap.tcl \
			    rep065script.tcl \
			    $controldir/$testdir/$count.S$i.msg \
			    SKIP \
			    PROCMSGS $state \
			    NULL \
			    $sid $allids $controldir \
			    $sitedir($i) $reputils_path &]
		}

		watch_procs $pids 20
		#
		# At this point, clean up any message files.  The message
		# system leads to a significant number of duplicate
		# requests.  If the master site handled them after the
		# client message processes exited, then there can be
		# a large number of "dead" message files waiting for
		# non-existent clients.  Just clean up everyone.
		#
		for { set i 0 } { $i < $nsites } { incr i } {
			replclear_noenv $siteid($i)
		}

		#
		# Kick off the verification processes.  These just walk
		# their own logs and databases, so we don't need to have
		# a message process.  We need separate processes because
		# old sites need to use old utilities.
		#
		set pids {}
		puts "\tRep065.c.$iter.$count: Verify all sites."
		for { set i 0 } { $i < $nsites } { incr i } {
			if { $siteid($i) == $meid } {
				set state MASTER
			} else {
				set state CLIENT
			}
			lappend pids [exec $tclsh_path $test_path/wrap.tcl \
			    rep065script.tcl \
			    $controldir/$testdir/$count.S$i.ver \
			    SKIP \
			    VERIFY $state \
			    {LOG DB} \
			    $siteid($i) $allids $controldir \
			    $sitedir($i) $reputils_path &]
		}

		watch_procs $pids 10
		#
		# Now that each site created its verification files,
		# we can now verify everyone.
		#
		for { set i 0 } { $i < $nsites } { incr i } {
			if { $i == $mindex } {
				continue
			}
			puts \
	"\t\tRep065.c: Verify: Compare databases master and client $i"
			error_check_good db_cmp \
			    [filecmp $sitedir($mindex)/VERIFY/dbdump \
			    $sitedir($i)/VERIFY/dbdump] 0
			set upg [lindex $sitevers $i]
			# !!!
			# Although db_printlog works and can read old logs,
			# there have been some changes to the output text that
			# makes comparing difficult.  One possible solution
			# is to run db_printlog here, from the current directory
			# instead of from the historical directory.
			#
			if { $upg == 0 } {
				puts \
	"\t\tRep065.c: Verify: Compare logs master and client $i"
				error_check_good log_cmp \
				    [filecmp $sitedir($mindex)/VERIFY/prlog \
				    $sitedir($i)/VERIFY/prlog] 0
			} else {
				puts \
	"\t\tRep065.c: Verify: Compare LSNs master and client $i"
				error_check_good log_cmp \
				    [filecmp $sitedir($mindex)/VERIFY/loglsn \
				    $sitedir($i)/VERIFY/loglsn] 0
			}
		}

		#
		# At this point we have a master and sites all up to date
		# with each other.  Now, one at a time, upgrade the sites
		# to the current version and start everyone up again.
		incr count
	}
}

# setup_sites --
#	Build the list of site-version configurations to run.
#
# Arguments:
#	nsites	Number of sites.
#
# Results:
#	A list of nsites + 1 sublists, each of length nsites.  A 0
#	represents running the old version and 1 represents running
#	upgraded.  Sublist k has its first k entries set to 1, so for
#	3 sites it looks like:
#	{ 0 0 0 } { 1 0 0 } { 1 1 0 } { 1 1 1 }
proc setup_sites { nsites } {
	set sitelist {}
	for { set i 0 } { $i <= $nsites } { incr i } {
		set l ""
		for { set j 1 } { $j <= $nsites } { incr j } {
			if { $i < $j } {
				lappend l 0
			} else {
				lappend l 1
			}
		}
		lappend sitelist $l
	}
	return $sitelist
}

# upg_repdir --
#	Upgrade a site to the current version.  This entails:
#	1. Removing any old files from the upgrade directory.
#	2. Copy all old version files to upgrade directory.
#	3. Remove any __db files from upgrade directory except
#	   __db.rep.gen and __db.rep.egen.
#	4. Force checkpoint in new version.
#
# Arguments:
#	histdir	Directory holding the site's historical-version files.
#	upgdir	Directory the upgraded site will run from; it is deleted
#		and recreated as a copy of histdir.
#
# Results:
#	None.  Fails through error_check_good if db_checkpoint fails.
proc upg_repdir { histdir upgdir } {
	global util_path

	file delete -force $upgdir

	# Recovery was run before as part of upgradescript.
	# Archive dir by copying it to upgrade dir.
	file copy -force $histdir $upgdir
	foreach d [glob -nocomplain $upgdir/__db*] {
		if { [string equal $d "$upgdir/__db.rep.gen"] ||
		    [string equal $d "$upgdir/__db.rep.egen"] } {
			continue
		}
		file delete -force $d
	}
	# Force current version checkpoint.  exec is called directly rather
	# than through eval, so the paths are passed as single arguments and
	# are not parsed a second time.
	set stat [catch {exec $util_path/db_checkpoint -1 -h $upgdir} r]
	if { $stat != 0 } {
		puts "CHECKPOINT: $upgdir: $r"
	}
	error_check_good stat_ckp $stat 0
}

# get_master --
#	Choose which site will be master for one configuration.
#
#	When we can, simply run an election to get a new master.
#	We then verify we got an old client.
#
#	For now, randomly pick among the old sites, or if no old
#	sites just randomly pick anyone.
#
# Arguments:
#	nsites		Number of sites; must equal the length of verslist.
#	verslist	Per-site version flags (0 = old, 1 = upgraded).
#
# Results:
#	The zero-based index into verslist of the chosen site.
proc get_master { nsites verslist } {
	error_check_good vlist_chk [llength $verslist] $nsites

	# Pick 1 out of N old sites or 1 out of nsites if all upgraded.
	set old_count 0
	foreach i $verslist {
		if { $i == 0 } {
			incr old_count
		}
	}
	if { $old_count == 0 } {
		set old_count $nsites
	}
	set master [berkdb random_int 0 [expr {$old_count - 1}]]
	#
	# Since the Nth old site may not be at the Nth place in the
	# list unless we used the entire list, we need to loop to find
	# the right index to return.
	if { $old_count == $nsites } {
		return $master
	}
	set ocount 0
	set index 0
	foreach i $verslist {
		if { $i == 1 } {
			incr index
			continue
		}
		if { $ocount == $master } {
			return $index
		}
		incr ocount
		incr index
	}
	#
	# If we get here there is a problem in the code.
	#
	error "FAIL: get_master problem"
}

# method_version --
#	Build the list of {method version} pairs to test.  btree and hash
#	are pinned to fixed versions; every other entry of the global
#	valid_methods gets a randomly chosen version.  The random
#	assignment is repeated until every version in the list is used
#	at least once.
#
# Arguments:
#	None.  Reads the global valid_methods.
#
# Results:
#	A list of two-element {method version} lists, starting with the
#	pinned pairs.  Raises an error when full coverage is impossible,
#	rather than retrying forever.
proc method_version { } {
	global valid_methods

	set meth $valid_methods
	set startmv { {btree db-4.4.20} {hash db-4.5.20} }

	# Remove btree and hash from the method list, we're manually
	# assigning those versions due to log/recovery record changes
	# at that version.  Remember which versions they cover.
	set covered {}
	foreach pair $startmv {
		lappend covered [lindex $pair 1]
		set midx [lsearch -exact $meth [lindex $pair 0]]
		if { $midx >= 0 } {
			set meth [lreplace $meth $midx $midx]
		}
	}

	set vers {db-4.4.20 db-4.5.20 db-4.6.21 db-4.7.25}
	set dbvlen [llength $vers]
	#
	# NOTE: The values in "vers_list" are indices into $vers above.
	# Since we're explicitly testing 4.4.20 and 4.5.20 above,
	# weight later versions more.
	# When you add a new version to $vers, you must
	# add some new items to $vers_list to choose that index.
	#
	set vers_list { 0 0 1 1 2 2 2 3 3 3 }
	set vers_len [expr {[llength $vers_list] - 1}]

	# Work out the starting "already tested" flag for each version:
	# 1 if a pinned pair uses it, 0 otherwise.  Every version that
	# still needs a random hit must be reachable through vers_list,
	# and there must be enough methods left to hit them all, or the
	# retry loop below could never finish.
	set needed 0
	for { set i 0 } { $i < $dbvlen } { incr i } {
		if { [lsearch -exact $covered [lindex $vers $i]] >= 0 } {
			set preset($i) 1
			continue
		}
		set preset($i) 0
		incr needed
		if { [lsearch -exact $vers_list $i] < 0 } {
			error "FAIL: method_version:\
			    version index $i is missing from vers_list"
		}
	}
	if { [llength $meth] < $needed } {
		error "FAIL: method_version: [llength $meth] methods\
		    cannot cover $needed untested versions"
	}

	# Walk through the list of remaining methods and randomly
	# assign a version to each one.
	while { 1 } {
		set mv $startmv
		# We want to make sure we test each version.
		array set vtest [array get preset]
		foreach m $meth {
			# Index into distribution list.
			set vidx [berkdb random_int 0 $vers_len]
			# Index into version list.
			set vindex [lindex $vers_list $vidx]
			set vtest($vindex) 1
			set v [lindex $vers $vindex]
			lappend mv [list $m $v]
		}
		#
		# Assume success.  If we find any $vtest entry of 0,
		# then we fail and try again.
		#
		set all_vers 1
		for { set i 0 } { $i < $dbvlen } { incr i } {
			if { $vtest($i) == 0 } {
				set all_vers 0
			}
		}
		if { $all_vers == 1 } {
			break
		}
#		puts "Did not get all versions with $mv."
	}

	return $mv
}