# See the file LICENSE for redistribution information.
#
# Copyright (c) 2006,2008 Oracle.  All rights reserved.
#
# $Id: rep065.tcl,v 12.22 2008/02/20 16:59:14 sue Exp $
#
# TEST	rep065
# TEST	Tests replication running with different versions.
# TEST	This capability is introduced with 4.5.
# TEST
# TEST	Start a replication group of 1 master and N sites, all
# TEST	running some historical version greater than or equal to 4.4.
# TEST	Take down a client and bring it up again running current.
# TEST	Run some upgrades, make sure everything works.
# TEST
# TEST	Each site runs the tcllib of its own version, but uses
# TEST	the current tcl code (e.g. test.tcl).

# rep065 --
#	Test entry point.  Builds the list of {method version} pairs and
#	the list of per-site upgrade states, then runs rep065_sub once per
#	pair.
#
# Arguments:
#	method	Access method requested by the caller; anything other than
#		btree is skipped because the test picks its own methods.
#	nsites	Number of sites in the replication group (default 3).
#	args	Accepted for signature compatibility; not used here.
#
# Results:
#	Returns the list { btree } when $checking_valid_methods is set,
#	otherwise returns nothing.  noenv_messaging is set to 1 only
#	while the iterations run and is reset to 0 on every exit path,
#	including when an iteration raises an error.
proc rep065 { method { nsites 3 } args } {
	source ./include.tcl
	global noenv_messaging
	global errorInfo errorCode

	if { $is_windows9x_test == 1 } {
		puts "Skipping replication test on Win 9x platform."
		return
	}
	#
	# Skip all methods but btree - we don't use the method, as we
	# run over all of them with varying versions.
	#
	if { $checking_valid_methods } {
		set test_methods { btree }
		return $test_methods
	}

	if { [is_btree $method] == 0 } {
		puts "Rep065: Skipping for method $method."
		return
	}

	#
	# Make the list of {method version} pairs to test.
	#
	set mvlist [method_version]
	set mvlen [llength $mvlist]
	puts "Rep065: Testing the following $mvlen method/version pairs:"
	puts "Rep065: $mvlist"
	set slist [setup_sites $nsites]

	#
	# Turn on no-env messaging only now that we know we will really
	# run, and make sure it is turned off again even if an iteration
	# fails, so the setting does not leak into whatever runs next.
	#
	set noenv_messaging 1
	set count 1
	set code [catch {
		foreach i $mvlist {
			puts "Rep065: Test iteration $count of $mvlen: $i"
			rep065_sub $count $i $nsites $slist
			incr count
		}
	} result]
	set noenv_messaging 0
	if { $code != 0 } {
		# Re-raise the original error with its original trace.
		error $result $errorInfo $errorCode
	}
}

# rep065_sub --
#	Run one {method version} iteration: start every site on the
#	historical build, then walk through $slist, upgrading sites as
#	the list dictates, running the workload and verifying that all
#	sites agree with the master.
#
# Arguments:
#	iter	Iteration number, used only in progress messages.
#	mv	A {method version} pair, e.g. {btree db-4.4.20}.
#	nsites	Number of sites in the group.
#	slist	List of per-site 0/1 lists as produced by setup_sites;
#		0 means the site runs the historical version, 1 means it
#		runs the current version.
#
# Results:
#	None.  Returns early (with a message) if the historical build
#	directory or its executables cannot be found.
proc rep065_sub { iter mv nsites slist } {
	source ./include.tcl
	global machids
	global util_path
	set machids {}
	set method [lindex $mv 0]
	set vers [lindex $mv 1]

	puts "\tRep065.$iter.a: Set up."
	# Whatever directory we started this process from is referred
	# to as the controlling directory.  It will contain the message
	# queue and start all the child processes.
	set controldir [pwd]
	env_cleanup $controldir/$testdir
	replsetup_noenv $controldir/$testdir/MSGQUEUEDIR

	# Set up the historical build directory.  The master will start
	# running with historical code.
	#
	# This test presumes we are running in the current build
	# directory and that the expected historical builds are
	# set up in a similar fashion.  If they are not, quit gracefully.

	set pwd [pwd]
	set homedir [file dirname [file dirname $pwd]]
	set reputils_path $pwd/../test
	set histdir $homedir/$vers/build_unix
	if { [file exists $histdir] == 0 } {
		puts -nonewline "Skipping iteration $iter: cannot find"
		puts " historical version $vers."
		return
	}
	if { [file exists $histdir/db_verify] == 0 } {
		puts -nonewline "Skipping iteration $iter: historical version"
		puts " $vers is missing some executables.  Is it built?"
		return
	}

	set histtestdir $histdir/TESTDIR

	env_cleanup $histtestdir
	set markerdir $controldir/$testdir/MARKER
	file delete -force $markerdir

	# Create site directories.  They start running in the historical
	# directory, too.  They will be upgraded to the current version
	# first.
	set allids { }
	for { set i 0 } { $i < $nsites } { incr i } {
		# Site ids are 1-based; site indices are 0-based.
		set siteid($i) [expr {$i + 1}]
		set sid $siteid($i)
		lappend allids $sid
		set histdirs($sid) $histtestdir/SITE.$i
		set upgdir($sid) $controldir/$testdir/SITE.$i
		file mkdir $histdirs($sid)
		file mkdir $upgdir($sid)
	}

	# Start the group with every site running historical version $vers.
	#
	# We know that slist has all sites starting in the histdir.
	# So if we encounter an upgrade value, we upgrade that client
	# from the hist dir.
	#
	set count 1
	foreach sitevers $slist {
		puts "\tRep065.b.$iter.$count: Run with sitelist $sitevers."
		#
		# Delete the marker directory each iteration so that
		# we don't find old data in there.
		#
		file delete -force $markerdir
		file mkdir $markerdir
		#
		# Get the chosen master index from the list of sites.
		#
		set mindex [get_master $nsites $sitevers]
		set meid [expr {$mindex + 1}]

		#
		# Kick off the test processes.  We need 1 test process
		# per site and 1 message process per site.
		#
		set pids {}
		for { set i 0 } { $i < $nsites } { incr i } {
			set upg [lindex $sitevers $i]
			set sid $siteid($i)
			#
			# If we are running "old" set up an array
			# saying if this site has run old/new yet.
			# The reason is that we want to "upgrade"
			# only the first time we go from old to new,
			# not every iteration through this loop.
			#
			if { $upg == 0 } {
				puts -nonewline "\t\tRep065.b: Test: Old site $i"
				set sitedir($i) $histdirs($sid)
				set already_upgraded($i) 0
			} else {
				puts -nonewline "\t\tRep065.b: Test: Upgraded site $i"
				set sitedir($i) $upgdir($sid)
				if { $already_upgraded($i) == 0 } {
					upg_repdir $histdirs($sid) $sitedir($i)
				}
				set already_upgraded($i) 1
			}
			if { $sid == $meid } {
				set state MASTER
				set runtest [list REPTEST $method 15 10]
				puts " (MASTER)"
			} else {
				set state CLIENT
				set runtest {REPTEST_GET}
				puts " (CLIENT)"
			}
			# Test process for this site.
			lappend pids [exec $tclsh_path $test_path/wrap.tcl \
			    rep065script.tcl \
			    $controldir/$testdir/$count.S$i.log \
			    SKIP \
			    START $state \
			    $runtest \
			    $sid $allids $controldir \
			    $sitedir($i) $reputils_path &]
			# Message-processing process for this site.
			lappend pids [exec $tclsh_path $test_path/wrap.tcl \
			    rep065script.tcl \
			    $controldir/$testdir/$count.S$i.msg \
			    SKIP \
			    PROCMSGS $state \
			    NULL \
			    $sid $allids $controldir \
			    $sitedir($i) $reputils_path &]
		}

		watch_procs $pids 20
		#
		# At this point, clean up any message files.  The message
		# system leads to a significant number of duplicate
		# requests.  If the master site handled them after the
		# client message processes exited, then there can be
		# a large number of "dead" message files waiting for
		# non-existent clients.  Just clean up everyone.
		#
		for { set i 0 } { $i < $nsites } { incr i } {
			replclear_noenv $siteid($i)
		}

		#
		# Kick off the verification processes.  These just walk
		# their own logs and databases, so we don't need to have
		# a message process.  We need separate processes because
		# old sites need to use old utilities.
		#
		set pids {}
		puts "\tRep065.c.$iter.$count: Verify all sites."
		for { set i 0 } { $i < $nsites } { incr i } {
			if { $siteid($i) == $meid } {
				set state MASTER
			} else {
				set state CLIENT
			}
			lappend pids [exec $tclsh_path $test_path/wrap.tcl \
			    rep065script.tcl \
			    $controldir/$testdir/$count.S$i.ver \
			    SKIP \
			    VERIFY $state \
			    {LOG DB} \
			    $siteid($i) $allids $controldir \
			    $sitedir($i) $reputils_path &]
		}

		watch_procs $pids 10
		#
		# Now that each site created its verification files,
		# we can now verify everyone.
		#
		for { set i 0 } { $i < $nsites } { incr i } {
			if { $i == $mindex } {
				continue
			}
			puts \
	"\t\tRep065.c: Verify: Compare databases master and client $i"
			error_check_good db_cmp \
			    [filecmp $sitedir($mindex)/VERIFY/dbdump \
			    $sitedir($i)/VERIFY/dbdump] 0
			set upg [lindex $sitevers $i]
			# !!!
			# Although db_printlog works and can read old logs,
			# there have been some changes to the output text that
			# makes comparing difficult.  One possible solution
			# is to run db_printlog here, from the current directory
			# instead of from the historical directory.
			#
			if { $upg == 0 } {
				puts \
	"\t\tRep065.c: Verify: Compare logs master and client $i"
				error_check_good log_cmp \
				    [filecmp $sitedir($mindex)/VERIFY/prlog \
				    $sitedir($i)/VERIFY/prlog] 0
			} else {
				puts \
	"\t\tRep065.c: Verify: Compare LSNs master and client $i"
				error_check_good log_cmp \
				    [filecmp $sitedir($mindex)/VERIFY/loglsn \
				    $sitedir($i)/VERIFY/loglsn] 0
			}
		}

		#
		# At this point we have a master and sites all up to date
		# with each other.  Now, one at a time, upgrade the sites
		# to the current version and start everyone up again.
		incr count
	}
}

# setup_sites --
#	Build the sequence of per-site upgrade states.
#
# Arguments:
#	nsites	Number of sites.
#
# Results:
#	Returns a list of nsites+1 lists, each of length nsites.
#	A 0 represents running old version and 1 represents running
#	upgraded.  So, for 3 sites it will look like:
#	    { 0 0 0 } { 1 0 0 } { 1 1 0 } { 1 1 1 }
proc setup_sites { nsites } {
	set sitelist {}
	for { set i 0 } { $i <= $nsites } { incr i } {
		# Entry i has its first i sites upgraded.
		set l ""
		for { set j 1 } { $j <= $nsites } { incr j } {
			if { $i < $j } {
				lappend l 0
			} else {
				lappend l 1
			}
		}
		lappend sitelist $l
	}
	return $sitelist
}

# upg_repdir --
#	Upgrade a site to the current version.  This entails:
#	1.  Removing any old files from the upgrade directory.
#	2.  Copy all old version files to upgrade directory.
#	3.  Remove any __db files from upgrade directory except
#	    __db.rep.gen and __db.rep.egen.
#	4.  Force checkpoint in new version.
#
# Arguments:
#	histdir	Directory holding the site's historical-version files.
#	upgdir	Directory to (re)create for the upgraded site.
#
# Results:
#	None.  Fails via error_check_good if db_checkpoint does not
#	exit cleanly.
proc upg_repdir { histdir upgdir } {
	global util_path

	file delete -force $upgdir

	# Recovery was run before as part of upgradescript.
	# Archive dir by copying it to upgrade dir.
	file copy -force $histdir $upgdir
	set dbfiles [glob -nocomplain $upgdir/__db*]
	foreach d $dbfiles {
		if { $d == "$upgdir/__db.rep.gen" ||
		    $d == "$upgdir/__db.rep.egen" } {
			continue
		}
		file delete -force $d
	}
	# Force current version checkpoint.  Call exec directly rather
	# than through eval so that directory names containing spaces or
	# Tcl metacharacters are passed through as single arguments.
	set stat [catch {exec $util_path/db_checkpoint -1 -h $upgdir} r]
	if { $stat != 0 } {
		puts "CHECKPOINT: $upgdir: $r"
	}
	error_check_good stat_ckp $stat 0
}

# get_master --
#	Choose which site will be master for this round.
#
#	When we can, simply run an election to get a new master.
#	We then verify we got an old client.
#
#	For now, randomly pick among the old sites, or if no old
#	sites just randomly pick anyone.
#
# Arguments:
#	nsites		Number of sites; must equal [llength $verslist].
#	verslist	Per-site list of 0 (old) / 1 (upgraded) flags.
#
# Results:
#	Returns the 0-based index of the chosen site.
proc get_master { nsites verslist } {
	error_check_good vlist_chk [llength $verslist] $nsites

	# Collect the indices of all sites still running the old version.
	set candidates {}
	set index 0
	foreach v $verslist {
		if { $v == 0 } {
			lappend candidates $index
		}
		incr index
	}
	# Everyone is upgraded: any site may be master.
	if { [llength $candidates] == 0 } {
		for { set index 0 } { $index < $nsites } { incr index } {
			lappend candidates $index
		}
	}
	# Pick 1 out of N old sites or 1 out of nsites if all upgraded.
	set pick [berkdb random_int 0 [expr {[llength $candidates] - 1}]]
	return [lindex $candidates $pick]
}

# method_version --
#	Build the list of {method version} pairs to test.  btree and
#	hash are pinned to fixed versions; every other method in
#	$valid_methods gets a randomly chosen version, retrying until
#	every version in the version list is covered at least once.
#
# Results:
#	Returns a list of {method version} pairs.  If there are no
#	methods besides btree and hash, only the two pinned pairs are
#	returned.
proc method_version { } {
	global valid_methods

	set meth $valid_methods
	set startmv [list [list btree db-4.4.20] [list hash db-4.5.20]]

	# Remove btree and hash from the method list, we're manually
	# assigning those versions due to log/recovery record changes
	# at that version.
	foreach pinned { hash btree } {
		set midx [lsearch -exact $meth $pinned]
		if { $midx >= 0 } {
			set meth [lreplace $meth $midx $midx]
		}
	}

	# With nothing left to assign randomly, the remaining version
	# could never be selected and the retry loop below would never
	# terminate.  Return just the pinned pairs.
	if { [llength $meth] == 0 } {
		return $startmv
	}

	set vers {db-4.4.20 db-4.5.20 db-4.6.21}
	set dbvlen [llength $vers]
	#
	# NOTE: The values in "vers_list" are indices into $vers above.
	# Since we're explicitly testing 4.4.20 and 4.5.20 above,
	# weight later versions more.
	# When you add a new version to $vers, you must
	# add some new items to $vers_list to choose that index.
	# Also need to add an entry for 'vtest' below.
	#
	set vers_list { 0 0 1 1 2 2 2 }
	set vers_len [expr {[llength $vers_list] - 1}]

	# Walk through the list of remaining methods and randomly
	# assign a version to each one.
	while { 1 } {
		set mv $startmv
		# We want to make sure we test each version.
		# Versions 0 and 1 are already covered by $startmv.
		set vtest(0) 1
		set vtest(1) 1
		set vtest(2) 0
		foreach m $meth {
			# Index into distribution list.
			set vidx [berkdb random_int 0 $vers_len]
			# Index into version list.
			set vindex [lindex $vers_list $vidx]
			set vtest($vindex) 1
			set v [lindex $vers $vindex]
			lappend mv [list $m $v]
		}
		#
		# Assume success.  If we find any $vtest entry of 0,
		# then we fail and try again.
		#
		set all_vers 1
		for { set i 0 } { $i < $dbvlen } { incr i } {
			if { $vtest($i) == 0 } {
				set all_vers 0
			}
		}
		if { $all_vers == 1 } {
			break
		}
#		puts "Did not get all versions with $mv."
	}

	return $mv
}