1# See the file LICENSE for redistribution information.
2#
3# Copyright (c) 2006,2008 Oracle.  All rights reserved.
4#
5# $Id: rep065.tcl,v 12.22 2008/02/20 16:59:14 sue Exp $
6#
7# TEST	rep065
8# TEST	Tests replication running with different versions.
9# TEST	This capability is introduced with 4.5.
10# TEST
11# TEST	Start a replication group of 1 master and N sites, all
12# TEST	running some historical version greater than or equal to 4.4.
13# TEST	Take down a client and bring it up again running current.
14# TEST	Run some upgrades, make sure everything works.
15# TEST
16# TEST	Each site runs the tcllib of its own version, but uses
17# TEST	the current tcl code (e.g. test.tcl).
# rep065 --
#	Entry point for the mixed-version replication test.
#
# Arguments:
#	method	Access method requested by the harness.  Only btree is
#		accepted here; the methods actually exercised come from
#		method_version.
#	nsites	Number of sites in the replication group (default 3).
#	args	Accepted for harness compatibility; not used.
#
# Results:
#	Returns the list of valid methods when the harness is only probing
#	(checking_valid_methods), otherwise returns nothing.
#
# Side effects:
#	noenv_messaging is set to 1 only while the iterations run and is
#	put back to 0 afterwards, including when an iteration raises an
#	error.  The skip/probe paths never touch it.
proc rep065 { method { nsites 3 } args } {
	source ./include.tcl
	global noenv_messaging
	global errorInfo errorCode

	if { $is_windows9x_test == 1 } {
		puts "Skipping replication test on Win 9x platform."
		return
	}
	#
	# Skip all methods but btree - we don't use the method, as we
	# run over all of them with varying versions.
	#
	if { $checking_valid_methods } {
		set test_methods { btree }
		return $test_methods
	}

	if { [is_btree $method] == 0 } {
		puts "Rep065: Skipping for method $method."
		return
	}

	#
	# Make the list of {method version} pairs to test.
	#
	set mvlist [method_version]
	set total [llength $mvlist]
	puts "Rep065: Testing the following $total method/version pairs:"
	puts "Rep065: $mvlist"
	set slist [setup_sites $nsites]

	#
	# Turn on no-env messaging only once we know we will really run,
	# so that the early returns above cannot leave the flag set.
	#
	set noenv_messaging 1
	set stat [catch {
		set count 1
		foreach i $mvlist {
			puts "Rep065: Test iteration $count of $total: $i"
			rep065_sub $count $i $nsites $slist
			incr count
		}
	} res]
	# Save the error state before anything else can overwrite it.
	set savedinfo $errorInfo
	set savedcode $errorCode
	set noenv_messaging 0
	if { $stat != 0 } {
		# Re-raise the original failure with its stack trace intact.
		error $res $savedinfo $savedcode
	}
}
58
# rep065_sub --
#	Run one method/version pair through every site configuration
#	in slist.
#
# Arguments:
#	iter	Iteration number; used only in progress messages.
#	mv	Two-element list: the access method, then the name of the
#		historical version directory (looked up as a sibling of
#		the current tree, with a build_unix subdirectory).
#	nsites	Number of sites in the group.
#	slist	List of site configurations.  Each configuration is a list
#		with one flag per site: 0 runs the site from the historical
#		directory, anything else runs it from the upgraded directory.
#
# Results:
#	None.  Returns early, after printing a message, when the
#	historical build directory or its db_verify executable is missing.
#	Comparison failures are reported through error_check_good.
proc rep065_sub { iter mv nsites slist } {
	source ./include.tcl
	global machids
	# NOTE(review): util_path is declared but not referenced below.
	global util_path
	set machids {}
	# Split the pair into its method and version parts.
	set method [lindex $mv 0]
	set vers [lindex $mv 1]

	puts "\tRep065.$iter.a: Set up."
	# Whatever directory we started this process from is referred
	# to as the controlling directory.  It will contain the message
	# queue and start all the child processes.
	set controldir [pwd]
	env_cleanup $controldir/$testdir
	replsetup_noenv $controldir/$testdir/MSGQUEUEDIR

	# Set up the historical build directory.  The master will start
	# running with historical code.
	#
	# This test presumes we are running in the current build
	# directory and that the expected historical builds are
	# set up in a similar fashion.  If they are not, quit gracefully.

	set pwd [pwd]
	# Two levels up from the current build directory is where the
	# historical version trees are expected to live.
	set homedir [file dirname [file dirname $pwd]]
	set reputils_path $pwd/../test
	set histdir $homedir/$vers/build_unix
	if { [file exists $histdir] == 0 } {
		puts -nonewline "Skipping iteration $iter: cannot find"
		puts " historical version $vers."
		return
	}
	# db_verify is used as the indicator that the historical tree
	# has actually been built.
	if { [file exists $histdir/db_verify] == 0 } {
		puts -nonewline "Skipping iteration $iter: historical version"
		puts " $vers is missing some executables.  Is it built?"
		return
	}

	set histtestdir $histdir/TESTDIR

	env_cleanup $histtestdir
	set markerdir $controldir/$testdir/MARKER
	file delete -force $markerdir

	# Create site directories.  They start running in the historical
	# directory, too.  They will be upgraded to the current version
	# first.
	#
	# Site ids are 1-based (index + 1).  histdirs and upgdir are keyed
	# by site id, while siteid and sitedir are keyed by site index.
	set allids { }
	for { set i 0 } { $i < $nsites } { incr i } {
		set siteid($i) [expr $i + 1]
		set sid $siteid($i)
		lappend allids $sid
		set histdirs($sid) $histtestdir/SITE.$i
		set upgdir($sid) $controldir/$testdir/SITE.$i
		file mkdir $histdirs($sid)
		file mkdir $upgdir($sid)
	}

	# Run each site configuration in turn.  (The historical version
	# in use is $vers, not necessarily 4.4.)
	#
	# We know that slist has all sites starting in the histdir.
	# So if we encounter an upgrade value, we upgrade that client
	# from the hist dir.
	#
	set count 1
	foreach sitevers $slist {
		puts "\tRep065.b.$iter.$count: Run with sitelist $sitevers."
		#
		# Delete the marker directory each iteration so that
		# we don't find old data in there.
		#
		file delete -force $markerdir
		file mkdir $markerdir
		#
		# Get the chosen master index from the list of sites.
		#
		set mindex [get_master $nsites $sitevers]
		# Convert the 0-based master index into its site id.
		set meid [expr $mindex + 1]

		#
		# Kick off the test processes.  We need 1 test process
		# per site and 1 message process per site.
		#
		set pids {}
		for { set i 0 } { $i < $nsites } { incr i } {
			set upg [lindex $sitevers $i]
			set sid $siteid($i)
			#
			# If we are running "old" set up an array
			# saying if this site has run old/new yet.
			# The reason is that we want to "upgrade"
			# only the first time we go from old to new,
			# not every iteration through this loop.
			#
			# already_upgraded is first written in the old
			# branch, so a site has to run old at least once
			# before the upgraded branch can read it.
			#
			if { $upg == 0 } {
				puts -nonewline "\t\tRep065.b: Test: Old site $i"
				set sitedir($i) $histdirs($sid)
				set already_upgraded($i) 0
			} else {
				puts -nonewline "\t\tRep065.b: Test: Upgraded site $i"
				set sitedir($i) $upgdir($sid)
				if { $already_upgraded($i) == 0 } {
					upg_repdir $histdirs($sid) $sitedir($i)
				}
				set already_upgraded($i) 1
			}
			# The master runs the data-generating test; every
			# other site runs the read-only variant.
			if { $sid == $meid } {
				set state MASTER
				set runtest [list REPTEST $method 15 10]
				puts " (MASTER)"
			} else {
				set state CLIENT
				set runtest {REPTEST_GET}
				puts " (CLIENT)"
			}
			# Background test process for this site; its output
			# goes to the .log file named below.
			lappend pids [exec $tclsh_path $test_path/wrap.tcl \
			    rep065script.tcl \
			    $controldir/$testdir/$count.S$i.log \
		      	    SKIP \
			    START $state \
			    $runtest \
			    $sid $allids $controldir \
			    $sitedir($i) $reputils_path &]
			# Background message-processing process for this
			# site; its output goes to the .msg file.
			lappend pids [exec $tclsh_path $test_path/wrap.tcl \
			    rep065script.tcl \
			    $controldir/$testdir/$count.S$i.msg \
		    	    SKIP \
			    PROCMSGS $state \
		    	    NULL \
			    $sid $allids $controldir \
			    $sitedir($i) $reputils_path &]
		}

		# NOTE(review): presumably blocks until all child pids have
		# exited, with 20 as a polling interval - confirm against
		# watch_procs.
		watch_procs $pids 20
		#
		# At this point, clean up any message files.  The message
		# system leads to a significant number of duplicate
		# requests.  If the master site handled them after the
		# client message processes exited, then there can be
		# a large number of "dead" message files waiting for
		# non-existent clients.  Just clean up everyone.
		#
		for { set i 0 } { $i < $nsites } { incr i } {
			replclear_noenv $siteid($i)
		}

		#
		# Kick off the verification processes.  These just walk
		# their own logs and databases, so we don't need to have
		# a message process.  We need separate processes because
		# old sites need to use old utilities.
		#
		set pids {}
		puts "\tRep065.c.$iter.$count: Verify all sites."
		for { set i 0 } { $i < $nsites } { incr i } {
			if { $siteid($i) == $meid } {
				set state MASTER
			} else {
				set state CLIENT
			}
			lappend pids [exec $tclsh_path $test_path/wrap.tcl \
			    rep065script.tcl \
			    $controldir/$testdir/$count.S$i.ver \
		      	    SKIP \
			    VERIFY $state \
		    	    {LOG DB} \
			    $siteid($i) $allids $controldir \
			    $sitedir($i) $reputils_path &]
		}

		watch_procs $pids 10
		#
		# Now that each site created its verification files,
		# we can now verify everyone.
		#
		# Every non-master site is compared against the master's
		# files in its VERIFY subdirectory.
		#
		for { set i 0 } { $i < $nsites } { incr i } {
			if { $i == $mindex } {
				continue
			}
			puts \
	"\t\tRep065.c: Verify: Compare databases master and client $i"
			error_check_good db_cmp \
			    [filecmp $sitedir($mindex)/VERIFY/dbdump \
			    $sitedir($i)/VERIFY/dbdump] 0
			set upg [lindex $sitevers $i]
			# !!!
			# Although db_printlog works and can read old logs,
			# there have been some changes to the output text that
			# makes comparing difficult.  One possible solution
			# is to run db_printlog here, from the current directory
			# instead of from the historical directory.
			#
			# Old clients compare the full printlog output;
			# upgraded clients compare only the loglsn files.
			#
			if { $upg == 0 } {
				puts \
	"\t\tRep065.c: Verify: Compare logs master and client $i"
				error_check_good log_cmp \
				    [filecmp $sitedir($mindex)/VERIFY/prlog \
				    $sitedir($i)/VERIFY/prlog] 0
			} else {
				puts \
	"\t\tRep065.c: Verify: Compare LSNs master and client $i"
				error_check_good log_cmp \
				    [filecmp $sitedir($mindex)/VERIFY/loglsn \
				    $sitedir($i)/VERIFY/loglsn] 0
			}
		}

		#
		# At this point we have a master and sites all up to date
		# with each other.  Now, one at a time, upgrade the sites
		# to the current version and start everyone up again.
		incr count
	}
}
273
# setup_sites --
#	Build the sequence of site configurations for an nsites group.
#
#	Each configuration is a list with one flag per site: 0 means the
#	site runs the old version, 1 means it runs upgraded.  The sequence
#	starts with nobody upgraded and upgrades one more site, from the
#	front, at each step.  For 3 sites the result is:
#	    { 0 0 0 } { 1 0 0 } { 1 1 0 } { 1 1 1 }
proc setup_sites { nsites } {
	set configs {}
	set nupgraded 0
	while { $nupgraded <= $nsites } {
		set flags ""
		# The first nupgraded positions are upgraded, the rest old.
		for { set pos 0 } { $pos < $nsites } { incr pos } {
			lappend flags [expr { $pos < $nupgraded }]
		}
		lappend configs $flags
		incr nupgraded
	}
	return $configs
}
295
# upg_repdir --
#	Upgrade a site to the current version.
#
# Arguments:
#	histdir	Directory holding the site's files from the old version.
#	upgdir	Directory the upgraded site will run from; it is removed
#		and recreated.
#
# This entails:
# 1.  Removing any old files from the upgrade directory.
# 2.  Copy all old version files to upgrade directory.
# 3.  Remove any __db files from upgrade directory except
#     __db.rep.gen and __db.rep.egen.
# 4.  Force checkpoint in new version.
#
# Results:
#	None.  A failing checkpoint is printed and then reported through
#	error_check_good.
proc upg_repdir { histdir upgdir } {
	global util_path

	file delete -force $upgdir

	# Recovery was run before as part of upgradescript.
	# Archive dir by copying it to upgrade dir.
	file copy -force $histdir $upgdir
	foreach d [glob -nocomplain $upgdir/__db*] {
		# Match on the file name only, as a string, so the test does
		# not depend on how glob spells the directory part.
		set name [file tail $d]
		if { [string equal $name __db.rep.gen] ||
		    [string equal $name __db.rep.egen] } {
			continue
		}
		file delete -force $d
	}
	# Force current version checkpoint.  Call exec directly: going
	# through eval would re-parse the words and break on paths that
	# contain whitespace or Tcl-special characters.
	set stat [catch {exec $util_path/db_checkpoint -1 -h $upgdir} r]
	if { $stat != 0 } {
		puts "CHECKPOINT: $upgdir: $r"
	}
	error_check_good stat_ckp $stat 0
}
325
# get_master --
#	Choose which site index acts as master for a configuration.
#
# Arguments:
#	nsites		Number of sites; must equal the length of verslist.
#	verslist	One flag per site: 0 for old, 1 for upgraded.
#
# Results:
#	Returns the 0-based index of the chosen site.  The choice is
#	random among the old sites, or among all sites when none is old.
#
#	When we can, simply run an election to get a new master and
#	then verify we got an old client.  For now the pick is random.
proc get_master { nsites verslist } {
	error_check_good vlist_chk [llength $verslist] $nsites

	# Count the old sites.
	set nold 0
	foreach flag $verslist {
		if { $flag == 0 } {
			incr nold
		}
	}
	# With no old site left, every site is a candidate.
	if { $nold == 0 } {
		set nold $nsites
	}
	set pick [berkdb random_int 0 [expr $nold - 1]]

	# If the whole list is the candidate pool, the pick is the index.
	if { $nold == $nsites } {
		return $pick
	}

	#
	# Otherwise the pick counts old sites only, so walk the list and
	# map it back to a position, stepping over upgraded sites.
	#
	set seen 0
	set nflags [llength $verslist]
	for { set pos 0 } { $pos < $nflags } { incr pos } {
		if { [lindex $verslist $pos] == 1 } {
			continue
		}
		if { $seen == $pick } {
			return $pos
		}
		incr seen
	}
	#
	# If we get here there is a problem in the code.
	#
	error "FAIL: get_master problem"
}
371
# method_version --
#	Build the list of {method version} pairs to test.
#
#	btree and hash are pinned to fixed versions, due to log/recovery
#	record changes at those versions.  Every other entry in
#	valid_methods gets a randomly chosen version.  The random
#	assignment is repeated until every known version shows up in at
#	least one pair.
#
# Results:
#	Returns the list of pairs, pinned pairs first.
proc method_version { } {
	global valid_methods

	set pinned { {btree db-4.4.20} {hash db-4.5.20} }
	set releases {db-4.4.20 db-4.5.20 db-4.6.21}
	set nreleases [llength $releases]
	#
	# NOTE: The values in "weights" are indices into $releases above.
	# Since 4.4.20 and 4.5.20 are explicitly tested by the pinned
	# pairs, later versions are weighted more.
	# When you add a new version to $releases, you must add some new
	# items to $weights to choose that index, and add an entry to the
	# "covered" initialization below.
	#
	set weights { 0 0 1 1 2 2 2 }
	set maxpick [expr [llength $weights] - 1]

	# Take the pinned methods out of the list of methods to assign.
	set others $valid_methods
	foreach fixed { hash btree } {
		set at [lsearch -exact $others $fixed]
		set others [lreplace $others $at $at]
	}

	# Keep drawing until a draw covers every version.
	set complete 0
	while { !$complete } {
		set pairs $pinned
		# The pinned pairs already cover indices 0 and 1.
		array set covered { 0 1 1 1 2 0 }
		foreach m $others {
			# Draw a slot in the weight list, then map it to a
			# version index.
			set slot [berkdb random_int 0 $maxpick]
			set which [lindex $weights $slot]
			set covered($which) 1
			lappend pairs [list $m [lindex $releases $which]]
		}
		# Assume success; any version left uncovered forces a retry.
		set complete 1
		for { set k 0 } { $k < $nreleases } { incr k } {
			if { $covered($k) == 0 } {
				set complete 0
			}
		}
	}

	return $pairs
}
434