1# See the file LICENSE for redistribution information.
2#
3# Copyright (c) 2006-2009 Oracle.  All rights reserved.
4#
5# $Id$
6#
7# TEST	rep065
8# TEST	Tests replication running with different versions.
9# TEST	This capability is introduced with 4.5.
10# TEST
11# TEST	Start a replication group of 1 master and N sites, all
12# TEST	running some historical version greater than or equal to 4.4.
13# TEST	Take down a client and bring it up again running current.
14# TEST	Run some upgrades, make sure everything works.
15# TEST
16# TEST	Each site runs the tcllib of its own version, but uses
17# TEST	the current tcl code (e.g. test.tcl).
18proc rep065 { method { nsites 3 } args } {
19	source ./include.tcl
20	global repfiles_in_memory
21	global noenv_messaging
22	set noenv_messaging 1
23
24	if { $is_windows9x_test == 1 } {
25		puts "Skipping replication test on Win 9x platform."
26		return
27	}
28	#
29	# Skip all methods but btree - we don't use the method, as we
30	# run over all of them with varying versions.
31	#
32	if { $checking_valid_methods } {
33		set test_methods { btree }
34		return $test_methods
35	}
36
37	if { [is_btree $method] == 0 } {
38		puts "Rep065: Skipping for method $method."
39		return
40	}
41
42	set msg2 "and on-disk replication files"
43	if { $repfiles_in_memory } {
44		set msg2 "and in-memory replication files"
45	}
46
47	# Make the list of {method version} pairs to test.
48	#
49	set mvlist [method_version]
50	set mvlen [llength $mvlist]
51	puts "Rep065: Testing the following $mvlen method/version pairs:"
52	puts "Rep065: $mvlist"
53	puts "Rep065: $msg2"
54	set count 1
55	set total [llength $mvlist]
56	set slist [setup_sites $nsites]
57	foreach i $mvlist {
58		puts "Rep065: Test iteration $count of $total: $i"
59		rep065_sub $count $i $nsites $slist
60		incr count
61	}
62	set noenv_messaging 0
63}
64
65proc rep065_sub { iter mv nsites slist } {
66	source ./include.tcl
67	global machids
68	global util_path
69	set machids {}
70	set method [lindex $mv 0]
71	set vers [lindex $mv 1]
72
73	puts "\tRep065.$iter.a: Set up."
74	# Whatever directory we started this process from is referred
75	# to as the controlling directory.  It will contain the message
76	# queue and start all the child processes.
77	set controldir [pwd]
78	env_cleanup $controldir/$testdir
79	replsetup_noenv $controldir/$testdir/MSGQUEUEDIR
80
81	# Set up the historical build directory.  The master will start
82	# running with historical code.
83	#
84	# This test presumes we are running in the current build
85	# directory and that the expected historical builds are
86	# set up in a similar fashion.  If they are not, quit gracefully.
87
88	set pwd [pwd]
89	set homedir [file dirname [file dirname $pwd]]
90	set reputils_path $pwd/../test
91	set histdir $homedir/$vers/build_unix
92	if { [file exists $histdir] == 0 } {
93		puts -nonewline "Skipping iteration $iter: cannot find"
94		puts " historical version $vers."
95		return
96	}
97	if { [file exists $histdir/db_verify] == 0 } {
98		puts -nonewline "Skipping iteration $iter: historical version"
99		puts " $vers is missing some executables.  Is it built?"
100		return
101	}
102
103	set histtestdir $histdir/TESTDIR
104
105	env_cleanup $histtestdir
106	set markerdir $controldir/$testdir/MARKER
107	file delete -force $markerdir
108
109	# Create site directories.  They start running in the historical
110	# directory, too.  They will be upgraded to the current version
111	# first.
112	set allids { }
113	for { set i 0 } { $i < $nsites } { incr i } {
114		set siteid($i) [expr $i + 1]
115		set sid $siteid($i)
116		lappend allids $sid
117		set histdirs($sid) $histtestdir/SITE.$i
118		set upgdir($sid) $controldir/$testdir/SITE.$i
119		file mkdir $histdirs($sid)
120		file mkdir $upgdir($sid)
121	}
122
123	# Open master env running 4.4.
124	#
125	# We know that slist has all sites starting in the histdir.
126	# So if we encounter an upgrade value, we upgrade that client
127	# from the hist dir.
128	#
129	set count 1
130	foreach sitevers $slist {
131		puts "\tRep065.b.$iter.$count: Run with sitelist $sitevers."
132		#
133		# Delete the marker directory each iteration so that
134		# we don't find old data in there.
135		#
136		file delete -force $markerdir
137		file mkdir $markerdir
138		#
139		# Get the chosen master index from the list of sites.
140		#
141		set mindex [get_master $nsites $sitevers]
142		set meid [expr $mindex + 1]
143
144		#
145		# Kick off the test processes.  We need 1 test process
146		# per site and 1 message process per site.
147		#
148		set pids {}
149		for { set i 0 } { $i < $nsites } { incr i } {
150			set upg [lindex $sitevers $i]
151			set sid $siteid($i)
152			#
153			# If we are running "old" set up an array
154			# saying if this site has run old/new yet.
155			# The reason is that we want to "upgrade"
156			# only the first time we go from old to new,
157			# not every iteration through this loop.
158			#
159			if { $upg == 0 } {
160				puts -nonewline "\t\tRep065.b: Test: Old site $i"
161				set sitedir($i) $histdirs($sid)
162				set already_upgraded($i) 0
163			} else {
164				puts -nonewline "\t\tRep065.b: Test: Upgraded site $i"
165				set sitedir($i) $upgdir($sid)
166				if { $already_upgraded($i) == 0 } {
167					upg_repdir $histdirs($sid) $sitedir($i)
168				}
169				set already_upgraded($i) 1
170			}
171			if { $sid == $meid } {
172				set state MASTER
173				set runtest [list REPTEST $method 15 10]
174				puts " (MASTER)"
175			} else {
176				set state CLIENT
177				set runtest {REPTEST_GET}
178				puts " (CLIENT)"
179			}
180			lappend pids [exec $tclsh_path $test_path/wrap.tcl \
181			    rep065script.tcl \
182			    $controldir/$testdir/$count.S$i.log \
183		      	    SKIP \
184			    START $state \
185			    $runtest \
186			    $sid $allids $controldir \
187			    $sitedir($i) $reputils_path &]
188			lappend pids [exec $tclsh_path $test_path/wrap.tcl \
189			    rep065script.tcl \
190			    $controldir/$testdir/$count.S$i.msg \
191		    	    SKIP \
192			    PROCMSGS $state \
193		    	    NULL \
194			    $sid $allids $controldir \
195			    $sitedir($i) $reputils_path &]
196		}
197
198		watch_procs $pids 20
199		#
200		# At this point, clean up any message files.  The message
201		# system leads to a significant number of duplicate
202		# requests.  If the master site handled them after the
203		# client message processes exited, then there can be
204		# a large number of "dead" message files waiting for
205		# non-existent clients.  Just clean up everyone.
206		#
207		for { set i 0 } { $i < $nsites } { incr i } {
208			replclear_noenv $siteid($i)
209		}
210
211		#
212		# Kick off the verification processes.  These just walk
213		# their own logs and databases, so we don't need to have
214		# a message process.  We need separate processes because
215		# old sites need to use old utilities.
216		#
217		set pids {}
218		puts "\tRep065.c.$iter.$count: Verify all sites."
219		for { set i 0 } { $i < $nsites } { incr i } {
220			if { $siteid($i) == $meid } {
221				set state MASTER
222			} else {
223				set state CLIENT
224			}
225			lappend pids [exec $tclsh_path $test_path/wrap.tcl \
226			    rep065script.tcl \
227			    $controldir/$testdir/$count.S$i.ver \
228		      	    SKIP \
229			    VERIFY $state \
230		    	    {LOG DB} \
231			    $siteid($i) $allids $controldir \
232			    $sitedir($i) $reputils_path &]
233		}
234
235		watch_procs $pids 10
236		#
237		# Now that each site created its verification files,
238		# we can now verify everyone.
239		#
240		for { set i 0 } { $i < $nsites } { incr i } {
241			if { $i == $mindex } {
242				continue
243			}
244			puts \
245	"\t\tRep065.c: Verify: Compare databases master and client $i"
246			error_check_good db_cmp \
247			    [filecmp $sitedir($mindex)/VERIFY/dbdump \
248			    $sitedir($i)/VERIFY/dbdump] 0
249			set upg [lindex $sitevers $i]
250			# !!!
251			# Although db_printlog works and can read old logs,
252			# there have been some changes to the output text that
253			# makes comparing difficult.  One possible solution
254			# is to run db_printlog here, from the current directory
255			# instead of from the historical directory.
256			#
257			if { $upg == 0 } {
258				puts \
259	"\t\tRep065.c: Verify: Compare logs master and client $i"
260				error_check_good log_cmp \
261				    [filecmp $sitedir($mindex)/VERIFY/prlog \
262				    $sitedir($i)/VERIFY/prlog] 0
263			} else {
264				puts \
265	"\t\tRep065.c: Verify: Compare LSNs master and client $i"
266				error_check_good log_cmp \
267				    [filecmp $sitedir($mindex)/VERIFY/loglsn \
268				    $sitedir($i)/VERIFY/loglsn] 0
269			}
270		}
271
272		#
273		# At this point we have a master and sites all up to date
274		# with each other.  Now, one at a time, upgrade the sites
275		# to the current version and start everyone up again.
276		incr count
277	}
278}
279
280proc setup_sites { nsites } {
281	#
282	# Set up a list that goes from 0 to $nsites running
283	# upgraded.  A 0 represents running old version and 1
284	# represents running upgraded.  So, for 3 sites it will look like:
285	# { 0 0 0 } { 1 0 0 } { 1 1 0 } { 1 1 1 }
286	#
287	set sitelist {}
288	for { set i 0 } { $i <= $nsites } { incr i } {
289		set l ""
290		for { set j 1 } { $j <= $nsites } { incr j } {
291			if { $i < $j } {
292				lappend l 0
293			} else {
294				lappend l 1
295			}
296		}
297		lappend sitelist $l
298	}
299	return $sitelist
300}
301
302proc upg_repdir { histdir upgdir } {
303	global util_path
304
305	#
306	# Upgrade a site to the current version.  This entails:
307	# 1.  Removing any old files from the upgrade directory.
308	# 2.  Copy all old version files to upgrade directory.
309	# 3.  Remove any __db files from upgrade directory except __db.rep*gen.
310	# 4.  Force checkpoint in new version.
311	file delete -force $upgdir
312
313	# Recovery was run before as part of upgradescript.
314	# Archive dir by copying it to upgrade dir.
315	file copy -force $histdir $upgdir
316	set dbfiles [glob -nocomplain $upgdir/__db*]
317	foreach d $dbfiles {
318		if { $d == "$upgdir/__db.rep.gen" ||
319		    $d == "$upgdir/__db.rep.egen" } {
320			continue
321		}
322		file delete -force $d
323	}
324	# Force current version checkpoint
325	set stat [catch {eval exec $util_path/db_checkpoint -1 -h $upgdir} r]
326	if { $stat != 0 } {
327		puts "CHECKPOINT: $upgdir: $r"
328	}
329	error_check_good stat_ckp $stat 0
330}
331
332proc get_master { nsites verslist } {
333	error_check_good vlist_chk [llength $verslist] $nsites
334	#
335	# When we can, simply run an election to get a new master.
336	# We then verify we got an old client.
337	#
338	# For now, randomly pick among the old sites, or if no old
339	# sites just randomly pick anyone.
340	#
341	set old_count 0
342	# Pick 1 out of N old sites or 1 out of nsites if all upgraded.
343	foreach i $verslist {
344		if { $i == 0 } {
345			incr old_count
346		}
347	}
348	if { $old_count == 0 } {
349		set old_count $nsites
350	}
351	set master [berkdb random_int 0 [expr $old_count - 1]]
352	#
353	# Since the Nth old site may not be at the Nth place in the
354	# list unless we used the entire list, we need to loop to find
355	# the right index to return.
356	if { $old_count == $nsites } {
357		return $master
358	}
359	set ocount 0
360	set index 0
361	foreach i $verslist {
362		if { $i == 1 } {
363			incr index
364			continue
365		}
366		if { $ocount == $master } {
367			return $index
368		}
369		incr ocount
370		incr index
371	}
372	#
373	# If we get here there is a problem in the code.
374	#
375	error "FAIL: get_master problem"
376}
377
378proc method_version { } {
379	global valid_methods
380
381	set meth $valid_methods
382	set startmv { {btree db-4.4.20} {hash db-4.5.20} }
383
384	# Remove btree and hash from the method list, we're manually
385	# assigning those versions due to log/recovery record changes
386	# at that version.
387	set midx [lsearch -exact $meth hash]
388	set meth [lreplace $meth $midx $midx]
389	set midx [lsearch -exact $meth btree]
390	set meth [lreplace $meth $midx $midx]
391
392	set vers {db-4.4.20 db-4.5.20 db-4.6.21 db-4.7.25}
393	set dbvlen [llength $vers]
394	#
395	# NOTE: The values in "vers_list" are indices into $vers above.
396	# Since we're explicitly testing 4.4.20 and 4.5.20 above,
397	# weight later versions more.
398	# When you add a new version to $vers, you must
399	# add some new items to $vers_list to choose that index.
400	# Also need to add an entry for 'vtest' below.
401	#
402	set vers_list { 0 0 1 1 2 2 2 3 3 3 }
403	set vers_len [expr [llength $vers_list] - 1]
404
405	# Walk through the list of remaining methods and randomly
406	# assign a version to each one.
407	while { 1 } {
408		set mv $startmv
409		# We want to make sure we test each version.
410		# 4.4.20
411		set vtest(0) 1
412		# 4.5.20
413		set vtest(1) 1
414		# 4.6.21
415		set vtest(2) 0
416		# 4.7.25
417		set vtest(3) 0
418		foreach m $meth {
419			# Index into distribution list.
420			set vidx [berkdb random_int 0 $vers_len]
421			# Index into version list.
422			set vindex [lindex $vers_list $vidx]
423			set vtest($vindex) 1
424			set v [lindex $vers $vindex]
425			lappend mv [list $m $v]
426		}
427		#
428		# Assume success.  If we find any $vtest entry of 0,
429		# then we fail and try again.
430		#
431		set all_vers 1
432		for { set i 0 } { $i < $dbvlen } { incr i } {
433			if { $vtest($i) == 0 } {
434				set all_vers 0
435			}
436		}
437		if { $all_vers == 1 } {
438			break
439		}
440#		puts "Did not get all versions with $mv."
441	}
442
443	return $mv
444}
445