1# See the file LICENSE for redistribution information.
2#
3# Copyright (c) 2004,2008 Oracle.  All rights reserved.
4#
5# $Id: rep034.tcl,v 12.26 2008/01/18 18:51:46 sue Exp $
6#
7# TEST	rep034
8# TEST	Test of STARTUPDONE notification.
9# TEST
10# TEST	STARTUPDONE can now be recognized without the need for new "live" log
11# TEST  records from the master (under favorable conditions).  The response to
12# TEST  the ALL_REQ at the end of synchronization includes an end-of-log marker
13# TEST  that now triggers it.  However, the message containing that end marker
14# TEST  could get lost, so live log records still serve as a back-up mechanism.
15# TEST  The end marker may also be set under c2c sync, but only if the serving
16# TEST  client has itself achieved STARTUPDONE.
17#
proc rep034 { method { niter 2 } { tnum "034" } args } {
	# Entry point for the STARTUPDONE test: runs rep034_sub once for each
	# log configuration returned by create_logsets.
	#
	# The platform and "valid methods" flags tested below are not set in
	# this proc; presumably they come from include.tcl.
	source ./include.tcl

	# Replication tests are skipped outright on Windows 9x.
	if { $is_windows9x_test == 1 } {
		puts "Skipping replication test on Win 9x platform."
		return
	}

	# When only the list of applicable access methods is wanted, report
	# that every method is valid and do no further work.
	if { $checking_valid_methods } {
		return "ALL"
	}

	set args [convert_args $method $args]

	# Each logset names the log type for the master and the two clients,
	# in that order.
	foreach logset [create_logsets 3] {
		set master_logs [lindex $logset 0]
		set client0_logs [lindex $logset 1]
		set client1_logs [lindex $logset 2]

		puts "Rep$tnum ($method $args): Test of\
		    startup synchronization detection."
		puts "Rep$tnum: Master logs are $master_logs"
		puts "Rep$tnum: Client 0 logs are $client0_logs"
		puts "Rep$tnum: Client 1 logs are $client1_logs"

		rep034_sub $method $niter $tnum $logset $args
	}
}
42
# Run one pass of the STARTUPDONE test for a single combination of log types.
#
# Arguments:
#	method	access method, passed through to rep_test
#	niter	iteration count, passed through to rep_test
#	tnum	test number, used only in progress messages
#	logset	three-element list of log types: master, client 0, client 1
#	largs	extra arguments appended to every rep_test call
#
# This test manages on its own the decision of whether or not to open an
# environment with recovery.  (It varies throughout the test.)  Therefore there
# is no need to run it twice (as we often do with a loop in the main proc).
#
# The global "anywhere" flag is switched to 1 for the client-to-client parts
# of the test.  It is reset to 0 on every return path, including the early
# exits taken for in-memory logging, so that the setting does not leak into
# whatever runs after this proc.
#
proc rep034_sub { method niter tnum logset largs } {
	global anywhere
	global testdir
	global startup_done
	global rep_verbose
	global verbose_type
	global rep034_got_allreq

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}

	env_cleanup $testdir

	replsetup $testdir/MSGQUEUEDIR

	set masterdir $testdir/MASTERDIR
	set clientdir $testdir/CLIENTDIR
	set clientdir2 $testdir/CLIENTDIR2

	file mkdir $masterdir
	file mkdir $clientdir
	file mkdir $clientdir2

	set m_logtype [lindex $logset 0]
	set c_logtype [lindex $logset 1]
	set c2_logtype [lindex $logset 2]

	# In-memory logs require a large log buffer, and cannot
	# be used with -txn nosync.
	set m_logargs [adjust_logargs $m_logtype]
	set c_logargs [adjust_logargs $c_logtype]
	set c2_logargs [adjust_logargs $c2_logtype]
	set m_txnargs [adjust_txnargs $m_logtype]
	set c_txnargs [adjust_txnargs $c_logtype]
	set c2_txnargs [adjust_txnargs $c2_logtype]

	# In first part of test master serves requests.
	#
	set anywhere 0

	# Create a master; add some data.
	#
	repladd 1
	set ma_envcmd "berkdb_env_noerr -create $m_txnargs $m_logargs \
	    -event rep_event $verbargs -errpfx MASTER \
	    -home $masterdir -rep_master -rep_transport \[list 1 replsend\]"
	set masterenv [eval $ma_envcmd]
	puts "\tRep$tnum.a: Create master; add some data."
	eval rep_test $method $masterenv NULL $niter 0 0 0 0 $largs

	# Bring up a new client, and see that it can get STARTUPDONE with no new
	# live transactions at the master.
	#
	puts "\tRep$tnum.b: Bring up client; check STARTUPDONE."
	repladd 2
	set cl_envcmd "berkdb_env_noerr -create $c_txnargs $c_logargs \
	    -event rep_event $verbargs -errpfx CLIENT \
	    -home $clientdir -rep_client -rep_transport \[list 2 replsend\]"
	set clientenv [eval $cl_envcmd]
	set envlist "{$masterenv 1} {$clientenv 2}"
	set startup_done 0
	process_msgs $envlist

	error_check_good done_without_live_txns \
	    [stat_field $clientenv rep_stat "Startup complete"] 1

	# Test that the event got fired as well.  In the rest of the test things
	# get a little complex (what with having two clients), so only check the
	# event part here.  The important point is the various ways that
	# STARTUPDONE can be computed, so testing the event firing mechanism
	# just this once is enough.
	#
	error_check_good done_event_too $startup_done 1

	#
	# Bring up another client.  Do additional new txns at master, ensure
	# that STARTUPDONE is not triggered at NEWMASTER LSN.
	#
	puts "\tRep$tnum.c: Another client; no STARTUPDONE at NEWMASTER LSN."
	set newmaster_lsn [stat_field $masterenv rep_stat "Next LSN expected"]
	repladd 3
	#
	# !!! Please note that we're giving client2 a special customized version
	# of the replication transport call-back function.
	#
	set cl2_envcmd "berkdb_env_noerr -create $c2_txnargs $c2_logargs \
	    -event rep_event $verbargs -errpfx CLIENT2 \
	    -home $clientdir2 -rep_client -rep_transport \[list 3 rep034_send\]"
	set client2env [eval $cl2_envcmd]

	set envlist "{$masterenv 1} {$clientenv 2} {$client2env 3}"
	set verified false
	for {set i 0} {$i < 10} {incr i} {
		proc_msgs_once $envlist
		set client2lsn \
		    [stat_field $client2env rep_stat "Next LSN expected"]

		# Get to the point where we've gone past where the master's LSN
		# was at NEWMASTER time, and make sure we haven't yet gotten
		# STARTUPDONE.  Ten loop iterations should be plenty.
		#
		if {[$client2env log_compare $client2lsn $newmaster_lsn] > 0} {
			if {![stat_field \
			    $client2env rep_stat "Startup complete"]} {
				set verified true
			}
			break;
		}
		eval rep_test $method $masterenv NULL $niter 0 0 0 0 $largs
	}
	error_check_good no_newmaster_trigger $verified true

	process_msgs $envlist
	error_check_good done_during_live_txns \
	    [stat_field $client2env rep_stat "Startup complete"] 1

	#
	# From here on out we use client-to-client sync.
	#
	set anywhere 1

	# Here we rely on recovery at client 1.  If that client is running with
	# in-memory logs, forgo the remainder of the test.
	#
	if {$c_logtype eq "in-mem"} {
		puts "\tRep$tnum.d: Skip rest of test for in-memory logging."
		$masterenv close
		$clientenv close
		$client2env close
		replclose $testdir/MSGQUEUEDIR
		# Restore the global request-routing flag, exactly as the
		# normal exit at the bottom of this proc does.
		set anywhere 0
		return
	}

	# Shut down client 1.  Bring it back, with recovery.  Verify that it can
	# get STARTUPDONE by syncing to other client, even with no new master
	# txns.
	#
	puts "\tRep$tnum.d: Verify STARTUPDONE using c2c sync."
	$clientenv close
	set clientenv [eval $cl_envcmd -recover]
	set envlist "{$masterenv 1} {$clientenv 2} {$client2env 3}"

	# Clear counters at client2, so that we can check "Client service
	# requests" in a moment.
	#
	$client2env rep_stat -clear
	process_msgs $envlist
	error_check_good done_via_c2c \
	    [stat_field $clientenv rep_stat "Startup complete"] 1
	#
	# Make sure our request was served by client2.  This isn't a test of c2c
	# sync per se, but if this fails it indicates that we're not really
	# testing what we thought we were testing.
	#
	error_check_bad c2c_served_by_master \
	    [stat_field $client2env rep_stat "Client service requests"] 0

	# Verify that we don't get STARTUPDONE if we are using c2c sync to
	# another client, and the serving client has not itself reached
	# STARTUPDONE, because that suggests that the serving client could be
	# way far behind.   But that we can still eventually get STARTUPDONE, as
	# a fall-back, once the master starts generating new txns again.
	#
	# To do so, we'll need to restart both clients.  Start with the client
	# that will serve the request.  Turn off "anywhere" process for a moment
	# so that we can get this client set up without having the other one
	# running.
	#
	# Now it's client 2 that needs recovery.  Forgo the rest of the test if
	# it is logging in memory.  (We could get this far in mixed mode, with
	# client 1 logging on disk.)
	#
	if {$c2_logtype eq "in-mem"} {
		puts "\tRep$tnum.e: Skip rest of test for in-memory logging."
		$masterenv close
		$clientenv close
		$client2env close
		replclose $testdir/MSGQUEUEDIR
		# Restore the global request-routing flag, exactly as the
		# normal exit at the bottom of this proc does.
		set anywhere 0
		return
	}
	puts "\tRep$tnum.e: Check no STARTUPDONE when c2c server is behind."
	$clientenv log_flush
	$clientenv close
	$client2env log_flush
	$client2env close

	set anywhere 0
	set client2env [eval $cl2_envcmd -recover]
	set envlist "{$masterenv 1} {$client2env 3}"

	# We want client2 to get partway through initialization, but once it
	# sends the ALL_REQ to the master, we want to cut things off there.
	# Recall that we gave client2 a special "wrapper" version of the
	# replication transport call-back function: that function will set a
	# flag when it sees an ALL_REQ message go by.
	#
	# NOTE(review): this loop has no iteration bound; it only ends once
	# the wrapper has seen an ALL_REQ.
	#
	set rep034_got_allreq false
	while { !$rep034_got_allreq } {
		proc_msgs_once $envlist
	}

	#
	# To make sure we're doing a valid test, verify that we really did
	# succeed in getting the serving client into the state we intended.
	#
	error_check_good serve_from_notstarted \
	    [stat_field $client2env rep_stat "Startup complete"] 0

	# Start up the client to be tested.  Make sure it doesn't get
	# STARTUPDONE (yet).  Again, the checking of service request stats is
	# just for test debugging, to make sure we have a valid test.
	#
	# To add insult to injury, not only do we not get STARTUPDONE from the
	# "behind" client, we also don't even get all the log records we need
	# (because we didn't allow client2's ALL_REQ to get to the master).
	# And no mechanism to let us know that.  The only resolution is to wait
	# for gap detection to rerequest (which would then go to the master).
	# So, set a small rep_request upper bound, so that it doesn't take a ton
	# of new live txns to reach the trigger.
	#
	set anywhere 1
	$client2env rep_stat -clear
	replclear 2
	set clientenv [eval $cl_envcmd -recover]
	#
	# Set to 400 usecs.  An average ping to localhost should
	# be a few 10s usecs.
	#
	$clientenv rep_request 400 400
	set envlist "{$masterenv 1} {$clientenv 2} {$client2env 3}"

	# Keep exchanging messages, with master-to-client2 traffic discarded,
	# until a full round processes nothing.
	while {[rep034_proc_msgs_once $masterenv $clientenv $client2env] > 0} {}

	error_check_good not_from_undone_c2c_client \
	    [stat_field $clientenv rep_stat "Startup complete"] 0

	error_check_bad c2c_served_by_master \
	    [stat_field $client2env rep_stat "Client service requests"] 0

	# Verify that we nevertheless *do* get STARTUPDONE after the master
	# starts generating new txns again.
	#
	puts "\tRep$tnum.f: Check STARTUPDONE via fall-back to live txns."
	eval rep_test $method $masterenv NULL $niter 0 0 0 0 $largs
	process_msgs $envlist
	error_check_good fallback_live_txns \
	    [stat_field $clientenv rep_stat "Startup complete"] 1

	$masterenv close
	$clientenv close
	$client2env close
	replclose $testdir/MSGQUEUEDIR
	set anywhere 0
}
303
# Run a single round of message processing in which anything queued for
# client2 as a result of the master's turn is thrown away, so that client2
# never receives a message from the master.
#
# Expects the usual site numbering: master is 1, client is 2, client2 is 3.
#
# Returns the combined count reported by the two proc_msgs_once calls.
#
proc rep034_proc_msgs_once { masterenv clientenv client2env } {
	set master_only "{$masterenv 1}"
	set both_clients "{$clientenv 2} {$client2env 3}"

	# Let the master process its messages, then discard whatever is now
	# queued for site 3 before the clients get their turn.
	set master_count [proc_msgs_once $master_only NONE err]
	error_check_good pmonce_1 $err 0
	replclear 3

	set client_count [proc_msgs_once $both_clients NONE err]
	error_check_good pmonce_2 $err 0

	return [expr {$master_count + $client_count}]
}
319
# Transport call-back used in place of replsend.  Every message is handed
# on to the real replsend unchanged; the only extra work is to raise the
# global rep034_got_allreq flag whenever an ALL_REQ message passes through.
#
proc rep034_send { control rec fromid toid flags lsn } {
	set msgtype [berkdb msgtype $control]
	if {[string equal $msgtype "all_req"]} {
		set ::rep034_got_allreq true
	}

	# Delegate the actual send and pass its result back to the caller.
	return [replsend $control $rec $fromid $toid $flags $lsn]
}
331