1# See the file LICENSE for redistribution information.
2#
3# Copyright (c) 2007-2009 Oracle.  All rights reserved.
4#
5# $Id$
6#
7# TEST	rep074
8# TEST	Verify replication withstands send errors processing requests.
9# TEST
10# TEST	Run for btree only because access method shouldn't matter.
11# TEST
proc rep074 { method { niter 20 } { tnum "074" } args } {
	# Driver for test rep074: filters out unsupported configurations,
	# then runs rep074_sub once for every log configuration returned
	# by create_logsets.
	#
	#   method - access method requested; anything but btree is skipped.
	#   niter  - iteration count handed on to rep074_sub (default 20).
	#   tnum   - test number used in the progress messages (default "074").
	#   args   - extra arguments; passed through convert_args and then
	#            forwarded to rep074_sub.
	#
	# Returns "btree" when $checking_valid_methods is set; otherwise the
	# return value is empty.

	source ./include.tcl
	global databases_in_memory
	global repfiles_in_memory

	# Replication tests are not run on Win9x.
	if { $is_windows9x_test == 1 } {
		puts "Skipping replication test on Win9x platform."
		return
	}

	# This test only runs for btree: report that when asked which
	# methods are valid, and skip every other method.
	if { $checking_valid_methods } {
		return btree
	}
	if { [is_btree $method] == 0 } {
		puts "Rep$tnum: skipping for non-btree method $method."
		return
	}

	set args [convert_args $method $args]
	set logsets [create_logsets 2]

	# Describe where the databases live: on disk or in memory.
	if { $databases_in_memory } {
		if { [is_queueext $method] } {
			puts "Skipping rep$tnum for method\
			    $method with named in-memory databases."
			return
		}
		set dbdesc "using named in-memory databases"
	} else {
		set dbdesc "using on-disk databases"
	}

	# Describe where the replication files live.
	if { $repfiles_in_memory } {
		set repdesc "and in-memory replication files"
	} else {
		set repdesc "and on-disk replication files"
	}

	# Each element of logsets is a pair: master log type first,
	# client log type second.
	foreach logpair $logsets {
		set mlog [lindex $logpair 0]
		set clog [lindex $logpair 1]
		puts "Rep$tnum ($method): Test of send errors processing\
		    requests $dbdesc $repdesc."
		puts "Rep$tnum: Master logs are $mlog"
		puts "Rep$tnum: Client logs are $clog"
		rep074_sub $method $niter $tnum $logpair $args
	}
}
59
proc rep074_sub { method niter tnum logset largs } {
	# Run one pass of the rep074 scenario for a single log configuration.
	#
	#   method - access method, passed on to rep_test.
	#   niter  - number of iterations given to rep_test; half of it is
	#            also used as the number of messages the master sends
	#            normally before the simulated send failure.
	#   tnum   - test number used in the progress messages.
	#   logset - two-element list: master log type, client log type.
	#   largs  - extra arguments appended to each rep_test invocation.
	#
	# The master's transport callback is rep074_replsend, whose behavior
	# is steered through the global rep074_failure_count; the client uses
	# the plain replsend.

	global testdir
	global rep074_failure_count
	global repfiles_in_memory
	global rep_verbose
	global verbose_type

	# Start with failure simulation switched off.
	set rep074_failure_count -1

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}

	set repmemargs ""
	if { $repfiles_in_memory } {
		set repmemargs "-rep_inmem_files "
	}

	env_cleanup $testdir

	replsetup $testdir/MSGQUEUEDIR

	set masterdir $testdir/MASTERDIR
	set clientdir $testdir/CLIENTDIR

	file mkdir $masterdir
	file mkdir $clientdir

	set m_logtype [lindex $logset 0]
	set c_logtype [lindex $logset 1]

	# In-memory logs require a large log buffer, and cannot
	# be used with -txn nosync.  Adjust the args for master
	# and client.
	set m_logargs [adjust_logargs $m_logtype]
	set c_logargs [adjust_logargs $c_logtype]
	set m_txnargs [adjust_txnargs $m_logtype]
	set c_txnargs [adjust_txnargs $c_logtype]

	# Open a master.  Its transport is rep074_replsend so that send
	# failures can be injected later on.
	repladd 1
	set ma_envcmd "berkdb_env_noerr -create $verbargs -errpfx MASTER \
	    -home $masterdir $m_logargs $m_txnargs $repmemargs \
	    -rep_transport \[list 1 rep074_replsend\]"
	set masterenv [eval $ma_envcmd -rep_master]

	# Create some new records, so that the master will have something
	# substantial to say when asked for LOG_REQ.
	#
	puts "\tRep$tnum.a: Running rep_test in replicated env."
	eval rep_test $method $masterenv NULL $niter 0 0 0 $largs

	# Open a client
	repladd 2
	set cl_envcmd "berkdb_env_noerr -create $verbargs -errpfx CLIENT \
	    -home $clientdir $c_logargs $c_txnargs $repmemargs \
	    -rep_transport \[list 2 replsend\]"
	set clientenv [eval $cl_envcmd -rep_client]
	set envlist "{$masterenv 1} {$clientenv 2}"

	# Bring the client online by processing the startup messages.  This will
	# cause the client to send a request to the master.
	#
	# In the first cycle, the client gets NEWMASTER and sends an UPDATE_REQ.
	# In the second cycle, the master answers the UPDATE_REQ with an UPDATE,
	# and the client sends a PAGE_REQ.  Third, once we've gotten pages, we
	# send a LOG_REQ.
	#
	# 1. NEWCLIENT -> NEWMASTER -> UPDATE_REQ
	# 2.              UPDATE -> PAGE_REQ
	# 3.              PAGE -> LOG_REQ
	#
	puts "\tRep$tnum.b: NEWMASTER -> UPDATE_REQ"
	proc_msgs_once $envlist
	puts "\tRep$tnum.c: UPDATE -> PAGE_REQ"
	proc_msgs_once $envlist
	puts "\tRep$tnum.d: PAGE -> LOG_REQ"
	proc_msgs_once $envlist

	# Force a sending error at the master while processing the LOG_REQ.
	# We should ignore it, and return success to rep_process_message
	#
	# The expression is braced so that it is substituted only once.
	puts "\tRep$tnum.e: Simulate a send error."
	set rep074_failure_count [expr {$niter / 2}]
	proc_msgs_once $envlist NONE errorp

	puts "\tRep$tnum.f: Check for good return from rep_process_msg."
	error_check_good rep_resilient $errorp 0

	# Since we interrupted the flow with the simulated error, we don't have
	# the log records we need yet.
	#
	error_check_bad startupdone \
	    [stat_field $clientenv rep_stat "Startup complete"] 1

	#
	# Run some more new txns at the master, so that the client eventually
	# decides to request the remainder of the LOG_REQ response that it's
	# missing.  Pause for a second to make sure we reach the lower
	# threshold for re-request on fast machines.  We need to force a
	# checkpoint because we need to create a gap, and then pause to
	# reach the rerequest threshold.
	#
	# Failure simulation is switched back off first.
	set rep074_failure_count -1
	$masterenv txn_checkpoint -force
	process_msgs $envlist
	tclsleep 1
	eval rep_test $method $masterenv NULL $niter 0 0 0 $largs
	process_msgs $envlist

	error_check_good startupdone \
	    [stat_field $clientenv rep_stat "Startup complete"] 1

	$masterenv close
	$clientenv close
	replclose $testdir/MSGQUEUEDIR
}
178
# Send callback installed as the master's -rep_transport in rep074_sub.
# The global rep074_failure_count selects what it does:
#   count < 0   failure simulation is off; the message is handed to replsend.
#   count > 0   the message is handed to replsend and the count is
#               decremented, so that many messages go through normally.
#   count == 0  nothing is sent and a non-zero value is returned to
#               signal a send error.
#
proc rep074_replsend { control rec fromid toid flags lsn } {
	global rep074_failure_count

	# Countdown exhausted: report an error.  The value 1 is arbitrary;
	# any non-zero result indicates failure.
	if { $rep074_failure_count == 0 } {
		return 1
	}

	# Still counting down: use up one normally-delivered message.
	if { $rep074_failure_count > 0 } {
		incr rep074_failure_count -1
	}

	return [replsend $control $rec $fromid $toid $flags $lsn]
}
198