1# See the file LICENSE for redistribution information.
2#
3# Copyright (c) 2004-2009 Oracle.  All rights reserved.
4#
5# $Id$
6#
7# TEST	env012
8# TEST	Test DB_REGISTER.
9# TEST
10# TEST	DB_REGISTER will fail on systems without fcntl.  If it
11# TEST	fails, make sure we got the expected DB_OPNOTSUP return.
12# TEST
13# TEST	Then, the real tests:
14# TEST	For each test, we start a process that opens an env with -register.
15# TEST
16# TEST	1. Verify that a 2nd process can enter the existing env with -register.
17# TEST
18# TEST	2. Kill the 1st process, and verify that the 2nd process can enter
19# TEST	with "-register -recover".
20# TEST
21# TEST	3. Kill the 1st process, and verify that the 2nd process cannot
22# TEST	enter with just "-register".
23# TEST
24# TEST	4. While the 1st process is still running, a 2nd process enters
25# TEST	with "-register".  Kill the 1st process.  Verify that a 3rd process
26# TEST	can enter with "-register -recover".  Verify that the 3rd process,
27# TEST	entering, causes process 2 to fail with the message DB_RUNRECOVERY.
28# TEST
29# TEST	5. We had a bug where recovery was always run with -register
30# TEST	if there were empty slots in the process registry file.  Verify
31# TEST	that recovery doesn't automatically run if there is an empty slot.
32# TEST
33# TEST  6. Verify process cannot connect when specifying -failchk and an
34# TEST  isalive function has not been declared.
35# TEST
36# TEST  7. Verify that a 2nd process can enter the existing env with -register
37# TEST  and -failchk and having specified an isalive function
38# TEST
39# TEST  8. Kill the 1st process, and verify that the 2nd process can enter
40# TEST  with "-register -failchk -recover"
41# TEST
42# TEST  9. 2nd process enters with "-register -failchk".  Kill the 1st process.
43# TEST  2nd process may get blocked on a mutex held by process one.  Verify
44# TEST  3rd process can enter with "-register -recover -failchk".   3rd process
45# TEST  should run failchk, clear out open txn/log from process 1.   It will
46# TEST  enter env without need for any additional recovery.   We look for
47# TEST  "Freeing log information .." sentence in the log for 3rd process as
48# TEST  an indication that failchk ran.   If DB_RUNRECOVERY were returned
49# TEST  instead it would mean failchk could not recover.
50
# env012 --
#	Exercise DB_REGISTER (and DB_REGISTER combined with failchk) by
#	starting helper processes through wrap.tcl/envscript.tcl, killing
#	some of them, and then inspecting the per-process log files.
#
#	Takes no arguments.  Problems are reported by printing lines that
#	start with "FAIL:" (see logcheck and logcheckfails) or through
#	error_check_good.
#
#	The arguments handed to envscript.tcl are, in order: log file,
#	env home, database file, operation (PUT/GET/LOOP), key, data,
#	then three trailing values.  NOTE(review): the trailing values
#	look like a recovery flag (RECOVER or 0), a failchk flag
#	(FAILCHK0, FAILCHK1 or 0) and a wait time -- confirm against
#	envscript.tcl.
proc env012 { } {
	# include.tcl supplies testdir, tclsh_path, test_path and
	# is_windows_test, all of which are used below.
	source ./include.tcl
	set tnum "012"

	puts "Env$tnum: Test of DB_REGISTER."

	puts "\tEnv$tnum.a: Platforms without fcntl fail with DB_OPNOTSUP."
	env_cleanup $testdir
	if {[catch {eval {berkdb_env} \
	    -create -home $testdir -txn -register -recover} env]} {
		error_check_good fail_OPNOTSUP [is_substr $env DB_OPNOTSUP] 1
		puts "Skipping env$tnum; DB_REGISTER is not supported."
		# On failure $env holds the error text, not an env handle,
		# so there is nothing to close and nothing left to test.
		return
	}
	error_check_good env_close [$env close] 0

	puts "\tEnv$tnum.b: Second process can join with -register."
	env_cleanup $testdir
	set testfile TESTFILE
	set key KEY
	set data DATA1

	puts "\t\tEnv$tnum.b1: Start process 1."
	set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
	    $testdir/env$tnum.log.p1 \
	    $testdir $testfile PUT $key $data RECOVER 0 10 &]

	# Wait a while so process 1 has a chance to get going.
	tclsleep 2

	puts "\t\tEnv$tnum.b2: Start process 2."
	set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
	    $testdir/env$tnum.log.p2 \
	    $testdir $testfile GET $key $data 0 0 0 &]

	watch_procs $p1 1 120
	watch_procs $p2 1 120

	# Check log files for failures.
	logcheck $testdir/env$tnum.log.p1
	logcheck $testdir/env$tnum.log.p2

	puts "\tEnv$tnum.c: Second process can join with -register\
	    -recover after first process is killed."
	env_cleanup $testdir

	puts "\t\tEnv$tnum.c1: Start process 1."
	set pids {}
	set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
	    $testdir/env$tnum.log.p1 \
	    $testdir $testfile PUT $key $data RECOVER 0 10 &]
	lappend pids $p1
	tclsleep 2

	# findprocessids adds the child pids (found through the begin.*
	# sentinel files) so that parent and child are both killed.
	puts "\t\tEnv$tnum.c2: Kill process 1."
	set pids [findprocessids $testdir $pids]
	foreach pid $pids {
		tclkill $pid
	}

	puts "\t\tEnv$tnum.c3: Start process 2."
	set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
	    $testdir/env$tnum.log.p2 \
	    $testdir $testfile GET $key $data RECOVER 0 0 &]

	watch_procs $p2 1 120

	# Check log files for failures.
	logcheck $testdir/env$tnum.log.p1
	logcheck $testdir/env$tnum.log.p2

	if { $is_windows_test == 1 } {
		puts "Skipping sections .d and .e on Windows platform."
	} else {
		puts "\tEnv$tnum.d: Second process cannot join without -recover\
		    after first process is killed."
		env_cleanup $testdir

		puts "\t\tEnv$tnum.d1: Start process 1."
		set pids {}
		set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
		    $testdir/env$tnum.log.p1 \
		    $testdir $testfile PUT $key $data RECOVER 0 10 &]
		lappend pids $p1
		tclsleep 2

		puts "\t\tEnv$tnum.d2: Kill process 1."
		set pids [findprocessids $testdir $pids]
		foreach pid $pids {
			tclkill $pid
		}

		puts "\t\tEnv$tnum.d3: Start process 2."
		set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
		    $testdir/env$tnum.log.p2 \
		    $testdir $testfile GET $key $data 0 0 0 &]
		tclsleep 2
		watch_procs $p2 1 120

		# Check log files.  Log p1 should be clean, but we
		# expect DB_RUNRECOVERY in log p2.
		logcheck $testdir/env$tnum.log.p1
		logcheckfails $testdir/env$tnum.log.p2 DB_RUNRECOVERY

		puts "\tEnv$tnum.e: Running registered process detects failure."
		env_cleanup $testdir

		puts "\t\tEnv$tnum.e1: Start process 1."
		set pids {}
		set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
		    $testdir/env$tnum.log.p1 \
		    $testdir $testfile PUT $key $data RECOVER 0 10 &]
		lappend pids $p1
		tclsleep 2

		# Identify child process to kill later.  This is done
		# before process 2 starts, so only process 1's sentinel
		# files have been created by this test section so far.
		set pids [findprocessids $testdir $pids]

		puts "\t\tEnv$tnum.e2: Start process 2."
		set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
		    $testdir/env$tnum.log.p2 \
		    $testdir $testfile LOOP $key $data 0 0 10 &]

		puts "\t\tEnv$tnum.e3: Kill process 1."
		foreach pid $pids {
			tclkill $pid
		}

		puts "\t\tEnv$tnum.e4: Start process 3."
		set p3 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
		    $testdir/env$tnum.log.p3 \
		    $testdir $testfile GET $key $data RECOVER 0 0 &]
		tclsleep 2

		watch_procs $p2 1 120
		watch_procs $p3 1 120

		# Check log files.  Logs p1 and p3 should be clean, but we
		# expect DB_RUNRECOVERY in log p2.
		logcheck $testdir/env$tnum.log.p1
		logcheckfails $testdir/env$tnum.log.p2 DB_RUNRECOVERY
		logcheck $testdir/env$tnum.log.p3
	}

	puts "\tEnv$tnum.f: Empty slot shouldn't cause automatic recovery."

	# Create 2 empty slots in the registry by letting two processes
	# run to completion.
	puts "\t\tEnv$tnum.f1: Start process 1."
	set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
	    $testdir/env$tnum.log.p1 \
	    $testdir $testfile PUT $key $data RECOVER 0 1 &]

	puts "\t\tEnv$tnum.f2: Start process 2."
	set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
	    $testdir/env$tnum.log.p2 \
	    $testdir $testfile GET $key $data 0 0 1 &]

	watch_procs $p1 1 60
	watch_procs $p2 1 60

	logcheck $testdir/env$tnum.log.p1
	logcheck $testdir/env$tnum.log.p2

	# Start two more processes.  Neither should signal a need for
	# recovery.
	puts "\t\tEnv$tnum.f3: Start process 3."
	set p3 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
	    $testdir/env$tnum.log.p3 \
	    $testdir $testfile GET $key $data RECOVER 0 10 &]

	tclsleep 2

	puts "\t\tEnv$tnum.f4: Start process 4."
	set p4 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
	    $testdir/env$tnum.log.p4 \
	    $testdir $testfile PUT $key $data 0 0 10 &]

	watch_procs $p3 1 120
	watch_procs $p4 1 120

	# Check log files: neither process should have returned DB_RUNRECOVERY.
	logcheck $testdir/env$tnum.log.p3
	logcheck $testdir/env$tnum.log.p4

	puts "\tEnv$tnum.g: One process with -register & -failchk & no isalive"

	# Use -failchk only; the process is expected to fail because an
	# isalive function is needed.
	puts "\t\tEnv$tnum.g1: Start process 1."
	env_cleanup $testdir

	set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
	    $testdir/env$tnum.log.p1 \
	    $testdir $testfile PUT $key $data RECOVER FAILCHK0 10 &]

	watch_procs $p1 1 60

	# Check log files for failures.  Expect to see a failure.
	logcheckfails $testdir/env$tnum.log.p1 DB_FAILCHK

	puts "\tEnv$tnum.h: Second process joins with -register and -failchk."
	env_cleanup $testdir

	# Use -failchk and -isalive flags.
	puts "\t\tEnv$tnum.h1: Start process 1."
	set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
	    $testdir/env$tnum.log.p1 \
	    $testdir $testfile PUT $key $data RECOVER FAILCHK1 10 &]

	# Wait a while so process 1 has a chance to get going.
	tclsleep 2

	puts "\t\tEnv$tnum.h2: Start process 2."
	set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
	    $testdir/env$tnum.log.p2 \
	    $testdir $testfile GET $key $data 0 FAILCHK1 0 &]

	watch_procs $p1 1 120
	watch_procs $p2 1 120

	# Check log files for failures.
	logcheck $testdir/env$tnum.log.p1
	logcheck $testdir/env$tnum.log.p2

	puts "\tEnv$tnum.i: Second process can join with -register\
	    -recover -failchk after first process is killed."
	env_cleanup $testdir

	puts "\t\tEnv$tnum.i1: Start process 1."
	set pids {}
	set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
	    $testdir/env$tnum.log.p1 \
	    $testdir $testfile PUT $key $data RECOVER FAILCHK1 10 &]
	lappend pids $p1
	tclsleep 2

	puts "\t\tEnv$tnum.i2: Kill process 1."
	set pids [findprocessids $testdir $pids]
	foreach pid $pids {
		tclkill $pid
	}

	puts "\t\tEnv$tnum.i3: Start process 2."
	set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
	    $testdir/env$tnum.log.p2 \
	    $testdir $testfile GET $key $data RECOVER FAILCHK1 0 &]

	watch_procs $p2 1 120

	# Check log files for failures.
	logcheck $testdir/env$tnum.log.p1
	logcheck $testdir/env$tnum.log.p2

	if { $is_windows_test == 1 } {
		puts "Skipping sections .j on Windows platform."
	} else {
		puts "\tEnv$tnum.j: Running registered process detects failure and recovers."
		env_cleanup $testdir

		puts "\t\tEnv$tnum.j1: Start process 1."
		set pids {}
		set p1 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
		    $testdir/env$tnum.log.p1 \
		    $testdir $testfile LOOP $key $data RECOVER FAILCHK1 5 &]
		lappend pids $p1
		tclsleep 2

		# Identify child process to kill later.
		set pids [findprocessids $testdir $pids]

		puts "\t\tEnv$tnum.j2: Start process 2."
		set p2 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
		    $testdir/env$tnum.log.p2 \
		    $testdir $testfile LOOP $key $data 0 0 10 &]

		puts "\t\tEnv$tnum.j3: Kill process 1."
		foreach pid $pids {
			tclkill $pid
		}

		# Identify child process 2; do this after process 1 has
		# died.  The list still holds process 1's pids, so they
		# are passed to tclkill a second time in step j5.
		lappend pids $p2

		# Identify child process to kill later.
		set pids [findprocessids $testdir $pids]

		puts "\t\tEnv$tnum.j4: Start process 3."
		set p3 [exec $tclsh_path $test_path/wrap.tcl envscript.tcl \
		    $testdir/env$tnum.log.p3 \
		    $testdir $testfile GET $key $data RECOVER FAILCHK1 0 &]
		# Sleep for approx 20 seconds -- process 2 should still
		# be going.
		tclsleep 20

		puts "\t\tEnv$tnum.j5: Kill process 2."
		foreach pid $pids {
			tclkill $pid
		}

		watch_procs $p3 1 30

		# Check log files.  Logs p1 and p2 should be clean, but we
		# expect failchk messages in p3 from cleanup ("Freeing"
		# is taken as the sign that failchk ran).
		logcheckfails $testdir/env$tnum.log.p3 Freeing
		logcheck $testdir/env$tnum.log.p2
		logcheck $testdir/env$tnum.log.p1
	}
}
356
# logcheck --
#	Scan a log file with findfail and print one "FAIL:" line for
#	every error string it returns.  Use this when the log is
#	expected to be free of failures.
#
#	logname		path of the log file; it is expanded through
#			eval before being handed to findfail.
proc logcheck { logname } {
	foreach problem [eval findfail $logname] {
		puts "FAIL: error in $logname : $problem"
	}
}
365
# logcheckfails --
#	Use this when a failure is expected: read the log line by line
#	and confirm that some line contains the expected message.
#
#	logname		path of the log file to read
#	message		text that must appear in at least one line
#
#	Returns 0 as soon as a matching line is found.  If no line
#	matches, a "FAIL:" line is printed instead.
proc logcheckfails { logname message }  {
	set fd [open $logname r]
	set found 0
	while { [gets $fd text] >= 0 } {
		if { [is_substr $text $message] == 1 } {
			set found 1
			break
		}
	}
	# The handle is closed exactly once, whether or not we matched.
	close $fd
	if { $found } {
		return 0
	}
	puts "FAIL: Did not find expected error $message."
}
378
# findprocessids --
#	The script wrap.tcl creates a parent and a child process.  We
#	can't see the child pids, so find them by their sentinel files,
#	which are named $testdir/begin.<pid>.
#
#	testdir		directory holding the begin.* sentinel files
#	plist		list of pids already known (the parent pids)
#
#	Returns plist with every pid found in a sentinel file name
#	appended, skipping pids that are already present.  Known pids
#	therefore stay ahead of newly discovered ones, so a parent pid
#	is always listed before its child pid.
#
#	glob is called without -nocomplain, so an error is raised when
#	no sentinel file exists.
proc findprocessids { testdir plist }  {
	# Take the pid from the file's tail by position rather than by
	# running regsub with $testdir as the pattern: a directory name
	# containing regular-expression metacharacters would otherwise
	# fail to match and leave the whole path in place of the pid.
	set skip [string length "begin."]
	foreach b [glob $testdir/begin.*] {
		set pid [string range [file tail $b] $skip end]
		if { [lsearch -exact $plist $pid] == -1 } {
			lappend plist $pid
		}
	}
	return $plist
}
393
394