1# See the file LICENSE for redistribution information.
2#
3# Copyright (c) 2001,2008 Oracle.  All rights reserved.
4#
5# $Id: reputils.tcl,v 12.69 2008/05/02 15:35:17 sue Exp $
6#
7# Replication testing utilities
8
9# Environment handle for the env containing the replication "communications
10# structure" (really a CDB environment).
11
12# The test environment consists of a queue and a # directory (environment)
13# per replication site.  The queue is used to hold messages destined for a
14# particular site and the directory will contain the environment for the
15# site.  So the environment looks like:
16#				$testdir
17#			 ___________|______________________________
18#			/           |              \               \
19#		MSGQUEUEDIR	MASTERDIR	CLIENTDIR.0 ...	CLIENTDIR.N-1
20#		| | ... |
21#		1 2 .. N+1
22#
23# The master is site 1 in the MSGQUEUEDIR and clients 1-N map to message
24# queues 2 - N+1.
25#
26# The globals repenv(1-N) contain the environment handles for the sites
27# with a given id (i.e., repenv(1) is the master's environment.
28
29
30# queuedbs is an array of DB handles, one per machine ID/machine ID pair,
31# for the databases that contain messages from one machine to another.
32# We omit the cases where the "from" and "to" machines are the same.
33# Since tcl does not have real two-dimensional arrays, we use this
34# naming convention:  queuedbs(1.2) has the handle for the database
35# containing messages to machid 1 from machid 2.
36#
37global queuedbs
38global machids
39global perm_response_list
40set perm_response_list {}
41global perm_sent_list
42set perm_sent_list {}
43global elect_timeout
44unset -nocomplain elect_timeout
45set elect_timeout(default) 5000000
46global electable_pri
47set electable_pri 5
48set drop 0
49global anywhere
50set anywhere 0
51
52global rep_verbose
53set rep_verbose 0
54global verbose_type
55set verbose_type "rep"
56
57# To run a replication test with verbose messages, type
58# 'run_verbose <test> <method>'.
59# To run a replication test with one of the subsets of verbose
60# messages, use the same syntax with 'run_verbose_elect',
61# 'run_verbose_lease', etc.
62
63proc run_verbose { reptest args } {
64	global verbose_type
65	set verbose_type "rep"
66	run_verb $reptest $args
67}
68
69proc run_verbose_elect { reptest args } {
70	global verbose_type
71	set verbose_type "rep_elect"
72	run_verb $reptest $args
73}
74
75proc run_verbose_lease { reptest args } {
76	global verbose_type
77	set verbose_type "rep_lease"
78	run_verb $reptest $args
79}
80
81proc run_verbose_misc { reptest args } {
82	global verbose_type
83	set verbose_type "rep_misc"
84	run_verb $reptest $args
85}
86
87proc run_verbose_msgs { reptest args } {
88	global verbose_type
89	set verbose_type "rep_msgs"
90	run_verb $reptest $args
91}
92
93proc run_verbose_sync { reptest args } {
94	global verbose_type
95	set verbose_type "rep_sync"
96	run_verb $reptest $args
97}
98
99proc run_verb { reptest args } {
100	global rep_verbose
101	global verbose_type
102
103	if { [string match rep* $reptest] == 0 } {
104		error "run_verbose runs only for rep tests"
105		return
106	}
107
108	set rep_verbose 1
109	if { [catch {
110		eval $reptest $args
111		flush stdout
112		flush stderr
113	} res] != 0 } {
114		global errorInfo
115
116		set rep_verbose 0
117		set fnl [string first "\n" $errorInfo]
118		set theError [string range $errorInfo 0 [expr $fnl - 1]]
119		if {[string first FAIL $errorInfo] == -1} {
120			error "FAIL:[timestamp]\
121			    run_verbose: $reptest: $theError"
122		} else {
123			error $theError;
124		}
125	}
126	set rep_verbose 0
127}
128
129# The default for replication testing is for logs to be on-disk.
130# Mixed-mode log testing provides a mixture of on-disk and
131# in-memory logging, or even all in-memory.  When testing on a
132# 1-master/1-client test, we try all four options.  On a test
133# with more clients, we still try four options, randomly
134# selecting whether the later clients are on-disk or in-memory.
135#
136
137global mixed_mode_logging
138set mixed_mode_logging 0
139
140proc create_logsets { nsites } {
141	global mixed_mode_logging
142	global logsets
143	global rand_init
144
145	error_check_good set_random_seed [berkdb srand $rand_init] 0
146	if { $mixed_mode_logging == 0 || $mixed_mode_logging == 2 } {
147		if { $mixed_mode_logging == 0 } {
148			set logmode "on-disk"
149		} else {
150			set logmode "in-memory"
151		}
152		set loglist {}
153		for { set i 0 } { $i < $nsites } { incr i } {
154			lappend loglist $logmode
155		}
156		set logsets [list $loglist]
157	}
158	if { $mixed_mode_logging == 1 } {
159		set set1 {on-disk on-disk}
160		set set2 {on-disk in-memory}
161		set set3 {in-memory on-disk}
162		set set4 {in-memory in-memory}
163
164		# Start with nsites at 2 since we already set up
165		# the master and first client.
166		for { set i 2 } { $i < $nsites } { incr i } {
167			foreach set { set1 set2 set3 set4 } {
168				if { [berkdb random_int 0 1] == 0 } {
169					lappend $set "on-disk"
170				} else {
171					lappend $set "in-memory"
172				}
173			}
174		}
175		set logsets [list $set1 $set2 $set3 $set4]
176	}
177	return $logsets
178}
179
180proc run_inmem { method test {display 0} {run 1} \
181    {outfile stdout} {largs ""} } {
182	global mixed_mode_logging
183	set mixed_mode_logging 2
184
185	set prefix [string range $test 0 2]
186	if { $prefix != "rep" } {
187		puts "Skipping in-mem log testing for non-rep test."
188		set mixed_mode_logging 0
189		return
190	}
191
192	eval run_method $method $test $display $run $outfile $largs
193
194	# Reset to default values after run.
195	set mixed_mode_logging 0
196}
197
198proc run_mixedmode { method test {display 0} {run 1} \
199    {outfile stdout} {largs ""} } {
200	global mixed_mode_logging
201	set mixed_mode_logging 1
202
203	set prefix [string range $test 0 2]
204	if { $prefix != "rep" } {
205		puts "Skipping mixed-mode log testing for non-rep test."
206		set mixed_mode_logging 0
207		return
208	}
209
210	eval run_method $method $test $display $run $outfile $largs
211
212	# Reset to default values after run.
213	set mixed_mode_logging 0
214}
215
216# Create the directory structure for replication testing.
217# Open the master and client environments; store these in the global repenv
218# Return the master's environment: "-env masterenv"
219proc repl_envsetup { envargs largs test {nclients 1} {droppct 0} { oob 0 } } {
220	source ./include.tcl
221	global clientdir
222	global drop drop_msg
223	global masterdir
224	global repenv
225
226	env_cleanup $testdir
227
228	replsetup $testdir/MSGQUEUEDIR
229
230	set masterdir $testdir/MASTERDIR
231	file mkdir $masterdir
232	if { $droppct != 0 } {
233		set drop 1
234		set drop_msg [expr 100 / $droppct]
235	} else {
236		set drop 0
237	}
238
239	for { set i 0 } { $i < $nclients } { incr i } {
240		set clientdir($i) $testdir/CLIENTDIR.$i
241		file mkdir $clientdir($i)
242	}
243
244	# Open a master.
245	repladd 1
246	#
247	# Set log smaller than default to force changing files,
248	# but big enough so that the tests that use binary files
249	# as keys/data can run.  Increase the size of the log region --
250	# sdb004 needs this, now that subdatabase names are stored
251	# in the env region.
252	#
253	set logmax [expr 3 * 1024 * 1024]
254	set lockmax 40000
255	set logregion 2097152
256
257	set ma_cmd "berkdb_env -create -log_max $logmax $envargs \
258	    -cachesize { 0 4194304 1 } -log_regionmax $logregion \
259	    -lock_max_objects $lockmax -lock_max_locks $lockmax \
260	    -home $masterdir -txn nosync -rep_master -rep_transport \
261	    \[list 1 replsend\]"
262#	set ma_cmd "berkdb_env_noerr -create -log_max $logmax $envargs \
263#	    -cachesize { 0 4194304 1 } -log_regionmax $logregion \
264#	    -lock_max_objects $lockmax -lock_max_locks $lockmax \
265#	    -verbose {rep on} -errfile /dev/stderr -errpfx $masterdir \
266#	    -home $masterdir -txn nosync -rep_master -rep_transport \
267#	    \[list 1 replsend\]"
268	set masterenv [eval $ma_cmd]
269	error_check_good master_env [is_valid_env $masterenv] TRUE
270	set repenv(master) $masterenv
271
272	# Open clients
273	for { set i 0 } { $i < $nclients } { incr i } {
274		set envid [expr $i + 2]
275		repladd $envid
276                set cl_cmd "berkdb_env -create $envargs -txn nosync \
277		    -cachesize { 0 10000000 0 } -log_regionmax $logregion \
278		    -lock_max_objects $lockmax -lock_max_locks $lockmax \
279		    -home $clientdir($i) -rep_client -rep_transport \
280		    \[list $envid replsend\]"
281#		set cl_cmd "berkdb_env_noerr -create $envargs -txn nosync \
282#		    -cachesize { 0 10000000 0 } -log_regionmax $logregion \
283#		    -lock_max_objects $lockmax -lock_max_locks $lockmax \
284#		    -home $clientdir($i) -rep_client -rep_transport \
285#		    \[list $envid replsend\] -verbose {rep on} \
286#		    -errfile /dev/stderr -errpfx $clientdir($i)"
287                set clientenv [eval $cl_cmd]
288		error_check_good client_env [is_valid_env $clientenv] TRUE
289		set repenv($i) $clientenv
290	}
291	set repenv($i) NULL
292	append largs " -env $masterenv "
293
294	# Process startup messages
295	repl_envprocq $test $nclients $oob
296
297	# Clobber replication's 30-second anti-archive timer, which
298	# will have been started by client sync-up internal init, in
299	# case the test we're about to run wants to do any log
300	# archiving, or database renaming and/or removal.
301	$masterenv test force noarchive_timeout
302
303	return $largs
304}
305
306# Process all incoming messages.  Iterate until there are no messages left
307# in anyone's queue so that we capture all message exchanges. We verify that
308# the requested number of clients matches the number of client environments
309# we have.  The oob parameter indicates if we should process the queue
310# with out-of-order delivery.  The replprocess procedure actually does
311# the real work of processing the queue -- this routine simply iterates
312# over the various queues and does the initial setup.
313proc repl_envprocq { test { nclients 1 } { oob 0 }} {
314	global repenv
315	global drop
316
317	set masterenv $repenv(master)
318	for { set i 0 } { 1 } { incr i } {
319		if { $repenv($i) == "NULL"} {
320			break
321		}
322	}
323	error_check_good i_nclients $nclients $i
324
325	berkdb debug_check
326	puts -nonewline "\t$test: Processing master/$i client queues"
327	set rand_skip 0
328	if { $oob } {
329		puts " out-of-order"
330	} else {
331		puts " in order"
332	}
333	set droprestore $drop
334	while { 1 } {
335		set nproced 0
336
337		if { $oob } {
338			set rand_skip [berkdb random_int 2 10]
339		}
340		incr nproced [replprocessqueue $masterenv 1 $rand_skip]
341		for { set i 0 } { $i < $nclients } { incr i } {
342			set envid [expr $i + 2]
343			if { $oob } {
344				set rand_skip [berkdb random_int 2 10]
345			}
346			set n [replprocessqueue $repenv($i) \
347			    $envid $rand_skip]
348			incr nproced $n
349		}
350
351		if { $nproced == 0 } {
352			# Now that we delay requesting records until
353			# we've had a few records go by, we should always
354			# see that the number of requests is lower than the
355			# number of messages that were enqueued.
356			for { set i 0 } { $i < $nclients } { incr i } {
357				set clientenv $repenv($i)
358				set queued [stat_field $clientenv rep_stat \
359				   "Total log records queued"]
360				error_check_bad queued_stats \
361				    $queued -1
362				set requested [stat_field $clientenv rep_stat \
363				   "Log records requested"]
364				error_check_bad requested_stats \
365				    $requested -1
366
367				#
368				# Set to 100 usecs.  An average ping
369				# to localhost should be a few 10s usecs.
370				#
371				$clientenv rep_request 100 400
372			}
373
374			# If we were dropping messages, we might need
375			# to flush the log so that we get everything
376			# and end up in the right state.
377			if { $drop != 0 } {
378				set drop 0
379				$masterenv rep_flush
380				berkdb debug_check
381				puts "\t$test: Flushing Master"
382			} else {
383				break
384			}
385		}
386	}
387
388	# Reset the clients back to the default state in case we
389	# have more processing to do.
390	for { set i 0 } { $i < $nclients } { incr i } {
391		set clientenv $repenv($i)
392		$clientenv rep_request 40000 1280000
393	}
394	set drop $droprestore
395}
396
397# Verify that the directories in the master are exactly replicated in
398# each of the client environments.
399proc repl_envver0 { test method { nclients 1 } } {
400	global clientdir
401	global masterdir
402	global repenv
403
404	# Verify the database in the client dir.
405	# First dump the master.
406	set t1 $masterdir/t1
407	set t2 $masterdir/t2
408	set t3 $masterdir/t3
409	set omethod [convert_method $method]
410
411	#
412	# We are interested in the keys of whatever databases are present
413	# in the master environment, so we just call a no-op check function
414	# since we have no idea what the contents of this database really is.
415	# We just need to walk the master and the clients and make sure they
416	# have the same contents.
417	#
418	set cwd [pwd]
419	cd $masterdir
420	set stat [catch {glob test*.db} dbs]
421	cd $cwd
422	if { $stat == 1 } {
423		return
424	}
425	foreach testfile $dbs {
426		open_and_dump_file $testfile $repenv(master) $masterdir/t2 \
427		    repl_noop dump_file_direction "-first" "-next"
428
429		if { [string compare [convert_method $method] -recno] != 0 } {
430			filesort $t2 $t3
431			file rename -force $t3 $t2
432		}
433		for { set i 0 } { $i < $nclients } { incr i } {
434	puts "\t$test: Verifying client $i database $testfile contents."
435			open_and_dump_file $testfile $repenv($i) \
436			    $t1 repl_noop dump_file_direction "-first" "-next"
437
438			if { [string compare $omethod "-recno"] != 0 } {
439				filesort $t1 $t3
440			} else {
441				catch {file copy -force $t1 $t3} ret
442			}
443			error_check_good diff_files($t2,$t3) [filecmp $t2 $t3] 0
444		}
445	}
446}
447
448# Remove all the elements from the master and verify that these
449# deletions properly propagated to the clients.
450proc repl_verdel { test method { nclients 1 } } {
451	global clientdir
452	global masterdir
453	global repenv
454
455	# Delete all items in the master.
456	set cwd [pwd]
457	cd $masterdir
458	set stat [catch {glob test*.db} dbs]
459	cd $cwd
460	if { $stat == 1 } {
461		return
462	}
463	foreach testfile $dbs {
464		puts "\t$test: Deleting all items from the master."
465		set txn [$repenv(master) txn]
466		error_check_good txn_begin [is_valid_txn $txn \
467		    $repenv(master)] TRUE
468		set db [eval berkdb_open -txn $txn -env $repenv(master) \
469		    $testfile]
470		error_check_good reopen_master [is_valid_db $db] TRUE
471		set dbc [$db cursor -txn $txn]
472		error_check_good reopen_master_cursor \
473		    [is_valid_cursor $dbc $db] TRUE
474		for { set dbt [$dbc get -first] } { [llength $dbt] > 0 } \
475		    { set dbt [$dbc get -next] } {
476			error_check_good del_item [$dbc del] 0
477		}
478		error_check_good dbc_close [$dbc close] 0
479		error_check_good txn_commit [$txn commit] 0
480		error_check_good db_close [$db close] 0
481
482		repl_envprocq $test $nclients
483
484		# Check clients.
485		for { set i 0 } { $i < $nclients } { incr i } {
486			puts "\t$test: Verifying client database $i is empty."
487
488			set db [eval berkdb_open -env $repenv($i) $testfile]
489			error_check_good reopen_client($i) \
490			    [is_valid_db $db] TRUE
491			set dbc [$db cursor]
492			error_check_good reopen_client_cursor($i) \
493			    [is_valid_cursor $dbc $db] TRUE
494
495			error_check_good client($i)_empty \
496			    [llength [$dbc get -first]] 0
497
498			error_check_good dbc_close [$dbc close] 0
499			error_check_good db_close [$db close] 0
500		}
501	}
502}
503
504# Replication "check" function for the dump procs that expect to
505# be able to verify the keys and data.
506proc repl_noop { k d } {
507	return
508}
509
510# Close all the master and client environments in a replication test directory.
511proc repl_envclose { test envargs } {
512	source ./include.tcl
513	global clientdir
514	global encrypt
515	global masterdir
516	global repenv
517	global drop
518
519	if { [lsearch $envargs "-encrypta*"] !=-1 } {
520		set encrypt 1
521	}
522
523	# In order to make sure that we have fully-synced and ready-to-verify
524	# databases on all the clients, do a checkpoint on the master and
525	# process messages in order to flush all the clients.
526	set drop 0
527	berkdb debug_check
528	puts "\t$test: Checkpointing master."
529	error_check_good masterenv_ckp [$repenv(master) txn_checkpoint] 0
530
531	# Count clients.
532	for { set ncli 0 } { 1 } { incr ncli } {
533		if { $repenv($ncli) == "NULL" } {
534			break
535		}
536		$repenv($ncli) rep_request 100 100
537	}
538	repl_envprocq $test $ncli
539
540	error_check_good masterenv_close [$repenv(master) close] 0
541	verify_dir $masterdir "\t$test: " 0 0 1
542	for { set i 0 } { $i < $ncli } { incr i } {
543		error_check_good client($i)_close [$repenv($i) close] 0
544		verify_dir $clientdir($i) "\t$test: " 0 0 1
545	}
546	replclose $testdir/MSGQUEUEDIR
547
548}
549
550# Replnoop is a dummy function to substitute for replsend
551# when replication is off.
552proc replnoop { control rec fromid toid flags lsn } {
553	return 0
554}
555
556proc replclose { queuedir } {
557	global queueenv queuedbs machids
558
559	foreach m $machids {
560		set db $queuedbs($m)
561		error_check_good dbr_close [$db close] 0
562	}
563	error_check_good qenv_close [$queueenv close] 0
564	set machids {}
565}
566
567# Create a replication group for testing.
568proc replsetup { queuedir } {
569	global queueenv queuedbs machids
570
571	file mkdir $queuedir
572	set max_locks 20000
573	set queueenv [berkdb_env \
574	     -create -txn nosync -lock_max_locks $max_locks -home $queuedir]
575	error_check_good queueenv [is_valid_env $queueenv] TRUE
576
577	if { [info exists queuedbs] } {
578		unset queuedbs
579	}
580	set machids {}
581
582	return $queueenv
583}
584
585# Send function for replication.
586proc replsend { control rec fromid toid flags lsn } {
587	global queuedbs queueenv machids
588	global drop drop_msg
589	global perm_sent_list
590	global anywhere
591
592	set permflags [lsearch $flags "perm"]
593	if { [llength $perm_sent_list] != 0 && $permflags != -1 } {
594#		puts "replsend sent perm message, LSN $lsn"
595		lappend perm_sent_list $lsn
596	}
597
598	#
599	# If we are testing with dropped messages, then we drop every
600	# $drop_msg time.  If we do that just return 0 and don't do
601	# anything.
602	#
603	if { $drop != 0 } {
604		incr drop
605		if { $drop == $drop_msg } {
606			set drop 1
607			return 0
608		}
609	}
610	# XXX
611	# -1 is DB_BROADCAST_EID
612	if { $toid == -1 } {
613		set machlist $machids
614	} else {
615		if { [info exists queuedbs($toid)] != 1 } {
616			error "replsend: machid $toid not found"
617		}
618		set m NULL
619		if { $anywhere != 0 } {
620			#
621			# If we can send this anywhere, send it to the first
622			# id we find that is neither toid or fromid.
623			#
624			set anyflags [lsearch $flags "any"]
625			if { $anyflags != -1 } {
626				foreach m $machids {
627					if { $m == $fromid || $m == $toid } {
628						continue
629					}
630					set machlist [list $m]
631					break
632				}
633			}
634		}
635		#
636		# If we didn't find a different site, then we must
637		# fallback to the toid.
638		#
639		if { $m == "NULL" } {
640			set machlist [list $toid]
641		}
642	}
643
644	foreach m $machlist {
645		# do not broadcast to self.
646		if { $m == $fromid } {
647			continue
648		}
649
650		set db $queuedbs($m)
651		set txn [$queueenv txn]
652		$db put -txn $txn -append [list $control $rec $fromid]
653		error_check_good replsend_commit [$txn commit] 0
654	}
655
656	queue_logcheck
657	return 0
658}
659
660#
661# If the message queue log files are getting too numerous, checkpoint
662# and archive them.  Some tests are so large (particularly from
663# run_repmethod) that they can consume far too much disk space.
664proc queue_logcheck { } {
665	global queueenv
666
667
668	set logs [$queueenv log_archive -arch_log]
669	set numlogs [llength $logs]
670	if { $numlogs > 10 } {
671		$queueenv txn_checkpoint
672		$queueenv log_archive -arch_remove
673	}
674}
675
676# Discard all the pending messages for a particular site.
677proc replclear { machid } {
678	global queuedbs queueenv
679
680	if { [info exists queuedbs($machid)] != 1 } {
681		error "FAIL: replclear: machid $machid not found"
682	}
683
684	set db $queuedbs($machid)
685	set txn [$queueenv txn]
686	set dbc [$db cursor -txn $txn]
687	for { set dbt [$dbc get -rmw -first] } { [llength $dbt] > 0 } \
688	    { set dbt [$dbc get -rmw -next] } {
689		error_check_good replclear($machid)_del [$dbc del] 0
690	}
691	error_check_good replclear($machid)_dbc_close [$dbc close] 0
692	error_check_good replclear($machid)_txn_commit [$txn commit] 0
693}
694
695# Add a machine to a replication environment.
696proc repladd { machid } {
697	global queueenv queuedbs machids
698
699	if { [info exists queuedbs($machid)] == 1 } {
700		error "FAIL: repladd: machid $machid already exists"
701	}
702
703	set queuedbs($machid) [berkdb open -auto_commit \
704	    -env $queueenv -create -recno -renumber repqueue$machid.db]
705	error_check_good repqueue_create [is_valid_db $queuedbs($machid)] TRUE
706
707	lappend machids $machid
708}
709
710# Acquire a handle to work with an existing machine's replication
711# queue.  This is for situations where more than one process
712# is working with a message queue.  In general, having more than one
713# process handle the queue is wrong.  However, in order to test some
714# things, we need two processes (since Tcl doesn't support threads).  We
715# go to great pain in the test harness to make sure this works, but we
716# don't let customers do it.
717proc repljoin { machid } {
718	global queueenv queuedbs machids
719
720	set queuedbs($machid) [berkdb open -auto_commit \
721	    -env $queueenv repqueue$machid.db]
722	error_check_good repqueue_create [is_valid_db $queuedbs($machid)] TRUE
723
724	lappend machids $machid
725}
726
727# Process a queue of messages, skipping every "skip_interval" entry.
728# We traverse the entire queue, but since we skip some messages, we
729# may end up leaving things in the queue, which should get picked up
730# on a later run.
731proc replprocessqueue { dbenv machid { skip_interval 0 } { hold_electp NONE } \
732    { dupmasterp NONE } { errp NONE } } {
733	global queuedbs queueenv errorCode
734	global perm_response_list
735	global startup_done
736
737	# hold_electp is a call-by-reference variable which lets our caller
738	# know we need to hold an election.
739	if { [string compare $hold_electp NONE] != 0 } {
740		upvar $hold_electp hold_elect
741	}
742	set hold_elect 0
743
744	# dupmasterp is a call-by-reference variable which lets our caller
745	# know we have a duplicate master.
746	if { [string compare $dupmasterp NONE] != 0 } {
747		upvar $dupmasterp dupmaster
748	}
749	set dupmaster 0
750
751	# errp is a call-by-reference variable which lets our caller
752	# know we have gotten an error (that they expect).
753	if { [string compare $errp NONE] != 0 } {
754		upvar $errp errorp
755	}
756	set errorp 0
757
758	set nproced 0
759
760	set txn [$queueenv txn]
761
762	# If we are running separate processes, the second process has
763	# to join an existing message queue.
764	if { [info exists queuedbs($machid)] == 0 } {
765		repljoin $machid
766	}
767
768	set dbc [$queuedbs($machid) cursor -txn $txn]
769
770	error_check_good process_dbc($machid) \
771	    [is_valid_cursor $dbc $queuedbs($machid)] TRUE
772
773	for { set dbt [$dbc get -first] } \
774	    { [llength $dbt] != 0 } \
775	    { } {
776		set data [lindex [lindex $dbt 0] 1]
777		set recno [lindex [lindex $dbt 0] 0]
778
779		# If skip_interval is nonzero, we want to process messages
780		# out of order.  We do this in a simple but slimy way--
781		# continue walking with the cursor without processing the
782		# message or deleting it from the queue, but do increment
783		# "nproced".  The way this proc is normally used, the
784		# precise value of nproced doesn't matter--we just don't
785		# assume the queues are empty if it's nonzero.  Thus,
786		# if we contrive to make sure it's nonzero, we'll always
787		# come back to records we've skipped on a later call
788		# to replprocessqueue.  (If there really are no records,
789		# we'll never get here.)
790		#
791		# Skip every skip_interval'th record (and use a remainder other
792		# than zero so that we're guaranteed to really process at least
793		# one record on every call).
794		if { $skip_interval != 0 } {
795			if { $nproced % $skip_interval == 1 } {
796				incr nproced
797				set dbt [$dbc get -next]
798				continue
799			}
800		}
801
802		# We need to remove the current message from the queue,
803		# because we're about to end the transaction and someone
804		# else processing messages might come in and reprocess this
805		# message which would be bad.
806		error_check_good queue_remove [$dbc del] 0
807
808		# We have to play an ugly cursor game here:  we currently
809		# hold a lock on the page of messages, but rep_process_message
810		# might need to lock the page with a different cursor in
811		# order to send a response.  So save the next recno, close
812		# the cursor, and then reopen and reset the cursor.
813		# If someone else is processing this queue, our entry might
814		# have gone away, and we need to be able to handle that.
815
816		error_check_good dbc_process_close [$dbc close] 0
817		error_check_good txn_commit [$txn commit] 0
818
819		set ret [catch {$dbenv rep_process_message \
820		    [lindex $data 2] [lindex $data 0] [lindex $data 1]} res]
821
822		# Save all ISPERM and NOTPERM responses so we can compare their
823		# LSNs to the LSN in the log.  The variable perm_response_list
824		# holds the entire response so we can extract responses and
825		# LSNs as needed.
826		#
827		if { [llength $perm_response_list] != 0 && \
828		    ([is_substr $res ISPERM] || [is_substr $res NOTPERM]) } {
829			lappend perm_response_list $res
830		}
831
832		if { $ret != 0 } {
833			if { [string compare $errp NONE] != 0 } {
834				set errorp "$dbenv $machid $res"
835			} else {
836				error "FAIL:[timestamp]\
837				    rep_process_message returned $res"
838			}
839		}
840
841		incr nproced
842
843		# Now, re-establish the cursor position.  We fetch the
844		# current record number.  If there is something there,
845		# that is the record for the next iteration.  If there
846		# is nothing there, then we've consumed the last item
847		# in the queue.
848
849		set txn [$queueenv txn]
850		set dbc [$queuedbs($machid) cursor -txn $txn]
851		set dbt [$dbc get -set_range $recno]
852
853		if { $ret == 0 } {
854			set rettype [lindex $res 0]
855			set retval [lindex $res 1]
856			#
857			# Do nothing for 0 and NEWSITE
858			#
859			if { [is_substr $rettype STARTUPDONE] } {
860				set startup_done 1
861			}
862			if { [is_substr $rettype HOLDELECTION] } {
863				set hold_elect 1
864			}
865			if { [is_substr $rettype DUPMASTER] } {
866				set dupmaster "1 $dbenv $machid"
867			}
868			if { [is_substr $rettype NOTPERM] || \
869			    [is_substr $rettype ISPERM] } {
870				set lsnfile [lindex $retval 0]
871				set lsnoff [lindex $retval 1]
872			}
873		}
874
875		if { $errorp != 0 } {
876			# Break also on an error, caller wants to handle it.
877			break
878		}
879		if { $hold_elect == 1 } {
880			# Break also on a HOLDELECTION, for the same reason.
881			break
882		}
883		if { $dupmaster == 1 } {
884			# Break also on a DUPMASTER, for the same reason.
885			break
886		}
887
888	}
889
890	error_check_good dbc_close [$dbc close] 0
891	error_check_good txn_commit [$txn commit] 0
892
893	# Return the number of messages processed.
894	return $nproced
895}
896
897
898set run_repl_flag "-run_repl"
899
900proc extract_repl_args { args } {
901	global run_repl_flag
902
903	for { set arg [lindex $args [set i 0]] } \
904	    { [string length $arg] > 0 } \
905	    { set arg [lindex $args [incr i]] } {
906		if { [string compare $arg $run_repl_flag] == 0 } {
907			return [lindex $args [expr $i + 1]]
908		}
909	}
910	return ""
911}
912
913proc delete_repl_args { args } {
914	global run_repl_flag
915
916	set ret {}
917
918	for { set arg [lindex $args [set i 0]] } \
919	    { [string length $arg] > 0 } \
920	    { set arg [lindex $args [incr i]] } {
921		if { [string compare $arg $run_repl_flag] != 0 } {
922			lappend ret $arg
923		} else {
924			incr i
925		}
926	}
927	return $ret
928}
929
930global elect_serial
931global elections_in_progress
932set elect_serial 0
933
934# Start an election in a sub-process.
935proc start_election \
936    { pfx qdir envstring nsites nvotes pri timeout {err "none"} {crash 0}} {
937	source ./include.tcl
938	global elect_serial elections_in_progress machids
939	global rep_verbose
940
941	set filelist {}
942	set ret [catch {glob $testdir/ELECTION*.$elect_serial} result]
943	if { $ret == 0 } {
944		set filelist [concat $filelist $result]
945	}
946	foreach f $filelist {
947		fileremove -f $f
948	}
949
950	set oid [open $testdir/ELECTION_SOURCE.$elect_serial w]
951
952	puts $oid "source $test_path/test.tcl"
953	puts $oid "set elected_event 0"
954	puts $oid "set elected_env \"NONE\""
955	puts $oid "set is_repchild 1"
956	puts $oid "replsetup $qdir"
957	foreach i $machids { puts $oid "repladd $i" }
958	puts $oid "set env_cmd \{$envstring\}"
959	if { $rep_verbose == 1 } {
960		puts $oid "set dbenv \[eval \$env_cmd -errfile \
961		    /dev/stdout -errpfx $pfx \]"
962	} else {
963		puts $oid "set dbenv \[eval \$env_cmd -errfile \
964		    $testdir/ELECTION_ERRFILE.$elect_serial -errpfx $pfx \]"
965	}
966	puts $oid "\$dbenv test abort $err"
967	puts $oid "set res \[catch \{\$dbenv rep_elect $nsites \
968	    $nvotes $pri $timeout\} ret\]"
969	puts $oid "set r \[open \$testdir/ELECTION_RESULT.$elect_serial w\]"
970	puts $oid "if \{\$res == 0 \} \{"
971	puts $oid "puts \$r \"SUCCESS \$ret\""
972	puts $oid "\} else \{"
973	puts $oid "puts \$r \"ERROR \$ret\""
974	puts $oid "\}"
975	#
976	# This loop calls rep_elect a second time with the error cleared.
977	# We don't want to do that if we are simulating a crash.
978	if { $err != "none" && $crash != 1 } {
979		puts $oid "\$dbenv test abort none"
980		puts $oid "set res \[catch \{\$dbenv rep_elect $nsites \
981		    $nvotes $pri $timeout\} ret\]"
982		puts $oid "if \{\$res == 0 \} \{"
983		puts $oid "puts \$r \"SUCCESS \$ret\""
984		puts $oid "\} else \{"
985		puts $oid "puts \$r \"ERROR \$ret\""
986		puts $oid "\}"
987	}
988
989	puts $oid "if \{ \$elected_event == 1 \} \{"
990	puts $oid "puts \$r \"ELECTED \$elected_env\""
991	puts $oid "\}"
992
993	puts $oid "close \$r"
994	close $oid
995
996	set t [open "|$tclsh_path >& $testdir/ELECTION_OUTPUT.$elect_serial" w]
997	if { $rep_verbose } {
998		set t [open "|$tclsh_path" w]
999	}
1000	puts $t "source ./include.tcl"
1001	puts $t "source $testdir/ELECTION_SOURCE.$elect_serial"
1002	flush $t
1003
1004	set elections_in_progress($elect_serial) $t
1005	return $elect_serial
1006}
1007
1008#
1009# If we are doing elections during upgrade testing, set
1010# upgrade to 1.  Doing that sets the priority to the
1011# test priority in rep_elect, which will simulate a
1012# 0-priority but electable site.
1013#
1014proc setpriority { priority nclients winner {start 0} {upgrade 0} } {
1015	global electable_pri
1016	upvar $priority pri
1017
1018	for { set i $start } { $i < [expr $nclients + $start] } { incr i } {
1019		if { $i == $winner } {
1020			set pri($i) 100
1021		} else {
1022			if { $upgrade } {
1023				set pri($i) $electable_pri
1024			} else {
1025				set pri($i) 10
1026			}
1027		}
1028	}
1029}
1030
1031# run_election has the following arguments:
1032# Arrays:
1033#	ecmd 		Array of the commands for setting up each client env.
1034#	cenv		Array of the handles to each client env.
1035#	errcmd		Array of where errors should be forced.
1036#	priority	Array of the priorities of each client env.
1037#	crash		If an error is forced, should we crash or recover?
1038# The upvar command takes care of making these arrays available to
1039# the procedure.
1040#
1041# Ordinary variables:
1042# 	qdir		Directory where the message queue is located.
1043#	msg		Message prefixed to the output.
1044#	elector		This client calls the first election.
1045#	nsites		Number of sites in the replication group.
1046#	nvotes		Number of votes required to win the election.
1047# 	nclients	Number of clients participating in the election.
1048#	win		The expected winner of the election.
1049#	reopen		Should the new master (i.e. winner) be closed
1050#			and reopened as a client?
1051#	dbname		Name of the underlying database.  The caller
1052#			should send in "NULL" if the database has not
1053# 			yet been created.
1054# 	ignore		Should the winner ignore its own election?
1055#			If ignore is 1, the winner is not made master.
1056#	timeout_ok	We expect that this election will not succeed
1057# 			in electing a new master (perhaps because there
1058#			already is a master).
1059
1060proc run_election { ecmd celist errcmd priority crsh\
1061    qdir msg elector nsites nvotes nclients win reopen\
1062    dbname {ignore 0} {timeout_ok 0} } {
1063
1064	global elect_timeout elect_serial
1065	global is_hp_test
1066	global is_windows_test
1067	global rand_init
1068	upvar $ecmd env_cmd
1069	upvar $celist cenvlist
1070	upvar $errcmd err_cmd
1071	upvar $priority pri
1072	upvar $crsh crash
1073
1074	set elect_timeout(default) 15000000
1075	# Windows and HP-UX require a longer timeout.
1076	if { $is_windows_test == 1 || $is_hp_test == 1 } {
1077		set elect_timeout(default) [expr $elect_timeout(default) * 2]
1078	}
1079
1080	set long_timeout $elect_timeout(default)
1081	#
1082	# Initialize tries based on the default timeout.
1083	# We use tries to loop looking for messages because
1084	# as sites are sleeping waiting for their timeout
1085	# to expire we need to keep checking for messages.
1086	#
1087	set tries [expr [expr $long_timeout * 4] / 1000000]
1088	#
1089	# Retry indicates whether the test should retry the election
1090	# if it gets a timeout.  This is primarily used for the
1091	# varied timeout election test because we expect short timeouts
1092	# to timeout when interacting with long timeouts and the
1093	# short timeout sites need to call elections again.
1094	#
1095	set retry 0
1096	foreach pair $cenvlist {
1097		set id [lindex $pair 1]
1098		set i [expr $id - 2]
1099		set elect_pipe($i) INVALID
1100		#
1101		# Array get should return us a list of 1 element:
1102		# { {$i timeout_value} }
1103		# If that doesn't exist, use the default.
1104		#
1105		set this_timeout [array get elect_timeout $i]
1106		if { [llength $this_timeout] } {
1107			set e_timeout($i) [lindex $this_timeout 1]
1108			#
1109			# Set number of tries based on the biggest
1110			# timeout we see in this group if using
1111			# varied timeouts.
1112			#
1113			set retry 1
1114			if { $e_timeout($i) > $long_timeout } {
1115				set long_timeout $e_timeout($i)
1116				set tries [expr $long_timeout / 1000000]
1117			}
1118		} else {
1119			set e_timeout($i) $elect_timeout(default)
1120		}
1121		replclear $id
1122	}
1123
1124	#
1125	# XXX
1126	# We need to somehow check for the warning if nvotes is not
1127	# a majority.  Problem is that warning will go into the child
1128	# process' output.  Furthermore, we need a mechanism that can
1129	# handle both sending the output to a file and sending it to
1130	# /dev/stderr when debugging without failing the
1131	# error_check_good check.
1132	#
1133	puts "\t\t$msg.1: Election with nsites=$nsites,\
1134	    nvotes=$nvotes, nclients=$nclients"
1135	puts "\t\t$msg.2: First elector is $elector,\
1136	    expected winner is $win (eid [expr $win + 2])"
1137	incr elect_serial
1138	set pfx "CHILD$elector.$elect_serial"
1139	set elect_pipe($elector) [start_election \
1140	    $pfx $qdir $env_cmd($elector) $nsites $nvotes $pri($elector) \
1141	    $e_timeout($elector) $err_cmd($elector) $crash($elector)]
1142	tclsleep 2
1143
1144	set got_newmaster 0
1145	set max_retry $tries
1146
1147	# If we're simulating a crash, skip the while loop and
1148	# just give the initial election a chance to complete.
1149	set crashing 0
1150	for { set i 0 } { $i < $nclients } { incr i } {
1151		if { $crash($i) == 1 } {
1152			set crashing 1
1153		}
1154	}
1155
1156	global elected_event
1157	global elected_env
1158	set elected_event 0
1159	set c_elected_event 0
1160	set elected_env "NONE"
1161
1162	set orig_tries $tries
1163	if { $crashing == 1 } {
1164		tclsleep 10
1165	} else {
1166		set retry_cnt 0
1167		while { 1 } {
1168			set nproced 0
1169			set he 0
1170			set winning_envid -1
1171			set c_winning_envid -1
1172
1173			foreach pair $cenvlist {
1174				set he 0
1175				set unavail 0
1176				set envid [lindex $pair 1]
1177				set i [expr $envid - 2]
1178				set clientenv($i) [lindex $pair 0]
1179
1180				# If the "elected" event is received by the
1181				# child process, the env set up in that child
1182				# is the elected env.
1183				set child_done [check_election $elect_pipe($i)\
1184				    unavail c_elected_event c_elected_env]
1185				if { $c_elected_event != 0 } {
1186					set elected_event 1
1187					set c_winning_envid $envid
1188					set c_elected_event 0
1189				}
1190
1191				incr nproced [replprocessqueue \
1192				    $clientenv($i) $envid 0 he]
1193# puts "Tries $tries:\
1194# Processed queue for client $i, $nproced msgs he $he unavail $unavail"
1195
1196				# Check for completed election.  If it's the
1197				# first time we've noticed it, deal with it.
1198				if { $elected_event == 1 && \
1199				    $got_newmaster == 0 } {
1200					set got_newmaster 1
1201
1202					# Find env id of winner.
1203					if { $c_winning_envid != -1 } {
1204						set winning_envid \
1205						    $c_winning_envid
1206						set c_winning_envid -1
1207					} else {
1208						foreach pair $cenvlist {
1209							if { [lindex $pair 0]\
1210							    == $elected_env } {
1211								set winning_envid \
1212								    [lindex $pair 1]
1213								break
1214							}
1215						}
1216					}
1217
1218					# Make sure it's the expected winner.
1219					error_check_good right_winner \
1220					    $winning_envid [expr $win + 2]
1221
1222					# Reconfigure winning env as master.
1223					if { $ignore == 0 } {
1224						$clientenv($i) errpfx \
1225						    NEWMASTER
1226						error_check_good \
1227						    make_master($i) \
1228					    	    [$clientenv($i) \
1229						    rep_start -master] 0
1230
1231						# Don't hold another election
1232						# yet if we are setting up a
1233						# new master. This could
1234						# cause the new master to
1235						# declare itself a client
1236						# during internal init.
1237						set he 0
1238					}
1239
1240					# Occasionally force new log records
1241					# to be written, unless the database
1242					# has not yet been created.
1243					set write [berkdb random_int 1 10]
1244					if { $write == 1 && $dbname != "NULL" } {
1245						set db [eval berkdb_open_noerr \
1246						    -env $clientenv($i) \
1247						    -auto_commit $dbname]
1248						error_check_good dbopen \
1249						    [is_valid_db $db] TRUE
1250						error_check_good dbclose \
1251						    [$db close] 0
1252					}
1253				}
1254
1255				# If the previous election failed with a
1256				# timeout and we need to retry because we
1257				# are testing varying site timeouts, force
1258				# a hold election to start a new one.
1259				if { $unavail && $retry && $retry_cnt < $max_retry} {
1260					incr retry_cnt
1261					puts "\t\t$msg.2.b: Client $i timed\
1262					    out. Retry $retry_cnt\
1263					    of max $max_retry"
1264					set he 1
1265					set tries $orig_tries
1266				}
1267				if { $he == 1 && $got_newmaster == 0 } {
1268					#
1269					# Only close down the election pipe if the
1270					# previously created one is done and
1271					# waiting for new commands, otherwise
1272					# if we try to close it while it's in
1273					# progress we hang this main tclsh.
1274					#
1275					if { $elect_pipe($i) != "INVALID" && \
1276					    $child_done == 1 } {
1277						close_election $elect_pipe($i)
1278						set elect_pipe($i) "INVALID"
1279					}
1280# puts "Starting election on client $i"
1281					if { $elect_pipe($i) == "INVALID" } {
1282						incr elect_serial
1283						set pfx "CHILD$i.$elect_serial"
1284						set elect_pipe($i) [start_election \
1285						    $pfx $qdir \
1286						    $env_cmd($i) $nsites \
1287						    $nvotes $pri($i) $e_timeout($i)]
1288						set got_hold_elect($i) 1
1289					}
1290				}
1291			}
1292
1293			# We need to wait around to make doubly sure that the
1294			# election has finished...
1295			if { $nproced == 0 } {
1296				incr tries -1
1297				#
1298				# If we have a newmaster already, set tries
1299				# down to just allow straggling messages to
1300				# be processed.  Tries could be a very large
1301				# number if we have long timeouts.
1302				#
1303				if { $got_newmaster != 0 && $tries > 10 } {
1304					set tries 10
1305				}
1306				if { $tries == 0 } {
1307					break
1308				} else {
1309					tclsleep 1
1310				}
1311			} else {
1312				set tries $tries
1313			}
1314		}
1315
1316		# If we did get a new master, its identity was checked
1317		# at that time.  But we still have to make sure that we
1318		# didn't just time out.
1319
1320		if { $got_newmaster == 0 && $timeout_ok == 0 } {
1321			error "FAIL: Did not elect new master."
1322		}
1323	}
1324	cleanup_elections
1325
1326	#
1327	# Make sure we've really processed all the post-election
1328	# sync-up messages.  If we're simulating a crash, don't process
1329	# any more messages.
1330	#
1331	if { $crashing == 0 } {
1332		process_msgs $cenvlist
1333	}
1334
1335	if { $reopen == 1 } {
1336		puts "\t\t$msg.3: Closing new master and reopening as client"
1337		error_check_good log_flush [$clientenv($win) log_flush] 0
1338		error_check_good newmaster_close [$clientenv($win) close] 0
1339
1340		set clientenv($win) [eval $env_cmd($win)]
1341		error_check_good cl($win) [is_valid_env $clientenv($win)] TRUE
1342		set newelector "$clientenv($win) [expr $win + 2]"
1343		set cenvlist [lreplace $cenvlist $win $win $newelector]
1344		if { $crashing == 0 } {
1345			process_msgs $cenvlist
1346		}
1347	}
1348}
1349
1350proc check_election { id unavailp elected_eventp elected_envp } {
1351	source ./include.tcl
1352
1353	if { $id == "INVALID" } {
1354		return 0
1355	}
1356	upvar $unavailp unavail
1357	upvar $elected_eventp elected_event
1358	upvar $elected_envp elected_env
1359
1360	set unavail 0
1361	set elected_event 0
1362	set elected_env "NONE"
1363
1364	set res [catch {open $testdir/ELECTION_RESULT.$id} nmid]
1365	if { $res != 0 } {
1366		return 0
1367	}
1368	while { [gets $nmid val] != -1 } {
1369#		puts "result $id: $val"
1370		set str [lindex $val 0]
1371		if { [is_substr $val UNAVAIL] } {
1372			set unavail 1
1373		}
1374		if { [is_substr $val ELECTED] } {
1375			set elected_event 1
1376			set elected_env [lindex $val 1]
1377		}
1378	}
1379	close $nmid
1380	return 1
1381}
1382
1383proc close_election { i } {
1384	global elections_in_progress
1385	global noenv_messaging
1386	global qtestdir
1387
1388	if { $noenv_messaging == 1 } {
1389		set testdir $qtestdir
1390	}
1391
1392	set t $elections_in_progress($i)
1393	puts $t "replclose \$testdir/MSGQUEUEDIR"
1394	puts $t "\$dbenv close"
1395	close $t
1396	unset elections_in_progress($i)
1397}
1398
1399proc cleanup_elections { } {
1400	global elect_serial elections_in_progress
1401
1402	for { set i 0 } { $i <= $elect_serial } { incr i } {
1403		if { [info exists elections_in_progress($i)] != 0 } {
1404			close_election $i
1405		}
1406	}
1407
1408	set elect_serial 0
1409}
1410
1411#
1412# This is essentially a copy of test001, but it only does the put/get
1413# loop AND it takes an already-opened db handle.
1414#
1415proc rep_test { method env repdb {nentries 10000} \
1416    {start 0} {skip 0} {needpad 0} {inmem 0} args } {
1417
1418	source ./include.tcl
1419
1420	#
1421	# Open the db if one isn't given.  Close before exit.
1422	#
1423	if { $repdb == "NULL" } {
1424		if { $inmem == 1 } {
1425			set testfile { "" "test.db" }
1426		} else {
1427			set testfile "test.db"
1428		}
1429		set largs [convert_args $method $args]
1430		set omethod [convert_method $method]
1431		set db [eval {berkdb_open_noerr} -env $env -auto_commit\
1432		    -create -mode 0644 $omethod $largs $testfile]
1433		error_check_good reptest_db [is_valid_db $db] TRUE
1434	} else {
1435		set db $repdb
1436	}
1437
1438	puts "\t\tRep_test: $method $nentries key/data pairs starting at $start"
1439	set did [open $dict]
1440
1441	# The "start" variable determines the record number to start
1442	# with, if we're using record numbers.  The "skip" variable
1443	# determines which dictionary entry to start with.  In normal
1444	# use, skip is equal to start.
1445
1446	if { $skip != 0 } {
1447		for { set count 0 } { $count < $skip } { incr count } {
1448			gets $did str
1449		}
1450	}
1451	set pflags ""
1452	set gflags ""
1453	set txn ""
1454
1455	if { [is_record_based $method] == 1 } {
1456		append gflags " -recno"
1457	}
1458	puts "\t\tRep_test.a: put/get loop"
1459	# Here is the loop where we put and get each key/data pair
1460	set count 0
1461
1462	# Checkpoint 10 times during the run, but not more
1463	# frequently than every 5 entries.
1464	set checkfreq [expr $nentries / 10]
1465
1466	# Abort occasionally during the run.
1467	set abortfreq [expr $nentries / 15]
1468
1469	while { [gets $did str] != -1 && $count < $nentries } {
1470		if { [is_record_based $method] == 1 } {
1471			global kvals
1472
1473			set key [expr $count + 1 + $start]
1474			if { 0xffffffff > 0 && $key > 0xffffffff } {
1475				set key [expr $key - 0x100000000]
1476			}
1477			if { $key == 0 || $key - 0xffffffff == 1 } {
1478				incr key
1479				incr count
1480			}
1481			set kvals($key) [pad_data $method $str]
1482		} else {
1483			set key $str
1484			set str [reverse $str]
1485		}
1486		#
1487		# We want to make sure we send in exactly the same
1488		# length data so that LSNs match up for some tests
1489		# in replication (rep021).
1490		#
1491		if { [is_fixed_length $method] == 1 && $needpad } {
1492			#
1493			# Make it something visible and obvious, 'A'.
1494			#
1495			set p 65
1496			set str [make_fixed_length $method $str $p]
1497			set kvals($key) $str
1498		}
1499		set t [$env txn]
1500		error_check_good txn [is_valid_txn $t $env] TRUE
1501		set txn "-txn $t"
1502		set ret [eval \
1503		    {$db put} $txn $pflags {$key [chop_data $method $str]}]
1504		error_check_good put $ret 0
1505		error_check_good txn [$t commit] 0
1506
1507		if { $checkfreq < 5 } {
1508			set checkfreq 5
1509		}
1510		if { $abortfreq < 3 } {
1511			set abortfreq 3
1512		}
1513		#
1514		# Do a few aborted transactions to test that
1515		# aborts don't get processed on clients and the
1516		# master handles them properly.  Just abort
1517		# trying to delete the key we just added.
1518		#
1519		if { $count % $abortfreq == 0 } {
1520			set t [$env txn]
1521			error_check_good txn [is_valid_txn $t $env] TRUE
1522			set ret [$db del -txn $t $key]
1523			error_check_good txn [$t abort] 0
1524		}
1525		if { $count % $checkfreq == 0 } {
1526			error_check_good txn_checkpoint($count) \
1527			    [$env txn_checkpoint] 0
1528		}
1529		incr count
1530	}
1531	close $did
1532	if { $repdb == "NULL" } {
1533		error_check_good rep_close [$db close] 0
1534	}
1535}
1536
1537#
1538# This is essentially a copy of rep_test, but it only does the put/get
1539# loop in a long running txn to an open db.  We use it for bulk testing
1540# because we want to fill the bulk buffer some before sending it out.
1541# Bulk buffer gets transmitted on every commit.
1542#
1543proc rep_test_bulk { method env repdb {nentries 10000} \
1544    {start 0} {skip 0} {useoverflow 0} args } {
1545	source ./include.tcl
1546
1547	global overflowword1
1548	global overflowword2
1549
1550	if { [is_fixed_length $method] && $useoverflow == 1 } {
1551		puts "Skipping overflow for fixed length method $method"
1552		return
1553	}
1554	#
1555	# Open the db if one isn't given.  Close before exit.
1556	#
1557	if { $repdb == "NULL" } {
1558		set testfile "test.db"
1559		set largs [convert_args $method $args]
1560		set omethod [convert_method $method]
1561		set db [eval {berkdb_open_noerr -env $env -auto_commit -create \
1562		    -mode 0644} $largs $omethod $testfile]
1563		error_check_good reptest_db [is_valid_db $db] TRUE
1564	} else {
1565		set db $repdb
1566	}
1567
1568	#
1569	# If we are using an env, then testfile should just be the db name.
1570	# Otherwise it is the test directory and the name.
1571	# If we are not using an external env, then test setting
1572	# the database cache size and using multiple caches.
1573	puts \
1574"\t\tRep_test_bulk: $method $nentries key/data pairs starting at $start"
1575	set did [open $dict]
1576
1577	# The "start" variable determines the record number to start
1578	# with, if we're using record numbers.  The "skip" variable
1579	# determines which dictionary entry to start with.  In normal
1580	# use, skip is equal to start.
1581
1582	if { $skip != 0 } {
1583		for { set count 0 } { $count < $skip } { incr count } {
1584			gets $did str
1585		}
1586	}
1587	set pflags ""
1588	set gflags ""
1589	set txn ""
1590
1591	if { [is_record_based $method] == 1 } {
1592		append gflags " -recno"
1593	}
1594	puts "\t\tRep_test_bulk.a: put/get loop in 1 txn"
1595	# Here is the loop where we put and get each key/data pair
1596	set count 0
1597
1598	set t [$env txn]
1599	error_check_good txn [is_valid_txn $t $env] TRUE
1600	set txn "-txn $t"
1601	set pid [pid]
1602	while { [gets $did str] != -1 && $count < $nentries } {
1603		if { [is_record_based $method] == 1 } {
1604			global kvals
1605
1606			set key [expr $count + 1 + $start]
1607			if { 0xffffffff > 0 && $key > 0xffffffff } {
1608				set key [expr $key - 0x100000000]
1609			}
1610			if { $key == 0 || $key - 0xffffffff == 1 } {
1611				incr key
1612				incr count
1613			}
1614			set kvals($key) [pad_data $method $str]
1615			if { [is_fixed_length $method] == 0 } {
1616				set str [repeat $str 100]
1617			}
1618		} else {
1619			set key $str.$pid
1620			set str [repeat $str 100]
1621		}
1622		#
1623		# For use for overflow test.
1624		#
1625		if { $useoverflow == 0 } {
1626			if { [string length $overflowword1] < \
1627			    [string length $str] } {
1628				set overflowword2 $overflowword1
1629				set overflowword1 $str
1630			}
1631		} else {
1632			if { $count == 0 } {
1633				set len [string length $overflowword1]
1634				set word $overflowword1
1635			} else {
1636				set len [string length $overflowword2]
1637				set word $overflowword1
1638			}
1639			set rpt [expr 1024 * 1024 / $len]
1640			incr rpt
1641			set str [repeat $word $rpt]
1642		}
1643		set ret [eval \
1644		    {$db put} $txn $pflags {$key [chop_data $method $str]}]
1645		error_check_good put $ret 0
1646		incr count
1647	}
1648	error_check_good txn [$t commit] 0
1649	error_check_good txn_checkpoint [$env txn_checkpoint] 0
1650	close $did
1651	if { $repdb == "NULL" } {
1652		error_check_good rep_close [$db close] 0
1653	}
1654}
1655
1656proc rep_test_upg { method env repdb {nentries 10000} \
1657    {start 0} {skip 0} {needpad 0} {inmem 0} args } {
1658
1659	source ./include.tcl
1660
1661	#
1662	# Open the db if one isn't given.  Close before exit.
1663	#
1664	if { $repdb == "NULL" } {
1665		if { $inmem == 1 } {
1666			set testfile { "" "test.db" }
1667		} else {
1668			set testfile "test.db"
1669		}
1670		set largs [convert_args $method $args]
1671		set omethod [convert_method $method]
1672		set db [eval {berkdb_open_noerr} -env $env -auto_commit\
1673		    -create -mode 0644 $omethod $largs $testfile]
1674		error_check_good reptest_db [is_valid_db $db] TRUE
1675	} else {
1676		set db $repdb
1677	}
1678
1679	set pid [pid]
1680	puts "\t\tRep_test_upg($pid): $method $nentries key/data pairs starting at $start"
1681	set did [open $dict]
1682
1683	# The "start" variable determines the record number to start
1684	# with, if we're using record numbers.  The "skip" variable
1685	# determines which dictionary entry to start with.  In normal
1686	# use, skip is equal to start.
1687
1688	if { $skip != 0 } {
1689		for { set count 0 } { $count < $skip } { incr count } {
1690			gets $did str
1691		}
1692	}
1693	set pflags ""
1694	set gflags ""
1695	set txn ""
1696
1697	if { [is_record_based $method] == 1 } {
1698		append gflags " -recno"
1699	}
1700	puts "\t\tRep_test.a: put/get loop"
1701	# Here is the loop where we put and get each key/data pair
1702	set count 0
1703
1704	# Checkpoint 10 times during the run, but not more
1705	# frequently than every 5 entries.
1706	set checkfreq [expr $nentries / 10]
1707
1708	# Abort occasionally during the run.
1709	set abortfreq [expr $nentries / 15]
1710
1711	while { [gets $did str] != -1 && $count < $nentries } {
1712		if { [is_record_based $method] == 1 } {
1713			global kvals
1714
1715			set key [expr $count + 1 + $start]
1716			if { 0xffffffff > 0 && $key > 0xffffffff } {
1717				set key [expr $key - 0x100000000]
1718			}
1719			if { $key == 0 || $key - 0xffffffff == 1 } {
1720				incr key
1721				incr count
1722			}
1723			set kvals($key) [pad_data $method $str]
1724		} else {
1725			#
1726			# With upgrade test, we run the same test several
1727			# times with the same database.  We want to have
1728			# some overwritten records and some new records.
1729			# Therefore append our pid to half the keys.
1730			#
1731			if { $count % 2 } {
1732				set key $str.$pid
1733			} else {
1734				set key $str
1735			}
1736			set str [reverse $str]
1737		}
1738		#
1739		# We want to make sure we send in exactly the same
1740		# length data so that LSNs match up for some tests
1741		# in replication (rep021).
1742		#
1743		if { [is_fixed_length $method] == 1 && $needpad } {
1744			#
1745			# Make it something visible and obvious, 'A'.
1746			#
1747			set p 65
1748			set str [make_fixed_length $method $str $p]
1749			set kvals($key) $str
1750		}
1751		set t [$env txn]
1752		error_check_good txn [is_valid_txn $t $env] TRUE
1753		set txn "-txn $t"
1754# puts "rep_test_upg: put $count of $nentries: key $key, data $str"
1755		set ret [eval \
1756		    {$db put} $txn $pflags {$key [chop_data $method $str]}]
1757		error_check_good put $ret 0
1758		error_check_good txn [$t commit] 0
1759
1760		if { $checkfreq < 5 } {
1761			set checkfreq 5
1762		}
1763		if { $abortfreq < 3 } {
1764			set abortfreq 3
1765		}
1766		#
1767		# Do a few aborted transactions to test that
1768		# aborts don't get processed on clients and the
1769		# master handles them properly.  Just abort
1770		# trying to delete the key we just added.
1771		#
1772		if { $count % $abortfreq == 0 } {
1773			set t [$env txn]
1774			error_check_good txn [is_valid_txn $t $env] TRUE
1775			set ret [$db del -txn $t $key]
1776			error_check_good txn [$t abort] 0
1777		}
1778		if { $count % $checkfreq == 0 } {
1779			error_check_good txn_checkpoint($count) \
1780			    [$env txn_checkpoint] 0
1781		}
1782		incr count
1783	}
1784	close $did
1785	if { $repdb == "NULL" } {
1786		error_check_good rep_close [$db close] 0
1787	}
1788}
1789
1790proc rep_test_upg.check { key data } {
1791	#
1792	# If the key has the pid attached, strip it off before checking.
1793	# If the key does not have the pid attached, then it is a recno
1794	# and we're done.
1795	#
1796	set i [string first . $key]
1797	if { $i != -1 } {
1798		set key [string replace $key $i end]
1799	}
1800	error_check_good "key/data mismatch" $data [reverse $key]
1801}
1802
1803proc rep_test_upg.recno.check { key data } {
1804	#
1805	# If we're a recno database we better not have a pid in the key.
1806	# Otherwise we're done.
1807	#
1808	set i [string first . $key]
1809	error_check_good pid $i -1
1810}
1811
1812proc process_msgs { elist {perm_response 0} {dupp NONE} {errp NONE} \
1813    {upg 0} } {
1814	if { $perm_response == 1 } {
1815		global perm_response_list
1816		set perm_response_list {{}}
1817	}
1818
1819	if { [string compare $dupp NONE] != 0 } {
1820		upvar $dupp dupmaster
1821		set dupmaster 0
1822	} else {
1823		set dupmaster NONE
1824	}
1825
1826	if { [string compare $errp NONE] != 0 } {
1827		upvar $errp errorp
1828		set errorp 0
1829		set var_name errorp
1830	} else {
1831		set errorp NONE
1832		set var_name NONE
1833	}
1834
1835	set upgcount 0
1836	while { 1 } {
1837		set nproced 0
1838		incr nproced [proc_msgs_once $elist dupmaster $var_name]
1839		#
1840		# If we're running the upgrade test, we are running only
1841		# our own env, we need to loop a bit to allow the other
1842		# upgrade procs to run and reply to our messages.
1843		#
1844		if { $upg == 1 && $upgcount < 10 } {
1845			tclsleep 2
1846			incr upgcount
1847			continue
1848		}
1849		if { $nproced == 0 } {
1850			break
1851		} else {
1852			set upgcount 0
1853		}
1854	}
1855}
1856
1857
1858proc proc_msgs_once { elist {dupp NONE} {errp NONE} } {
1859	global noenv_messaging
1860
1861	if { [string compare $dupp NONE] != 0 } {
1862		upvar $dupp dupmaster
1863		set dupmaster 0
1864	} else {
1865		set dupmaster NONE
1866	}
1867
1868	if { [string compare $errp NONE] != 0 } {
1869		upvar $errp errorp
1870		set errorp 0
1871		set var_name errorp
1872	} else {
1873		set errorp NONE
1874		set var_name NONE
1875	}
1876
1877	set nproced 0
1878	foreach pair $elist {
1879		set envname [lindex $pair 0]
1880		set envid [lindex $pair 1]
1881		#
1882		# If we need to send in all the other args
1883# puts "Call replpq with on $envid"
1884		if { $noenv_messaging } {
1885			incr nproced [replprocessqueue_noenv $envname $envid \
1886			    0 NONE dupmaster $var_name]
1887		} else {
1888			incr nproced [replprocessqueue $envname $envid \
1889			    0 NONE dupmaster $var_name]
1890		}
1891		#
1892		# If the user is expecting to handle an error and we get
1893		# one, return the error immediately.
1894		#
1895		if { $dupmaster != 0 && $dupmaster != "NONE" } {
1896			return 0
1897		}
1898		if { $errorp != 0 && $errorp != "NONE" } {
1899# puts "Returning due to error $errorp"
1900			return 0
1901		}
1902	}
1903	return $nproced
1904}
1905
1906proc rep_verify { masterdir masterenv clientdir clientenv \
1907    {compare_shared_portion 0} {match 1} {logcompare 1} {dbname "test.db"} } {
1908	global util_path
1909	global encrypt
1910	global passwd
1911
1912	# The logcompare flag indicates whether to compare logs.
1913	# Sometimes we run a test where rep_verify is run twice with
1914	# no intervening processing of messages.  If that test is
1915	# on a build with debug_rop enabled, the master's log is
1916	# altered by the first rep_verify, and the second rep_verify
1917	# will fail.
1918	# To avoid this, skip the log comparison on the second rep_verify
1919	# by specifying logcompare == 0.
1920	#
1921	if { $logcompare } {
1922		set msg "Logs and databases"
1923	} else {
1924		set msg "Databases ($dbname)"
1925	}
1926
1927	if { $match } {
1928		puts "\t\tRep_verify: $clientdir: $msg match"
1929	} else {
1930		puts "\t\tRep_verify: $clientdir: $msg do not match"
1931	}
1932	# Check that master and client logs and dbs are identical.
1933
1934	# Logs first, if specified ...
1935	#
1936	# If compare_shared_portion is set, run db_printlog on the log
1937	# subset that both client and master have.  Either the client or
1938	# the master may have more (earlier) log files, due to internal
1939	# initialization, in-memory log wraparound, or other causes.
1940	#
1941	if { $logcompare } {
1942		set args ""
1943		if { $compare_shared_portion } {
1944	                set logc [$masterenv log_cursor]
1945			error_check_good logc [is_valid_logc $logc $masterenv] TRUE
1946			set first [$logc get -first]
1947			error_check_good close [$logc close] 0
1948			set m_lsn [lindex $first 0]
1949
1950	                set logc [$clientenv log_cursor]
1951			error_check_good logc [is_valid_logc $logc $clientenv] TRUE
1952			set first [$logc get -first]
1953			error_check_good close [$logc close] 0
1954			set c_lsn [lindex $first 0]
1955
1956			if { [$masterenv log_compare $m_lsn $c_lsn] < 0 } {
1957				set lsn $c_lsn
1958			} else {
1959				set lsn $m_lsn
1960			}
1961
1962			set file [lindex $lsn 0]
1963			set off [lindex $lsn 1]
1964			set args "-b $file/$off"
1965		}
1966		set encargs ""
1967		if { $encrypt == 1 } {
1968			set encargs " -P $passwd "
1969		}
1970
1971		set stat [catch {eval exec $util_path/db_printlog $args \
1972		    $encargs -h $masterdir > $masterdir/prlog} result]
1973		error_check_good stat_master_printlog $stat 0
1974		set stat [catch {eval exec $util_path/db_printlog $args \
1975		    $encargs -h $clientdir > $clientdir/prlog} result]
1976		error_check_good stat_client_printlog $stat 0
1977		if { $match } {
1978			error_check_good log_cmp \
1979			    [filecmp $masterdir/prlog $clientdir/prlog] 0
1980		} else {
1981			error_check_bad log_cmp \
1982			    [filecmp $masterdir/prlog $clientdir/prlog] 0
1983		}
1984
1985		if { $dbname == "NULL" } {
1986			return
1987		}
1988	}
1989
1990	# ... now the databases.
1991	set db1 [eval {berkdb_open_noerr} -env $masterenv -rdonly $dbname]
1992	set db2 [eval {berkdb_open_noerr} -env $clientenv -rdonly $dbname]
1993
1994	if { $match } {
1995		error_check_good [concat comparedbs. $dbname] [db_compare \
1996		    $db1 $db2 $masterdir/$dbname $clientdir/$dbname] 0
1997	} else {
1998		error_check_bad comparedbs [db_compare \
1999		    $db1 $db2 $masterdir/$dbname $clientdir/$dbname] 0
2000	}
2001	error_check_good db1_close [$db1 close] 0
2002	error_check_good db2_close [$db2 close] 0
2003}
2004
2005proc rep_event { env eventlist } {
2006	global startup_done
2007	global elected_event
2008	global elected_env
2009
2010	set event [lindex $eventlist 0]
2011# puts "rep_event: Got event $event on env $env"
2012	set eventlength [llength $eventlist]
2013
2014	if { $event == "startupdone" } {
2015		error_check_good event_nodata $eventlength 1
2016		set startup_done 1
2017	}
2018	if { $event == "elected" } {
2019		error_check_good event_nodata $eventlength 1
2020		set elected_event 1
2021		set elected_env $env
2022	}
2023	if { $event == "newmaster" } {
2024		error_check_good eiddata $eventlength 2
2025		set event_newmasterid [lindex $eventlist 1]
2026	}
2027	return
2028}
2029
2030# Return a list of TCP port numbers that are not currently in use on
2031# the local system.  Note that this doesn't actually reserve the
2032# ports, so it's possible that by the time the caller tries to use
2033# them, another process could have taken one of them.  But for our
2034# purposes that's unlikely enough that this is still useful: it's
2035# still better than trying to find hard-coded port numbers that will
2036# always be available.
2037#
2038proc available_ports { n } {
2039    set ports {}
2040    set socks {}
2041
2042    while {[incr n -1] >= 0} {
2043        set sock [socket -server Unused -myaddr localhost 0]
2044        set port [lindex [fconfigure $sock -sockname] 2]
2045
2046        lappend socks $sock
2047        lappend ports $port
2048    }
2049
2050    foreach sock $socks {
2051        close $sock
2052    }
2053    return $ports
2054}
2055
2056# Wait (a limited amount of time) for the given client environment to achieve
2057# the "start-up done" state.
2058#
2059proc await_startup_done { env { limit 5 } } {
2060	for {set i 0} {$i < $limit} {incr i} {
2061		if {[stat_field $env rep_stat "Startup complete"]} {
2062			break
2063		}
2064		tclsleep 1
2065	}
2066}
2067
2068proc do_leaseop { env db method key envlist { domsgs 1 } } {
2069	global alphabet
2070
2071	#
2072	# Put a txn to the database.  Process messages to envlist
2073	# if directed to do so.  Read data on the master, ignoring
2074	# leases (should always succeed).
2075	#
2076	set num [berkdb random_int 1 100]
2077	set data $alphabet.$num
2078	set t [$env txn]
2079	error_check_good txn [is_valid_txn $t $env] TRUE
2080	set txn "-txn $t"
2081	set ret [eval \
2082	    {$db put} $txn {$key [chop_data $method $data]}]
2083	error_check_good put $ret 0
2084	error_check_good txn [$t commit] 0
2085
2086	if { $domsgs } {
2087		process_msgs $envlist
2088	}
2089
2090	#
2091	# Now make sure we can successfully read on the master
2092	# if we ignore leases.  That should always work.  The
2093	# caller will do any lease related calls and checks
2094	# that are specific to the test.
2095	#
2096	set kd [$db get -nolease $key]
2097	set curs [$db cursor]
2098	set ckd [$curs get -nolease -set $key]
2099	$curs close
2100	error_check_good kd [llength $kd] 1
2101	error_check_good ckd [llength $ckd] 1
2102}
2103
2104#
2105# Get the given key, expecting status depending on whether leases
2106# are currently expected to be valid or not.
2107#
2108proc check_leaseget { db key getarg status } {
2109	set stat [catch {eval {$db get} $getarg $key} kd]
2110	if { $status != 0 } {
2111		error_check_good get_result $stat 1
2112		error_check_good kd_check \
2113		    [is_substr $kd $status] 1
2114	} else {
2115		error_check_good get_result_good $stat $status
2116		error_check_good dbkey [lindex [lindex $kd 0] 0] $key
2117	}
2118	set curs [$db cursor]
2119	set stat [catch {eval {$curs get} $getarg -set $key} kd]
2120	if { $status != 0 } {
2121		error_check_good get_result2 $stat 1
2122		error_check_good kd_check \
2123		    [is_substr $kd $status] 1
2124	} else {
2125		error_check_good get_result2_good $stat $status
2126		error_check_good dbckey [lindex [lindex $kd 0] 0] $key
2127	}
2128	$curs close
2129}
2130