1#
2# The procs in this file are used for replication messaging
3# ONLY when the default mechanism of setting up a queue of
4# messages in a environment is not possible.  This situation
5# is fairly rare, but it is necessary when a replication
6# test simultaneously runs different versions of Berkeley DB,
7# because different versions cannot share an env.
8#
9# Note, all procs should be named with the suffix _noenv
10# so it's explicit that we are using them.
11#
12# Close up a replication group - close all message dbs.
13proc replclose_noenv { queuedir } {
14	global queuedbs machids
15
16	set dbs [array names queuedbs]
17	foreach tofrom $dbs {
18		set handle $queuedbs($tofrom)
19		error_check_good db_close [$handle close] 0
20		unset queuedbs($tofrom)
21	}
22
23	set machids {}
24}
25
26# Create a replication group for testing.
27proc replsetup_noenv { queuedir } {
28	global queuedbs machids
29
30	file mkdir $queuedir
31
32	# If there are any leftover handles, get rid of them.
33	set dbs [array names queuedbs]
34	foreach tofrom $dbs {
35		unset queuedbs($tofrom)
36	}
37	set machids {}
38}
39
40# Send function for replication.
41proc replsend_noenv { control rec fromid toid flags lsn } {
42	global is_repchild
43	global queuedbs machids
44	global drop drop_msg
45	global perm_sent_list
46	global anywhere
47	global qtestdir testdir
48
49	if { ![info exists qtestdir] } {
50		set qtestdir $testdir
51	}
52	set queuedir $qtestdir/MSGQUEUEDIR
53	set permflags [lsearch $flags "perm"]
54	if { [llength $perm_sent_list] != 0 && $permflags != -1 } {
55#		puts "replsend_noenv sent perm message, LSN $lsn"
56		lappend perm_sent_list $lsn
57	}
58
59	#
60	# If we are testing with dropped messages, then we drop every
61	# $drop_msg time.  If we do that just return 0 and don't do
62	# anything.
63	#
64	if { $drop != 0 } {
65		incr drop
66		if { $drop == $drop_msg } {
67			set drop 1
68			return 0
69		}
70	}
71	# XXX
72	# -1 is DB_BROADCAST_EID
73	if { $toid == -1 } {
74		set machlist $machids
75	} else {
76		set m NULL
77		# If we can send this anywhere, send it to the first id
78		# we find that is neither toid or fromid.  If we don't
79		# find any other candidates, this falls back to the
80		# original toid.
81		if { $anywhere != 0 } {
82			set anyflags [lsearch $flags "any"]
83			if { $anyflags != -1 } {
84				foreach m $machids {
85					if { $m == $fromid || $m == $toid } {
86						continue
87					}
88					set machlist [list $m]
89					break
90				}
91			}
92		}
93		#
94		# If we didn't find a different site, fall back
95		# to the toid.
96		#
97		if { $m == "NULL" } {
98			set machlist [list $toid]
99		}
100	}
101	foreach m $machlist {
102		# Do not broadcast to self.
103		if { $m == $fromid } {
104			continue
105		}
106		# Find the handle for the right message file.
107		set pid [pid]
108		set db $queuedbs($m.$fromid.$pid)
109		set stat [catch {$db put -append [list $control $rec $fromid]} ret]
110	}
111	if { $is_repchild } {
112		replready_noenv $fromid from
113	}
114
115	return 0
116}
117
118#
119proc replmsglen_noenv { machid {tf "to"}} {
120	global queuedbs qtestdir testdir
121
122	if { ![info exists qtestdir] } {
123		set qtestdir $testdir
124	}
125	set queuedir $qtestdir/MSGQUEUEDIR
126	set orig [pwd]
127
128	cd $queuedir
129	if { $tf == "to" } {
130		set msgdbs [glob -nocomplain ready.$machid.*]
131	} else {
132		set msgdbs [glob -nocomplain ready.*.$machid.*]
133	}
134	cd $orig
135	return [llength $msgdbs]
136}
137
138# Discard all the pending messages for a particular site.
139proc replclear_noenv { machid {tf "to"}} {
140	global queuedbs qtestdir testdir
141
142	if { ![info exists qtestdir] } {
143		set qtestdir $testdir
144	}
145	set queuedir $qtestdir/MSGQUEUEDIR
146	set orig [pwd]
147
148	cd $queuedir
149	if { $tf == "to" } {
150		set msgdbs [glob -nocomplain ready.$machid.*]
151	} else {
152		set msgdbs [glob -nocomplain ready.*.$machid.*]
153	}
154	foreach m $msgdbs {
155		file delete -force $m
156	}
157	cd $orig
158	set dbs [array names queuedbs]
159	foreach tofrom $dbs {
160		# Process only messages _to_ the specified machid.
161		if { [string match $machid.* $tofrom] == 1 } {
162			set db $queuedbs($tofrom)
163			set dbc [$db cursor]
164			for { set dbt [$dbc get -first] } \
165			    { [llength $dbt] > 0 } \
166			    { set dbt [$dbc get -next] } {
167				error_check_good \
168				    replclear($machid)_del [$dbc del] 0
169			}
170			error_check_good replclear($db)_dbc_close [$dbc close] 0
171		}
172	}
173	cd $queuedir
174	if { $tf == "to" } {
175		set msgdbs [glob -nocomplain temp.$machid.*]
176	} else {
177		set msgdbs [glob -nocomplain temp.*.$machid.*]
178	}
179	foreach m $msgdbs {
180#		file delete -force $m
181	}
182	cd $orig
183}
184
185# Makes messages available to replprocessqueue by closing and
186# renaming the message files.  We ready the files for one machine
187# ID at a time -- just those "to" or "from" the machine we want to
188# process, depending on 'tf'.
189proc replready_noenv { machid tf } {
190	global queuedbs machids
191	global counter
192	global qtestdir testdir
193
194	if { ![info exists qtestdir] } {
195		set qtestdir $testdir
196	}
197	set queuedir $qtestdir/MSGQUEUEDIR
198
199	set pid [pid]
200	#
201	# Close the temporary message files for the specified machine.
202	# Only close it if there are messages available.
203	#
204	set dbs [array names queuedbs]
205	set closed {}
206	foreach tofrom $dbs {
207		set toidx [string first . $tofrom]
208		set toid [string replace $tofrom $toidx end]
209		set fidx [expr $toidx + 1]
210		set fromidx [string first . $tofrom $fidx]
211		#
212		# First chop off the end, then chop off the toid
213		# in the beginning.
214		#
215		set fromid [string replace $tofrom $fromidx end]
216		set fromid [string replace $fromid 0 $toidx]
217		if { ($tf == "to" && $machid == $toid) || \
218		    ($tf == "from" && $machid == $fromid) } {
219			set nkeys [stat_field $queuedbs($tofrom) \
220			    stat "Number of keys"]
221			if { $nkeys != 0 } {
222				lappend closed \
223				    [list $toid $fromid temp.$tofrom]
224		 		error_check_good temp_close \
225				    [$queuedbs($tofrom) close] 0
226			}
227		}
228	}
229
230	# Rename the message files.
231	set cwd [pwd]
232	foreach filename $closed {
233		set toid [lindex $filename 0]
234		set fromid [lindex $filename 1]
235		set fname [lindex $filename 2]
236		set tofrom [string replace $fname 0 4]
237		incr counter($machid)
238		cd $queuedir
239# puts "$queuedir: Msg ready $fname to ready.$tofrom.$counter($machid)"
240		file rename -force $fname ready.$tofrom.$counter($machid)
241		cd $cwd
242		replsetuptempfile_noenv $toid $fromid $queuedir
243
244	}
245}
246
247# Add a machine to a replication environment.  This checks
248# that we have not already established that machine id, and
249# adds the machid to the list of ids.
250proc repladd_noenv { machid } {
251	global queuedbs machids counter qtestdir testdir
252
253	if { ![info exists qtestdir] } {
254		set qtestdir $testdir
255	}
256	set queuedir $qtestdir/MSGQUEUEDIR
257	if { [info exists machids] } {
258		if { [lsearch -exact $machids $machid] >= 0 } {
259			error "FAIL: repladd_noenv: machid $machid already exists."
260		}
261	}
262
263	set counter($machid) 0
264	lappend machids $machid
265
266	# Create all the databases that receive messages sent _to_
267	# the new machid.
268	replcreatetofiles_noenv $machid $queuedir
269
270	# Create all the databases that receive messages sent _from_
271	# the new machid.
272	replcreatefromfiles_noenv $machid $queuedir
273}
274
275# Creates all the databases that a machid needs for receiving messages
276# from other participants in a replication group.  Used when first
277# establishing the temp files, but also used whenever replready_noenv moves
278# the temp files away, because we'll need new files for any future messages.
279proc replcreatetofiles_noenv { toid queuedir } {
280	global machids
281
282	foreach m $machids {
283		# We don't need a file for a machid to send itself messages.
284		if { $m == $toid } {
285			continue
286		}
287		replsetuptempfile_noenv $toid $m $queuedir
288	}
289}
290
291# Creates all the databases that a machid needs for sending messages
292# to other participants in a replication group.  Used when first
293# establishing the temp files only.  Replready moves files based on
294# recipient, so we recreate files based on the recipient, also.
295proc replcreatefromfiles_noenv { fromid queuedir } {
296	global machids
297
298	foreach m $machids {
299		# We don't need a file for a machid to send itself messages.
300		if { $m == $fromid } {
301			continue
302		}
303		replsetuptempfile_noenv $m $fromid $queuedir
304	}
305}
306
307proc replsetuptempfile_noenv { to from queuedir } {
308	global queuedbs
309
310	set pid [pid]
311# puts "Open new temp.$to.$from.$pid"
312	set queuedbs($to.$from.$pid) [berkdb_open -create -excl -recno\
313	    -renumber $queuedir/temp.$to.$from.$pid]
314	error_check_good open_queuedbs [is_valid_db $queuedbs($to.$from.$pid)] TRUE
315}
316
317# Process a queue of messages, skipping every "skip_interval" entry.
318# We traverse the entire queue, but since we skip some messages, we
319# may end up leaving things in the queue, which should get picked up
320# on a later run.
321proc replprocessqueue_noenv { dbenv machid { skip_interval 0 } { hold_electp NONE } \
322    { dupmasterp NONE } { errp NONE } } {
323	global errorCode
324	global perm_response_list
325	global qtestdir testdir
326
327	# hold_electp is a call-by-reference variable which lets our caller
328	# know we need to hold an election.
329	if { [string compare $hold_electp NONE] != 0 } {
330		upvar $hold_electp hold_elect
331	}
332	set hold_elect 0
333
334	# dupmasterp is a call-by-reference variable which lets our caller
335	# know we have a duplicate master.
336	if { [string compare $dupmasterp NONE] != 0 } {
337		upvar $dupmasterp dupmaster
338	}
339	set dupmaster 0
340
341	# errp is a call-by-reference variable which lets our caller
342	# know we have gotten an error (that they expect).
343	if { [string compare $errp NONE] != 0 } {
344		upvar $errp errorp
345	}
346	set errorp 0
347
348	set nproced 0
349
350	set queuedir $qtestdir/MSGQUEUEDIR
351# puts "replprocessqueue_noenv: Make ready messages to eid $machid"
352
353	# Change directories temporarily so we get just the msg file name.
354	set cwd [pwd]
355	cd $queuedir
356	set msgdbs [glob -nocomplain ready.$machid.*]
357# puts "$queuedir.$machid: My messages: $msgdbs"
358	cd $cwd
359
360	foreach msgdb $msgdbs {
361		set db [berkdb_open $queuedir/$msgdb]
362		set dbc [$db cursor]
363
364		error_check_good process_dbc($machid) \
365		    [is_valid_cursor $dbc $db] TRUE
366
367		for { set dbt [$dbc get -first] } \
368		    { [llength $dbt] != 0 } \
369		    { set dbt [$dbc get -next] } {
370			set data [lindex [lindex $dbt 0] 1]
371			set recno [lindex [lindex $dbt 0] 0]
372
373			# If skip_interval is nonzero, we want to process
374			# messages out of order.  We do this in a simple but
375			# slimy way -- continue walking with the cursor
376			# without processing the message or deleting it from
377			# the queue, but do increment "nproced".  The way
378			# this proc is normally used, the precise value of
379			# nproced doesn't matter--we just don't assume the
380			# queues are empty if it's nonzero.  Thus, if we
381			# contrive to make sure it's nonzero, we'll always
382			# come back to records we've skipped on a later call
383			# to replprocessqueue.  (If there really are no records,
384			# we'll never get here.)
385			#
386			# Skip every skip_interval'th record (and use a
387			# remainder other than zero so that we're guaranteed
388			# to really process at least one record on every call).
389			if { $skip_interval != 0 } {
390				if { $nproced % $skip_interval == 1 } {
391					incr nproced
392					set dbt [$dbc get -next]
393					continue
394				}
395			}
396
397			# We need to remove the current message from the
398			# queue, because we're about to end the transaction
399			# and someone else processing messages might come in
400			# and reprocess this message which would be bad.
401			#
402			error_check_good queue_remove [$dbc del] 0
403
404			# We have to play an ugly cursor game here:  we
405			# currently hold a lock on the page of messages, but
406			# rep_process_message might need to lock the page with
407			# a different cursor in order to send a response.  So
408			# save the next recno, close the cursor, and then
409			# reopen and reset the cursor.  If someone else is
410			# processing this queue, our entry might have gone
411			# away, and we need to be able to handle that.
412			#
413#			error_check_good dbc_process_close [$dbc close] 0
414
415			set ret [catch {$dbenv rep_process_message \
416			    [lindex $data 2] [lindex $data 0] \
417			    [lindex $data 1]} res]
418
419			# Save all ISPERM and NOTPERM responses so we can
420			# compare their LSNs to the LSN in the log.  The
421			# variable perm_response_list holds the entire
422			# response so we can extract responses and LSNs as
423			# needed.
424			#
425			if { [llength $perm_response_list] != 0 && \
426			    ([is_substr $res ISPERM] || [is_substr $res NOTPERM]) } {
427				lappend perm_response_list $res
428			}
429
430			if { $ret != 0 } {
431				if { [string compare $errp NONE] != 0 } {
432					set errorp "$dbenv $machid $res"
433				} else {
434					error "FAIL:[timestamp]\
435					    rep_process_message returned $res"
436				}
437			}
438
439			incr nproced
440			if { $ret == 0 } {
441				set rettype [lindex $res 0]
442				set retval [lindex $res 1]
443				#
444				# Do nothing for 0 and NEWSITE
445				#
446				if { [is_substr $rettype HOLDELECTION] } {
447					set hold_elect 1
448				}
449				if { [is_substr $rettype DUPMASTER] } {
450					set dupmaster "1 $dbenv $machid"
451				}
452				if { [is_substr $rettype NOTPERM] || \
453				    [is_substr $rettype ISPERM] } {
454					set lsnfile [lindex $retval 0]
455					set lsnoff [lindex $retval 1]
456				}
457			}
458
459			if { $errorp != 0 } {
460				# Break on an error, caller wants to handle it.
461				break
462			}
463			if { $hold_elect == 1 } {
464				# Break on a HOLDELECTION, for the same reason.
465				break
466			}
467			if { $dupmaster == 1 } {
468				# Break on a DUPMASTER, for the same reason.
469				break
470			}
471
472		}
473		error_check_good dbc_close [$dbc close] 0
474
475		#
476		# Check the number of keys remaining because we only
477		# want to rename to done, message file that are
478		# fully processed.  Some message types might break
479		# out of the loop early and we want to process
480		# the remaining messages the next time through.
481		#
482		set nkeys [stat_field $db stat "Number of keys"]
483		error_check_good db_close [$db close] 0
484
485		if { $nkeys == 0 } {
486			set dbname [string replace $msgdb 0 5 done.]
487			#
488			# We have to do a special dance to get rid of the
489			# empty messaging files because of the way Windows
490			# handles open files marked for deletion.
491			# On Windows, a file is marked for deletion but
492			# does not actually get deleted until the last handle
493			# is closed.  This causes a problem when a test tries
494			# to create a new file with a previously-used name,
495			# and Windows believes the old file still exists.
496			# Therefore, we rename the files before deleting them,
497			# to guarantee they are out of the way.
498			#
499			file rename -force $queuedir/$msgdb $queuedir/$dbname
500			file delete -force $queuedir/$dbname
501		}
502	}
503	# Return the number of messages processed.
504	return $nproced
505}
506
507