1# See the file LICENSE for redistribution information.
2#
3# Copyright (c) 1996,2008 Oracle.  All rights reserved.
4#
5# $Id: join.tcl,v 12.6 2008/01/08 20:58:53 bostic Exp $
6#
7# TEST	jointest
8# TEST	Test duplicate assisted joins.  Executes 1, 2, 3 and 4-way joins
9# TEST	with differing index orders and selectivity.
10# TEST
11# TEST	We'll test 2-way, 3-way, and 4-way joins and figure that if those
12# TEST	work, everything else does as well.  We'll create test databases
13# TEST	called join1.db, join2.db, join3.db, and join4.db.  The number on
14# TEST	the database describes the duplication -- duplicates are of the
15# TEST	form 0, N, 2N, 3N, ...  where N is the number of the database.
16# TEST	Primary.db is the primary database, and null.db is the database
17# TEST	that has no matching duplicates.
18# TEST
19# TEST	We should test this on all btrees, all hash, and a combination thereof
20proc jointest { {psize 8192} {with_dup_dups 0} {flags 0} } {
21	global testdir
22	global rand_init
23	source ./include.tcl
24
25	env_cleanup $testdir
26	berkdb srand $rand_init
27
28	# Use one environment for all database opens so we don't
29	# need oodles of regions.
30	set env [berkdb_env -create -home $testdir]
31	error_check_good env_open [is_valid_env $env] TRUE
32
33	# With the new offpage duplicate code, we don't support
34	# duplicate duplicates in sorted dup sets.  Thus, if with_dup_dups
35	# is greater than one, run only with "-dup".
36	if { $with_dup_dups > 1 } {
37		set doptarray {"-dup"}
38	} else {
39		set doptarray {"-dup -dupsort" "-dup" RANDOMMIX RANDOMMIX }
40	}
41
42	# NB: these flags are internal only, ok
43	foreach m "DB_BTREE DB_HASH DB_BOTH" {
44		# run with two different random mixes.
45		foreach dopt $doptarray {
46			set opt [list "-env" $env $dopt]
47
48			puts "Join test: ($m $dopt) psize $psize,\
49			    $with_dup_dups dup\
50			    dups, flags $flags."
51
52			build_all $m $psize $opt oa $with_dup_dups
53
54			# null.db is db_built fifth but is referenced by
55			# zero;  set up the option array appropriately.
56			set oa(0) $oa(5)
57
58			# Build the primary
59			puts "\tBuilding the primary database $m"
60			set oflags "-create -truncate -mode 0644 -env $env\
61			    [conv $m [berkdb random_int 1 2]]"
62			set db [eval {berkdb_open} $oflags primary.db]
63			error_check_good dbopen [is_valid_db $db] TRUE
64			for { set i 0 } { $i < 1000 } { incr i } {
65				set key [format "%04d" $i]
66				set ret [$db put $key stub]
67				error_check_good "primary put" $ret 0
68			}
69			error_check_good "primary close" [$db close] 0
70			set did [open $dict]
71			gets $did str
72			do_join primary.db "1 0" $str oa $flags\
73			    $with_dup_dups
74			gets $did str
75			do_join primary.db "2 0" $str oa $flags\
76			    $with_dup_dups
77			gets $did str
78			do_join primary.db "3 0" $str oa $flags\
79			    $with_dup_dups
80			gets $did str
81			do_join primary.db "4 0" $str oa $flags\
82			    $with_dup_dups
83			gets $did str
84			do_join primary.db "1" $str oa $flags $with_dup_dups
85			gets $did str
86			do_join primary.db "2" $str oa $flags $with_dup_dups
87			gets $did str
88			do_join primary.db "3" $str oa $flags $with_dup_dups
89			gets $did str
90			do_join primary.db "4" $str oa $flags $with_dup_dups
91			gets $did str
92			do_join primary.db "1 2" $str oa $flags\
93			    $with_dup_dups
94			gets $did str
95			do_join primary.db "1 2 3" $str oa $flags\
96			    $with_dup_dups
97			gets $did str
98			do_join primary.db "1 2 3 4" $str oa $flags\
99			    $with_dup_dups
100			gets $did str
101			do_join primary.db "2 1" $str oa $flags\
102			    $with_dup_dups
103			gets $did str
104			do_join primary.db "3 2 1" $str oa $flags\
105			    $with_dup_dups
106			gets $did str
107			do_join primary.db "4 3 2 1" $str oa $flags\
108			    $with_dup_dups
109			gets $did str
110			do_join primary.db "1 3" $str oa $flags $with_dup_dups
111			gets $did str
112			do_join primary.db "3 1" $str oa $flags $with_dup_dups
113			gets $did str
114			do_join primary.db "1 4" $str oa $flags $with_dup_dups
115			gets $did str
116			do_join primary.db "4 1" $str oa $flags $with_dup_dups
117			gets $did str
118			do_join primary.db "2 3" $str oa $flags $with_dup_dups
119			gets $did str
120			do_join primary.db "3 2" $str oa $flags $with_dup_dups
121			gets $did str
122			do_join primary.db "2 4" $str oa $flags $with_dup_dups
123			gets $did str
124			do_join primary.db "4 2" $str oa $flags $with_dup_dups
125			gets $did str
126			do_join primary.db "3 4" $str oa $flags $with_dup_dups
127			gets $did str
128			do_join primary.db "4 3" $str oa $flags $with_dup_dups
129			gets $did str
130			do_join primary.db "2 3 4" $str oa $flags\
131			    $with_dup_dups
132			gets $did str
133			do_join primary.db "3 4 1" $str oa $flags\
134			    $with_dup_dups
135			gets $did str
136			do_join primary.db "4 2 1" $str oa $flags\
137			    $with_dup_dups
138			gets $did str
139			do_join primary.db "0 2 1" $str oa $flags\
140			    $with_dup_dups
141			gets $did str
142			do_join primary.db "3 2 0" $str oa $flags\
143			    $with_dup_dups
144			gets $did str
145			do_join primary.db "4 3 2 1" $str oa $flags\
146			    $with_dup_dups
147			gets $did str
148			do_join primary.db "4 3 0 1" $str oa $flags\
149			    $with_dup_dups
150			gets $did str
151			do_join primary.db "3 3 3" $str oa $flags\
152			    $with_dup_dups
153			gets $did str
154			do_join primary.db "2 2 3 3" $str oa $flags\
155			    $with_dup_dups
156			gets $did str2
157			gets $did str
158			do_join primary.db "1 2" $str oa $flags\
159			    $with_dup_dups "3" $str2
160
161			# You really don't want to run this section
162			# with $with_dup_dups > 2.
163			if { $with_dup_dups <= 2 } {
164				gets $did str2
165				gets $did str
166				do_join primary.db "1 2 3" $str\
167				    oa $flags $with_dup_dups "3 3 1" $str2
168				gets $did str2
169				gets $did str
170				do_join primary.db "4 0 2" $str\
171				    oa $flags $with_dup_dups "4 3 3" $str2
172				gets $did str2
173				gets $did str
174				do_join primary.db "3 2 1" $str\
175				    oa $flags $with_dup_dups "0 2" $str2
176				gets $did str2
177				gets $did str
178				do_join primary.db "2 2 3 3" $str\
179				    oa $flags $with_dup_dups "1 4 4" $str2
180				gets $did str2
181				gets $did str
182				do_join primary.db "2 2 3 3" $str\
183				    oa $flags $with_dup_dups "0 0 4 4" $str2
184				gets $did str2
185				gets $did str
186				do_join primary.db "2 2 3 3" $str2\
187				    oa $flags $with_dup_dups "2 4 4" $str
188				gets $did str2
189				gets $did str
190				do_join primary.db "2 2 3 3" $str2\
191				    oa $flags $with_dup_dups "0 0 4 4" $str
192			}
193			close $did
194		}
195	}
196
197	error_check_good env_close [$env close] 0
198}
199
200proc build_all { method psize opt oaname with_dup_dups {nentries 100} } {
201	global testdir
202	db_build join1.db $nentries 50 1 [conv $method 1]\
203	    $psize $opt $oaname $with_dup_dups
204	db_build join2.db $nentries 25 2 [conv $method 2]\
205	    $psize $opt $oaname $with_dup_dups
206	db_build join3.db $nentries 16 3 [conv $method 3]\
207	    $psize $opt $oaname $with_dup_dups
208	db_build join4.db $nentries 12 4 [conv $method 4]\
209	    $psize $opt $oaname $with_dup_dups
210	db_build null.db $nentries 0 5 [conv $method 5]\
211	    $psize $opt $oaname $with_dup_dups
212}
213
214proc conv { m i } {
215	switch -- $m {
216		DB_HASH { return "-hash"}
217		"-hash" { return "-hash"}
218		DB_BTREE { return "-btree"}
219		"-btree" { return "-btree"}
220		DB_BOTH {
221			if { [expr $i % 2] == 0 } {
222				return "-hash";
223			} else {
224				return "-btree";
225			}
226		}
227	}
228}
229
230proc random_opts { } {
231	set j [berkdb random_int 0 1]
232	if { $j == 0 } {
233		return " -dup"
234	} else {
235		return " -dup -dupsort"
236	}
237}
238
239proc db_build { name nkeys ndups dup_interval method psize lopt oaname \
240    with_dup_dups } {
241	source ./include.tcl
242
243	# Get array of arg names (from two levels up the call stack)
244	upvar 2 $oaname oa
245
246	# Search for "RANDOMMIX" in $opt, and if present, replace
247	# with " -dup" or " -dup -dupsort" at random.
248	set i [lsearch $lopt RANDOMMIX]
249	if { $i != -1 } {
250		set lopt [lreplace $lopt $i $i [random_opts]]
251	}
252
253	# Save off db_open arguments for this database.
254	set opt [eval concat $lopt]
255	set oa($dup_interval) $opt
256
257	# Create the database and open the dictionary
258	set oflags "-create -truncate -mode 0644 $method\
259	    -pagesize $psize"
260	set db [eval {berkdb_open} $oflags $opt $name]
261	error_check_good dbopen [is_valid_db $db] TRUE
262	set did [open $dict]
263	set count 0
264	puts -nonewline "\tBuilding $name: $nkeys keys "
265	puts -nonewline "with $ndups duplicates at interval of $dup_interval"
266	if { $with_dup_dups > 0 } {
267		puts ""
268		puts "\t\tand $with_dup_dups duplicate duplicates."
269	} else {
270		puts "."
271	}
272	for { set count 0 } { [gets $did str] != -1 && $count < $nkeys } {
273	    incr count} {
274		set str $str$name
275		# We need to make sure that the dups are inserted in a
276		# random, or near random, order.  Do this by generating
277		# them and putting each in a list, then sorting the list
278		# at random.
279		set duplist {}
280		for { set i 0 } { $i < $ndups } { incr i } {
281			set data [format "%04d" [expr $i * $dup_interval]]
282			lappend duplist $data
283		}
284		# randomize the list
285		for { set i 0 } { $i < $ndups } {incr i } {
286		#	set j [berkdb random_int $i [expr $ndups - 1]]
287			set j [expr ($i % 2) + $i]
288			if { $j >= $ndups } { set j $i }
289			set dupi [lindex $duplist $i]
290			set dupj [lindex $duplist $j]
291			set duplist [lreplace $duplist $i $i $dupj]
292			set duplist [lreplace $duplist $j $j $dupi]
293		}
294		foreach data $duplist {
295			if { $with_dup_dups != 0 } {
296				for { set j 0 }\
297				    { $j < $with_dup_dups }\
298				    {incr j} {
299					set ret [$db put $str $data]
300					error_check_good put$j $ret 0
301				}
302			} else {
303				set ret [$db put $str $data]
304				error_check_good put $ret 0
305			}
306		}
307
308		if { $ndups == 0 } {
309			set ret [$db put $str NODUP]
310			error_check_good put $ret 0
311		}
312	}
313	close $did
314	error_check_good close:$name [$db close] 0
315}
316
317proc do_join { primary dbs key oanm flags with_dup_dups {dbs2 ""} {key2 ""} } {
318	global testdir
319	source ./include.tcl
320
321	upvar $oanm oa
322
323	puts -nonewline "\tJoining: $dbs on $key"
324	if { $dbs2 == "" } {
325	    puts ""
326	} else {
327	    puts " with $dbs2 on $key2"
328	}
329
330	# Open all the databases
331	set p [berkdb_open -unknown $testdir/$primary]
332	error_check_good "primary open" [is_valid_db $p] TRUE
333
334	set dblist ""
335	set curslist ""
336
337	set ndx [llength $dbs]
338
339	foreach i [concat $dbs $dbs2] {
340		set opt $oa($i)
341		set db [eval {berkdb_open -unknown} $opt [n_to_name $i]]
342		error_check_good "[n_to_name $i] open" [is_valid_db $db] TRUE
343		set curs [$db cursor]
344		error_check_good "$db cursor" \
345		    [is_substr $curs "$db.c"] 1
346		lappend dblist $db
347		lappend curslist $curs
348
349		if { $ndx > 0 } {
350		    set realkey [concat $key[n_to_name $i]]
351		} else {
352		    set realkey [concat $key2[n_to_name $i]]
353		}
354
355		set pair [$curs get -set $realkey]
356		error_check_good cursor_set:$realkey:$pair \
357			[llength [lindex $pair 0]] 2
358
359		incr ndx -1
360	}
361
362	set join_curs [eval {$p join} $curslist]
363	error_check_good join_cursor \
364	    [is_substr $join_curs "$p.c"] 1
365
366	# Calculate how many dups we expect.
367	# We go through the list of indices.  If we find a 0, then we
368	# expect 0 dups.  For everything else, we look at pairs of numbers,
369	# if the are relatively prime, multiply them and figure out how
370	# many times that goes into 50.  If they aren't relatively prime,
371	# take the number of times the larger goes into 50.
372	set expected 50
373	set last 1
374	foreach n [concat $dbs $dbs2] {
375		if { $n == 0 } {
376			set expected 0
377			break
378		}
379		if { $last == $n } {
380			continue
381		}
382
383		if { [expr $last % $n] == 0 || [expr $n % $last] == 0 } {
384			if { $n > $last } {
385				set last $n
386				set expected [expr 50 / $last]
387			}
388		} else {
389			set last [expr $n * $last / [gcd $n $last]]
390			set expected [expr 50 / $last]
391		}
392	}
393
394	# If $with_dup_dups is greater than zero, each datum has
395	# been inserted $with_dup_dups times.  So we expect the number
396	# of dups to go up by a factor of ($with_dup_dups)^(number of databases)
397
398	if { $with_dup_dups > 0 } {
399		foreach n [concat $dbs $dbs2] {
400			set expected [expr $expected * $with_dup_dups]
401		}
402	}
403
404	set ndups 0
405	if { $flags == " -join_item"} {
406		set l 1
407	} else {
408		set flags ""
409		set l 2
410	}
411	for { set pair [eval {$join_curs get} $flags] } { \
412		[llength [lindex $pair 0]] == $l } {
413	    set pair [eval {$join_curs get} $flags] } {
414		set k [lindex [lindex $pair 0] 0]
415		foreach i $dbs {
416			error_check_bad valid_dup:$i:$dbs $i 0
417			set kval [string trimleft $k 0]
418			if { [string length $kval] == 0 } {
419				set kval 0
420			}
421			error_check_good valid_dup:$i:$dbs [expr $kval % $i] 0
422		}
423		incr ndups
424	}
425	error_check_good number_of_dups:$dbs $ndups $expected
426
427	error_check_good close_primary [$p close] 0
428	foreach i $curslist {
429		error_check_good close_cursor:$i [$i close] 0
430	}
431	foreach i $dblist {
432		error_check_good close_index:$i [$i close] 0
433	}
434}
435
436proc n_to_name { n } {
437global testdir
438	if { $n == 0 } {
439		return null.db;
440	} else {
441		return join$n.db;
442	}
443}
444
445proc gcd { a b } {
446	set g 1
447
448	for { set i 2 } { $i <= $a } { incr i } {
449		if { [expr $a % $i] == 0 && [expr $b % $i] == 0 } {
450			set g $i
451		}
452	}
453	return $g
454}
455