1# See the file LICENSE for redistribution information.
2#
3# Copyright (c) 2001,2008 Oracle.  All rights reserved.
4#
5# $Id: sijointest.tcl,v 12.7 2008/01/08 20:58:53 bostic Exp $
6#
7# TEST	sijointest: Secondary index and join test.
8# TEST 	This used to be si005.tcl.
9proc sijointest { methods {nentries 1000} args } {
10	source ./include.tcl
11
12	# Primary method/args.
13	set pmethod [lindex $methods 0]
14	set pargs [convert_args $pmethod $args]
15	set pomethod [convert_method $pmethod]
16
17	# Si005 does a join within a simulated database schema
18	# in which the primary index maps a record ID to a ZIP code and
19	# name in the form "XXXXXname", and there are two secondaries:
20	# one mapping ZIP to ID, the other mapping name to ID.
21	# The primary may be of any database type;  the two secondaries
22	# must be either btree or hash.
23
24	# Method/args for all the secondaries.  If only one method
25	# was specified, assume the same method for the two secondaries.
26	set methods [lrange $methods 1 end]
27	if { [llength $methods] == 0 } {
28		for { set i 0 } { $i < 2 } { incr i } {
29			lappend methods $pmethod
30		}
31	} elseif { [llength $methods] != 2 } {
32		puts "FAIL: Sijoin requires exactly two secondaries."
33		return
34	}
35
36	set argses [convert_argses $methods $args]
37	set omethods [convert_methods $methods]
38
39	puts "Secondary index join test."
40	puts "sijoin  \{\[ list $pmethod $methods \]\} $nentries"
41	env_cleanup $testdir
42
43	set pname "sijoin-primary.db"
44	set zipname "sijoin-zip.db"
45	set namename "sijoin-name.db"
46
47	# Open an environment
48	# XXX if one is not supplied!
49	set env [berkdb_env -create -home $testdir]
50	error_check_good env_open [is_valid_env $env] TRUE
51
52	# Open the databases.
53	set pdb [eval {berkdb_open -create -env} $env $pomethod $pargs $pname]
54	error_check_good primary_open [is_valid_db $pdb] TRUE
55
56	set zipdb [eval {berkdb_open -create -dup -env} $env \
57	    [lindex $omethods 0] [lindex $argses 0] $zipname]
58	error_check_good zip_open [is_valid_db $zipdb] TRUE
59	error_check_good zip_associate [$pdb associate sj_getzip $zipdb] 0
60
61	set namedb [eval {berkdb_open -create -dup -env} $env \
62	    [lindex $omethods 1] [lindex $argses 1] $namename]
63	error_check_good name_open [is_valid_db $namedb] TRUE
64	error_check_good name_associate [$pdb associate sj_getname $namedb] 0
65
66	puts "\tSijoin.a: Populate database with $nentries \"names\""
67	sj_populate $pdb $nentries
68	puts "\tSijoin.b: Perform a join on each \"name\" and \"ZIP\""
69	sj_jointest $pdb $zipdb $namedb
70
71	error_check_good name_close [$namedb close] 0
72	error_check_good zip_close [$zipdb close] 0
73	error_check_good primary_close [$pdb close] 0
74	error_check_good env_close [$env close] 0
75}
76
77proc sj_jointest { pdb zipdb namedb } {
78	set pdbc [$pdb cursor]
79	error_check_good pdb_cursor [is_valid_cursor $pdbc $pdb] TRUE
80	for { set dbt [$pdbc get -first] } { [llength $dbt] > 0 } \
81	    { set dbt [$pdbc get -next] } {
82		set item [lindex [lindex $dbt 0] 1]
83		set retlist [sj_dojoin $item $pdb $zipdb $namedb]
84	}
85}
86
87proc sj_dojoin { item pdb zipdb namedb } {
88	set name [sj_getname "" $item]
89	set zip [sj_getzip "" $item]
90
91	set zipc [$zipdb cursor]
92	error_check_good zipc($item) [is_valid_cursor $zipc $zipdb] TRUE
93
94	set namec [$namedb cursor]
95	error_check_good namec($item) [is_valid_cursor $namec $namedb] TRUE
96
97	set pc [$pdb cursor]
98	error_check_good pc($item) [is_valid_cursor $pc $pdb] TRUE
99
100	set ret [$zipc get -set $zip]
101	set zd [lindex [lindex $ret 0] 1]
102	error_check_good zipset($zip) [sj_getzip "" $zd] $zip
103
104	set ret [$namec get -set $name]
105	set nd [lindex [lindex $ret 0] 1]
106	error_check_good nameset($name) [sj_getname "" $nd] $name
107
108	set joinc [$pdb join $zipc $namec]
109
110	set anyreturned 0
111	for { set dbt [$joinc get] } { [llength $dbt] > 0 } \
112	    { set dbt [$joinc get] } {
113		set ritem [lindex [lindex $dbt 0] 1]
114		error_check_good returned_item($item) $ritem $item
115		incr anyreturned
116	}
117	error_check_bad anyreturned($item) $anyreturned 0
118
119	error_check_good joinc_close($item) [$joinc close] 0
120	error_check_good pc_close($item) [$pc close] 0
121	error_check_good namec_close($item) [$namec close] 0
122	error_check_good zipc_close($item) [$zipc close] 0
123}
124
125proc sj_populate { db nentries } {
126	global dict
127
128	set did [open $dict]
129	for { set i 1 } { $i <= $nentries } { incr i } {
130		gets $did word
131		if { [string length $word] < 3 } {
132			gets $did word
133			if { [string length $word] < 3 } {
134				puts "FAIL:\
135				    unexpected pair of words < 3 chars long"
136			}
137		}
138		set datalist [sj_name2zips $word]
139		foreach data $datalist {
140			error_check_good db_put($data) [$db put $i $data$word] 0
141		}
142	}
143	close $did
144}
145
146proc sj_getzip { key data } { return [string range $data 0 4] }
147proc sj_getname { key data } { return [string range $data 5 end] }
148
149# The dirty secret of this test is that the ZIP code is a function of the
150# name, so we can generate a database and then verify join results easily
151# without having to consult actual data.
152#
153# Any word passed into this function will generate from 1 to 26 ZIP
154# entries, out of the set {00000, 01000 ... 99000}.  The number of entries
155# is just the position in the alphabet of the word's first letter;  the
156# entries are then hashed to the set {00, 01 ... 99} N different ways.
157proc sj_name2zips { name } {
158	global alphabet
159
160	set n [expr [string first [string index $name 0] $alphabet] + 1]
161	error_check_bad starts_with_abc($name) $n -1
162
163	set ret {}
164	for { set i 0 } { $i < $n } { incr i } {
165		set b 0
166		for { set j 1 } { $j < [string length $name] } \
167		    { incr j } {
168			set b [sj_nhash $name $i $j $b]
169		}
170		lappend ret [format %05u [expr $b % 100]000]
171	}
172	return $ret
173}
174proc sj_nhash { name i j b } {
175	global alphabet
176
177	set c [string first [string index $name $j] $alphabet']
178	return [expr (($b * 991) + ($i * 997) + $c) % 10000000]
179}
180