1# See the file LICENSE for redistribution information. 2# 3# Copyright (c) 2001,2008 Oracle. All rights reserved. 4# 5# $Id: sijointest.tcl,v 12.7 2008/01/08 20:58:53 bostic Exp $ 6# 7# TEST sijointest: Secondary index and join test. 8# TEST This used to be si005.tcl. 9proc sijointest { methods {nentries 1000} args } { 10 source ./include.tcl 11 12 # Primary method/args. 13 set pmethod [lindex $methods 0] 14 set pargs [convert_args $pmethod $args] 15 set pomethod [convert_method $pmethod] 16 17 # Si005 does a join within a simulated database schema 18 # in which the primary index maps a record ID to a ZIP code and 19 # name in the form "XXXXXname", and there are two secondaries: 20 # one mapping ZIP to ID, the other mapping name to ID. 21 # The primary may be of any database type; the two secondaries 22 # must be either btree or hash. 23 24 # Method/args for all the secondaries. If only one method 25 # was specified, assume the same method for the two secondaries. 26 set methods [lrange $methods 1 end] 27 if { [llength $methods] == 0 } { 28 for { set i 0 } { $i < 2 } { incr i } { 29 lappend methods $pmethod 30 } 31 } elseif { [llength $methods] != 2 } { 32 puts "FAIL: Sijoin requires exactly two secondaries." 33 return 34 } 35 36 set argses [convert_argses $methods $args] 37 set omethods [convert_methods $methods] 38 39 puts "Secondary index join test." 40 puts "sijoin \{\[ list $pmethod $methods \]\} $nentries" 41 env_cleanup $testdir 42 43 set pname "sijoin-primary.db" 44 set zipname "sijoin-zip.db" 45 set namename "sijoin-name.db" 46 47 # Open an environment 48 # XXX if one is not supplied! 49 set env [berkdb_env -create -home $testdir] 50 error_check_good env_open [is_valid_env $env] TRUE 51 52 # Open the databases. 53 set pdb [eval {berkdb_open -create -env} $env $pomethod $pargs $pname] 54 error_check_good primary_open [is_valid_db $pdb] TRUE 55 56 set zipdb [eval {berkdb_open -create -dup -env} $env \ 57 [lindex $omethods 0] [lindex $argses 0] $zipname] 58 error_check_good zip_open [is_valid_db $zipdb] TRUE 59 error_check_good zip_associate [$pdb associate sj_getzip $zipdb] 0 60 61 set namedb [eval {berkdb_open -create -dup -env} $env \ 62 [lindex $omethods 1] [lindex $argses 1] $namename] 63 error_check_good name_open [is_valid_db $namedb] TRUE 64 error_check_good name_associate [$pdb associate sj_getname $namedb] 0 65 66 puts "\tSijoin.a: Populate database with $nentries \"names\"" 67 sj_populate $pdb $nentries 68 puts "\tSijoin.b: Perform a join on each \"name\" and \"ZIP\"" 69 sj_jointest $pdb $zipdb $namedb 70 71 error_check_good name_close [$namedb close] 0 72 error_check_good zip_close [$zipdb close] 0 73 error_check_good primary_close [$pdb close] 0 74 error_check_good env_close [$env close] 0 75} 76 77proc sj_jointest { pdb zipdb namedb } { 78 set pdbc [$pdb cursor] 79 error_check_good pdb_cursor [is_valid_cursor $pdbc $pdb] TRUE 80 for { set dbt [$pdbc get -first] } { [llength $dbt] > 0 } \ 81 { set dbt [$pdbc get -next] } { 82 set item [lindex [lindex $dbt 0] 1] 83 set retlist [sj_dojoin $item $pdb $zipdb $namedb] 84 } 85} 86 87proc sj_dojoin { item pdb zipdb namedb } { 88 set name [sj_getname "" $item] 89 set zip [sj_getzip "" $item] 90 91 set zipc [$zipdb cursor] 92 error_check_good zipc($item) [is_valid_cursor $zipc $zipdb] TRUE 93 94 set namec [$namedb cursor] 95 error_check_good namec($item) [is_valid_cursor $namec $namedb] TRUE 96 97 set pc [$pdb cursor] 98 error_check_good pc($item) [is_valid_cursor $pc $pdb] TRUE 99 100 set ret [$zipc get -set $zip] 101 set zd [lindex [lindex $ret 0] 1] 102 error_check_good zipset($zip) [sj_getzip "" $zd] $zip 103 104 set ret [$namec get -set $name] 105 set nd [lindex [lindex $ret 0] 1] 106 error_check_good nameset($name) [sj_getname "" $nd] $name 107 108 set joinc [$pdb join $zipc $namec] 109 110 set anyreturned 0 111 for { set dbt [$joinc get] } { [llength $dbt] > 0 } \ 112 { set dbt [$joinc get] } { 113 set ritem [lindex [lindex $dbt 0] 1] 114 error_check_good returned_item($item) $ritem $item 115 incr anyreturned 116 } 117 error_check_bad anyreturned($item) $anyreturned 0 118 119 error_check_good joinc_close($item) [$joinc close] 0 120 error_check_good pc_close($item) [$pc close] 0 121 error_check_good namec_close($item) [$namec close] 0 122 error_check_good zipc_close($item) [$zipc close] 0 123} 124 125proc sj_populate { db nentries } { 126 global dict 127 128 set did [open $dict] 129 for { set i 1 } { $i <= $nentries } { incr i } { 130 gets $did word 131 if { [string length $word] < 3 } { 132 gets $did word 133 if { [string length $word] < 3 } { 134 puts "FAIL:\ 135 unexpected pair of words < 3 chars long" 136 } 137 } 138 set datalist [sj_name2zips $word] 139 foreach data $datalist { 140 error_check_good db_put($data) [$db put $i $data$word] 0 141 } 142 } 143 close $did 144} 145 146proc sj_getzip { key data } { return [string range $data 0 4] } 147proc sj_getname { key data } { return [string range $data 5 end] } 148 149# The dirty secret of this test is that the ZIP code is a function of the 150# name, so we can generate a database and then verify join results easily 151# without having to consult actual data. 152# 153# Any word passed into this function will generate from 1 to 26 ZIP 154# entries, out of the set {00000, 01000 ... 99000}. The number of entries 155# is just the position in the alphabet of the word's first letter; the 156# entries are then hashed to the set {00, 01 ... 99} N different ways. 157proc sj_name2zips { name } { 158 global alphabet 159 160 set n [expr [string first [string index $name 0] $alphabet] + 1] 161 error_check_bad starts_with_abc($name) $n -1 162 163 set ret {} 164 for { set i 0 } { $i < $n } { incr i } { 165 set b 0 166 for { set j 1 } { $j < [string length $name] } \ 167 { incr j } { 168 set b [sj_nhash $name $i $j $b] 169 } 170 lappend ret [format %05u [expr $b % 100]000] 171 } 172 return $ret 173} 174proc sj_nhash { name i j b } { 175 global alphabet 176 177 set c [string first [string index $name $j] $alphabet'] 178 return [expr (($b * 991) + ($i * 997) + $c) % 10000000] 179} 180