1# See the file LICENSE for redistribution information. 2# 3# Copyright (c) 1996,2008 Oracle. All rights reserved. 4# 5# $Id: join.tcl,v 12.6 2008/01/08 20:58:53 bostic Exp $ 6# 7# TEST jointest 8# TEST Test duplicate assisted joins. Executes 1, 2, 3 and 4-way joins 9# TEST with differing index orders and selectivity. 10# TEST 11# TEST We'll test 2-way, 3-way, and 4-way joins and figure that if those 12# TEST work, everything else does as well. We'll create test databases 13# TEST called join1.db, join2.db, join3.db, and join4.db. The number on 14# TEST the database describes the duplication -- duplicates are of the 15# TEST form 0, N, 2N, 3N, ... where N is the number of the database. 16# TEST Primary.db is the primary database, and null.db is the database 17# TEST that has no matching duplicates. 18# TEST 19# TEST We should test this on all btrees, all hash, and a combination thereof 20proc jointest { {psize 8192} {with_dup_dups 0} {flags 0} } { 21 global testdir 22 global rand_init 23 source ./include.tcl 24 25 env_cleanup $testdir 26 berkdb srand $rand_init 27 28 # Use one environment for all database opens so we don't 29 # need oodles of regions. 30 set env [berkdb_env -create -home $testdir] 31 error_check_good env_open [is_valid_env $env] TRUE 32 33 # With the new offpage duplicate code, we don't support 34 # duplicate duplicates in sorted dup sets. Thus, if with_dup_dups 35 # is greater than one, run only with "-dup". 36 if { $with_dup_dups > 1 } { 37 set doptarray {"-dup"} 38 } else { 39 set doptarray {"-dup -dupsort" "-dup" RANDOMMIX RANDOMMIX } 40 } 41 42 # NB: these flags are internal only, ok 43 foreach m "DB_BTREE DB_HASH DB_BOTH" { 44 # run with two different random mixes. 45 foreach dopt $doptarray { 46 set opt [list "-env" $env $dopt] 47 48 puts "Join test: ($m $dopt) psize $psize,\ 49 $with_dup_dups dup\ 50 dups, flags $flags." 51 52 build_all $m $psize $opt oa $with_dup_dups 53 54 # null.db is db_built fifth but is referenced by 55 # zero; set up the option array appropriately. 56 set oa(0) $oa(5) 57 58 # Build the primary 59 puts "\tBuilding the primary database $m" 60 set oflags "-create -truncate -mode 0644 -env $env\ 61 [conv $m [berkdb random_int 1 2]]" 62 set db [eval {berkdb_open} $oflags primary.db] 63 error_check_good dbopen [is_valid_db $db] TRUE 64 for { set i 0 } { $i < 1000 } { incr i } { 65 set key [format "%04d" $i] 66 set ret [$db put $key stub] 67 error_check_good "primary put" $ret 0 68 } 69 error_check_good "primary close" [$db close] 0 70 set did [open $dict] 71 gets $did str 72 do_join primary.db "1 0" $str oa $flags\ 73 $with_dup_dups 74 gets $did str 75 do_join primary.db "2 0" $str oa $flags\ 76 $with_dup_dups 77 gets $did str 78 do_join primary.db "3 0" $str oa $flags\ 79 $with_dup_dups 80 gets $did str 81 do_join primary.db "4 0" $str oa $flags\ 82 $with_dup_dups 83 gets $did str 84 do_join primary.db "1" $str oa $flags $with_dup_dups 85 gets $did str 86 do_join primary.db "2" $str oa $flags $with_dup_dups 87 gets $did str 88 do_join primary.db "3" $str oa $flags $with_dup_dups 89 gets $did str 90 do_join primary.db "4" $str oa $flags $with_dup_dups 91 gets $did str 92 do_join primary.db "1 2" $str oa $flags\ 93 $with_dup_dups 94 gets $did str 95 do_join primary.db "1 2 3" $str oa $flags\ 96 $with_dup_dups 97 gets $did str 98 do_join primary.db "1 2 3 4" $str oa $flags\ 99 $with_dup_dups 100 gets $did str 101 do_join primary.db "2 1" $str oa $flags\ 102 $with_dup_dups 103 gets $did str 104 do_join primary.db "3 2 1" $str oa $flags\ 105 $with_dup_dups 106 gets $did str 107 do_join primary.db "4 3 2 1" $str oa $flags\ 108 $with_dup_dups 109 gets $did str 110 do_join primary.db "1 3" $str oa $flags $with_dup_dups 111 gets $did str 112 do_join primary.db "3 1" $str oa $flags $with_dup_dups 113 gets $did str 114 do_join primary.db "1 4" $str oa $flags $with_dup_dups 115 gets $did str 116 do_join primary.db "4 1" $str oa $flags $with_dup_dups 117 gets $did str 118 do_join primary.db "2 3" $str oa $flags $with_dup_dups 119 gets $did str 120 do_join primary.db "3 2" $str oa $flags $with_dup_dups 121 gets $did str 122 do_join primary.db "2 4" $str oa $flags $with_dup_dups 123 gets $did str 124 do_join primary.db "4 2" $str oa $flags $with_dup_dups 125 gets $did str 126 do_join primary.db "3 4" $str oa $flags $with_dup_dups 127 gets $did str 128 do_join primary.db "4 3" $str oa $flags $with_dup_dups 129 gets $did str 130 do_join primary.db "2 3 4" $str oa $flags\ 131 $with_dup_dups 132 gets $did str 133 do_join primary.db "3 4 1" $str oa $flags\ 134 $with_dup_dups 135 gets $did str 136 do_join primary.db "4 2 1" $str oa $flags\ 137 $with_dup_dups 138 gets $did str 139 do_join primary.db "0 2 1" $str oa $flags\ 140 $with_dup_dups 141 gets $did str 142 do_join primary.db "3 2 0" $str oa $flags\ 143 $with_dup_dups 144 gets $did str 145 do_join primary.db "4 3 2 1" $str oa $flags\ 146 $with_dup_dups 147 gets $did str 148 do_join primary.db "4 3 0 1" $str oa $flags\ 149 $with_dup_dups 150 gets $did str 151 do_join primary.db "3 3 3" $str oa $flags\ 152 $with_dup_dups 153 gets $did str 154 do_join primary.db "2 2 3 3" $str oa $flags\ 155 $with_dup_dups 156 gets $did str2 157 gets $did str 158 do_join primary.db "1 2" $str oa $flags\ 159 $with_dup_dups "3" $str2 160 161 # You really don't want to run this section 162 # with $with_dup_dups > 2. 163 if { $with_dup_dups <= 2 } { 164 gets $did str2 165 gets $did str 166 do_join primary.db "1 2 3" $str\ 167 oa $flags $with_dup_dups "3 3 1" $str2 168 gets $did str2 169 gets $did str 170 do_join primary.db "4 0 2" $str\ 171 oa $flags $with_dup_dups "4 3 3" $str2 172 gets $did str2 173 gets $did str 174 do_join primary.db "3 2 1" $str\ 175 oa $flags $with_dup_dups "0 2" $str2 176 gets $did str2 177 gets $did str 178 do_join primary.db "2 2 3 3" $str\ 179 oa $flags $with_dup_dups "1 4 4" $str2 180 gets $did str2 181 gets $did str 182 do_join primary.db "2 2 3 3" $str\ 183 oa $flags $with_dup_dups "0 0 4 4" $str2 184 gets $did str2 185 gets $did str 186 do_join primary.db "2 2 3 3" $str2\ 187 oa $flags $with_dup_dups "2 4 4" $str 188 gets $did str2 189 gets $did str 190 do_join primary.db "2 2 3 3" $str2\ 191 oa $flags $with_dup_dups "0 0 4 4" $str 192 } 193 close $did 194 } 195 } 196 197 error_check_good env_close [$env close] 0 198} 199 200proc build_all { method psize opt oaname with_dup_dups {nentries 100} } { 201 global testdir 202 db_build join1.db $nentries 50 1 [conv $method 1]\ 203 $psize $opt $oaname $with_dup_dups 204 db_build join2.db $nentries 25 2 [conv $method 2]\ 205 $psize $opt $oaname $with_dup_dups 206 db_build join3.db $nentries 16 3 [conv $method 3]\ 207 $psize $opt $oaname $with_dup_dups 208 db_build join4.db $nentries 12 4 [conv $method 4]\ 209 $psize $opt $oaname $with_dup_dups 210 db_build null.db $nentries 0 5 [conv $method 5]\ 211 $psize $opt $oaname $with_dup_dups 212} 213 214proc conv { m i } { 215 switch -- $m { 216 DB_HASH { return "-hash"} 217 "-hash" { return "-hash"} 218 DB_BTREE { return "-btree"} 219 "-btree" { return "-btree"} 220 DB_BOTH { 221 if { [expr $i % 2] == 0 } { 222 return "-hash"; 223 } else { 224 return "-btree"; 225 } 226 } 227 } 228} 229 230proc random_opts { } { 231 set j [berkdb random_int 0 1] 232 if { $j == 0 } { 233 return " -dup" 234 } else { 235 return " -dup -dupsort" 236 } 237} 238 239proc db_build { name nkeys ndups dup_interval method psize lopt oaname \ 240 with_dup_dups } { 241 source ./include.tcl 242 243 # Get array of arg names (from two levels up the call stack) 244 upvar 2 $oaname oa 245 246 # Search for "RANDOMMIX" in $opt, and if present, replace 247 # with " -dup" or " -dup -dupsort" at random. 248 set i [lsearch $lopt RANDOMMIX] 249 if { $i != -1 } { 250 set lopt [lreplace $lopt $i $i [random_opts]] 251 } 252 253 # Save off db_open arguments for this database. 254 set opt [eval concat $lopt] 255 set oa($dup_interval) $opt 256 257 # Create the database and open the dictionary 258 set oflags "-create -truncate -mode 0644 $method\ 259 -pagesize $psize" 260 set db [eval {berkdb_open} $oflags $opt $name] 261 error_check_good dbopen [is_valid_db $db] TRUE 262 set did [open $dict] 263 set count 0 264 puts -nonewline "\tBuilding $name: $nkeys keys " 265 puts -nonewline "with $ndups duplicates at interval of $dup_interval" 266 if { $with_dup_dups > 0 } { 267 puts "" 268 puts "\t\tand $with_dup_dups duplicate duplicates." 269 } else { 270 puts "." 271 } 272 for { set count 0 } { [gets $did str] != -1 && $count < $nkeys } { 273 incr count} { 274 set str $str$name 275 # We need to make sure that the dups are inserted in a 276 # random, or near random, order. Do this by generating 277 # them and putting each in a list, then sorting the list 278 # at random. 279 set duplist {} 280 for { set i 0 } { $i < $ndups } { incr i } { 281 set data [format "%04d" [expr $i * $dup_interval]] 282 lappend duplist $data 283 } 284 # randomize the list 285 for { set i 0 } { $i < $ndups } {incr i } { 286 # set j [berkdb random_int $i [expr $ndups - 1]] 287 set j [expr ($i % 2) + $i] 288 if { $j >= $ndups } { set j $i } 289 set dupi [lindex $duplist $i] 290 set dupj [lindex $duplist $j] 291 set duplist [lreplace $duplist $i $i $dupj] 292 set duplist [lreplace $duplist $j $j $dupi] 293 } 294 foreach data $duplist { 295 if { $with_dup_dups != 0 } { 296 for { set j 0 }\ 297 { $j < $with_dup_dups }\ 298 {incr j} { 299 set ret [$db put $str $data] 300 error_check_good put$j $ret 0 301 } 302 } else { 303 set ret [$db put $str $data] 304 error_check_good put $ret 0 305 } 306 } 307 308 if { $ndups == 0 } { 309 set ret [$db put $str NODUP] 310 error_check_good put $ret 0 311 } 312 } 313 close $did 314 error_check_good close:$name [$db close] 0 315} 316 317proc do_join { primary dbs key oanm flags with_dup_dups {dbs2 ""} {key2 ""} } { 318 global testdir 319 source ./include.tcl 320 321 upvar $oanm oa 322 323 puts -nonewline "\tJoining: $dbs on $key" 324 if { $dbs2 == "" } { 325 puts "" 326 } else { 327 puts " with $dbs2 on $key2" 328 } 329 330 # Open all the databases 331 set p [berkdb_open -unknown $testdir/$primary] 332 error_check_good "primary open" [is_valid_db $p] TRUE 333 334 set dblist "" 335 set curslist "" 336 337 set ndx [llength $dbs] 338 339 foreach i [concat $dbs $dbs2] { 340 set opt $oa($i) 341 set db [eval {berkdb_open -unknown} $opt [n_to_name $i]] 342 error_check_good "[n_to_name $i] open" [is_valid_db $db] TRUE 343 set curs [$db cursor] 344 error_check_good "$db cursor" \ 345 [is_substr $curs "$db.c"] 1 346 lappend dblist $db 347 lappend curslist $curs 348 349 if { $ndx > 0 } { 350 set realkey [concat $key[n_to_name $i]] 351 } else { 352 set realkey [concat $key2[n_to_name $i]] 353 } 354 355 set pair [$curs get -set $realkey] 356 error_check_good cursor_set:$realkey:$pair \ 357 [llength [lindex $pair 0]] 2 358 359 incr ndx -1 360 } 361 362 set join_curs [eval {$p join} $curslist] 363 error_check_good join_cursor \ 364 [is_substr $join_curs "$p.c"] 1 365 366 # Calculate how many dups we expect. 367 # We go through the list of indices. If we find a 0, then we 368 # expect 0 dups. For everything else, we look at pairs of numbers, 369 # if the are relatively prime, multiply them and figure out how 370 # many times that goes into 50. If they aren't relatively prime, 371 # take the number of times the larger goes into 50. 372 set expected 50 373 set last 1 374 foreach n [concat $dbs $dbs2] { 375 if { $n == 0 } { 376 set expected 0 377 break 378 } 379 if { $last == $n } { 380 continue 381 } 382 383 if { [expr $last % $n] == 0 || [expr $n % $last] == 0 } { 384 if { $n > $last } { 385 set last $n 386 set expected [expr 50 / $last] 387 } 388 } else { 389 set last [expr $n * $last / [gcd $n $last]] 390 set expected [expr 50 / $last] 391 } 392 } 393 394 # If $with_dup_dups is greater than zero, each datum has 395 # been inserted $with_dup_dups times. So we expect the number 396 # of dups to go up by a factor of ($with_dup_dups)^(number of databases) 397 398 if { $with_dup_dups > 0 } { 399 foreach n [concat $dbs $dbs2] { 400 set expected [expr $expected * $with_dup_dups] 401 } 402 } 403 404 set ndups 0 405 if { $flags == " -join_item"} { 406 set l 1 407 } else { 408 set flags "" 409 set l 2 410 } 411 for { set pair [eval {$join_curs get} $flags] } { \ 412 [llength [lindex $pair 0]] == $l } { 413 set pair [eval {$join_curs get} $flags] } { 414 set k [lindex [lindex $pair 0] 0] 415 foreach i $dbs { 416 error_check_bad valid_dup:$i:$dbs $i 0 417 set kval [string trimleft $k 0] 418 if { [string length $kval] == 0 } { 419 set kval 0 420 } 421 error_check_good valid_dup:$i:$dbs [expr $kval % $i] 0 422 } 423 incr ndups 424 } 425 error_check_good number_of_dups:$dbs $ndups $expected 426 427 error_check_good close_primary [$p close] 0 428 foreach i $curslist { 429 error_check_good close_cursor:$i [$i close] 0 430 } 431 foreach i $dblist { 432 error_check_good close_index:$i [$i close] 0 433 } 434} 435 436proc n_to_name { n } { 437global testdir 438 if { $n == 0 } { 439 return null.db; 440 } else { 441 return join$n.db; 442 } 443} 444 445proc gcd { a b } { 446 set g 1 447 448 for { set i 2 } { $i <= $a } { incr i } { 449 if { [expr $a % $i] == 0 && [expr $b % $i] == 0 } { 450 set g $i 451 } 452 } 453 return $g 454} 455