1package Search::Dict; 2require 5.000; 3require Exporter; 4 5use strict; 6 7our $VERSION = '1.02'; 8our @ISA = qw(Exporter); 9our @EXPORT = qw(look); 10 11=head1 NAME 12 13Search::Dict, look - search for key in dictionary file 14 15=head1 SYNOPSIS 16 17 use Search::Dict; 18 look *FILEHANDLE, $key, $dict, $fold; 19 20 use Search::Dict; 21 look *FILEHANDLE, $params; 22 23=head1 DESCRIPTION 24 25Sets file position in FILEHANDLE to be first line greater than or equal 26(stringwise) to I<$key>. Returns the new file position, or -1 if an error 27occurs. 28 29The flags specify dictionary order and case folding: 30 31If I<$dict> is true, search by dictionary order (ignore anything but word 32characters and whitespace). The default is honour all characters. 33 34If I<$fold> is true, ignore case. The default is to honour case. 35 36If there are only three arguments and the third argument is a hash 37reference, the keys of that hash can have values C<dict>, C<fold>, and 38C<comp> or C<xfrm> (see below), and their correponding values will be 39used as the parameters. 40 41If a comparison subroutine (comp) is defined, it must return less than zero, 42zero, or greater than zero, if the first comparand is less than, 43equal, or greater than the second comparand. 44 45If a transformation subroutine (xfrm) is defined, its value is used to 46transform the lines read from the filehandle before their comparison. 47 48=cut 49 50sub look { 51 my($fh,$key,$dict,$fold) = @_; 52 my ($comp, $xfrm); 53 if (@_ == 3 && ref $dict eq 'HASH') { 54 my $params = $dict; 55 $dict = 0; 56 $dict = $params->{dict} if exists $params->{dict}; 57 $fold = $params->{fold} if exists $params->{fold}; 58 $comp = $params->{comp} if exists $params->{comp}; 59 $xfrm = $params->{xfrm} if exists $params->{xfrm}; 60 } 61 $comp = sub { $_[0] cmp $_[1] } unless defined $comp; 62 local($_); 63 my(@stat) = stat($fh) 64 or return -1; 65 my($size, $blksize) = @stat[7,11]; 66 $blksize ||= 8192; 67 $key =~ s/[^\w\s]//g if $dict; 68 $key = lc $key if $fold; 69 # find the right block 70 my($min, $max) = (0, int($size / $blksize)); 71 my $mid; 72 while ($max - $min > 1) { 73 $mid = int(($max + $min) / 2); 74 seek($fh, $mid * $blksize, 0) 75 or return -1; 76 <$fh> if $mid; # probably a partial line 77 $_ = <$fh>; 78 $_ = $xfrm->($_) if defined $xfrm; 79 chomp; 80 s/[^\w\s]//g if $dict; 81 $_ = lc $_ if $fold; 82 if (defined($_) && $comp->($_, $key) < 0) { 83 $min = $mid; 84 } 85 else { 86 $max = $mid; 87 } 88 } 89 # find the right line 90 $min *= $blksize; 91 seek($fh,$min,0) 92 or return -1; 93 <$fh> if $min; 94 for (;;) { 95 $min = tell($fh); 96 defined($_ = <$fh>) 97 or last; 98 $_ = $xfrm->($_) if defined $xfrm; 99 chomp; 100 s/[^\w\s]//g if $dict; 101 $_ = lc $_ if $fold; 102 last if $comp->($_, $key) >= 0; 103 } 104 seek($fh,$min,0); 105 $min; 106} 107 1081; 109