1package URI::Heuristic; 2 3# $Id: Heuristic.pm,v 4.17 2004/01/14 13:33:44 gisle Exp $ 4 5=head1 NAME 6 7URI::Heuristic - Expand URI using heuristics 8 9=head1 SYNOPSIS 10 11 use URI::Heuristic qw(uf_uristr); 12 $u = uf_uristr("perl"); # http://www.perl.com 13 $u = uf_uristr("www.sol.no/sol"); # http://www.sol.no/sol 14 $u = uf_uristr("aas"); # http://www.aas.no 15 $u = uf_uristr("ftp.funet.fi"); # ftp://ftp.funet.fi 16 $u = uf_uristr("/etc/passwd"); # file:/etc/passwd 17 18=head1 DESCRIPTION 19 20This module provides functions that expand strings into real absolute 21URIs using some built-in heuristics. Strings that already represent 22absolute URIs (i.e. that start with a C<scheme:> part) are never modified 23and are returned unchanged. The main use of these functions is to 24allow abbreviated URIs similar to what many web browsers allow for URIs 25typed in by the user. 26 27The following functions are provided: 28 29=over 4 30 31=item uf_uristr($str) 32 33Tries to make the argument string 34into a proper absolute URI string. The "uf_" prefix stands for "User 35Friendly". Under MacOS, it assumes that any string with a common URL 36scheme (http, ftp, etc.) is a URL rather than a local path. So don't name 37your volumes after common URL schemes and expect uf_uristr() to construct 38valid file: URL's on those volumes for you, because it won't. 39 40=item uf_uri($str) 41 42Works the same way as uf_uristr() but 43returns a C<URI> object. 44 45=back 46 47=head1 ENVIRONMENT 48 49If the hostname portion of a URI does not contain any dots, then 50certain qualified guesses are made. These guesses are governed by 51the following two environment variables: 52 53=over 10 54 55=item COUNTRY 56 57The two-letter country code (ISO 3166) for your location. If 58the domain name of your host ends with two letters, then it is taken 59to be the default country. See also L<Locale::Country>. 60 61=item URL_GUESS_PATTERN 62 63Contains a space-separated list of URL patterns to try. The string 64"ACME" is for some reason used as a placeholder for the host name in 65the URL provided. Example: 66 67 URL_GUESS_PATTERN="www.ACME.no www.ACME.se www.ACME.com" 68 export URL_GUESS_PATTERN 69 70Specifying URL_GUESS_PATTERN disables any guessing rules based on 71country. An empty URL_GUESS_PATTERN disables any guessing that 72involves host name lookups. 73 74=back 75 76=head1 COPYRIGHT 77 78Copyright 1997-1998, Gisle Aas 79 80This library is free software; you can redistribute it and/or 81modify it under the same terms as Perl itself. 82 83=cut 84 85use strict; 86 87use vars qw(@EXPORT_OK $VERSION $MY_COUNTRY %LOCAL_GUESSING $DEBUG); 88 89require Exporter; 90*import = \&Exporter::import; 91@EXPORT_OK = qw(uf_uri uf_uristr uf_url uf_urlstr); 92$VERSION = sprintf("%d.%02d", q$Revision: 4.17 $ =~ /(\d+)\.(\d+)/); 93 94sub MY_COUNTRY() { 95 for ($MY_COUNTRY) { 96 return $_ if defined; 97 98 # First try the environment. 99 $_ = $ENV{COUNTRY}; 100 return $_ if defined; 101 102 # Could use LANG, LC_ALL, etc at this point, but probably too 103 # much of a wild guess. (Catalan != Canada, etc.) 104 # 105 106 # Last bit of domain name. This may access the network. 107 require Net::Domain; 108 my $fqdn = Net::Domain::hostfqdn(); 109 $_ = lc($1) if $fqdn =~ /\.([a-zA-Z]{2})$/; 110 return $_ if defined; 111 112 # Give up. Defined but false. 113 return ($_ = 0); 114 } 115} 116 117%LOCAL_GUESSING = 118( 119 'us' => [qw(www.ACME.gov www.ACME.mil)], 120 'uk' => [qw(www.ACME.co.uk www.ACME.org.uk www.ACME.ac.uk)], 121 'au' => [qw(www.ACME.com.au www.ACME.org.au www.ACME.edu.au)], 122 'il' => [qw(www.ACME.co.il www.ACME.org.il www.ACME.net.il)], 123 # send corrections and new entries to <gisle@aas.no> 124); 125 126 127sub uf_uristr ($) 128{ 129 local($_) = @_; 130 print STDERR "uf_uristr: resolving $_\n" if $DEBUG; 131 return unless defined; 132 133 s/^\s+//; 134 s/\s+$//; 135 136 if (/^(www|web|home)\./) { 137 $_ = "http://$_"; 138 139 } elsif (/^(ftp|gopher|news|wais|http|https)\./) { 140 $_ = "$1://$_"; 141 142 } elsif ($^O ne "MacOS" && 143 (m,^/, || # absolute file name 144 m,^\.\.?/, || # relative file name 145 m,^[a-zA-Z]:[/\\],) # dosish file name 146 ) 147 { 148 $_ = "file:$_"; 149 150 } elsif ($^O eq "MacOS" && m/:/) { 151 # potential MacOS file name 152 unless (m/^(ftp|gopher|news|wais|http|https|mailto):/) { 153 require URI::file; 154 my $a = URI::file->new($_)->as_string; 155 $_ = ($a =~ m/^file:/) ? $a : "file:$a"; 156 } 157 } elsif (/^\w+([\.\-]\w+)*\@(\w+\.)+\w{2,3}$/) { 158 $_ = "mailto:$_"; 159 160 } elsif (!/^[a-zA-Z][a-zA-Z0-9.+\-]*:/) { # no scheme specified 161 if (s/^([-\w]+(?:\.[-\w]+)*)([\/:\?\#]|$)/$2/) { 162 my $host = $1; 163 164 if ($host !~ /\./ && $host ne "localhost") { 165 my @guess; 166 if (exists $ENV{URL_GUESS_PATTERN}) { 167 @guess = map { s/\bACME\b/$host/; $_ } 168 split(' ', $ENV{URL_GUESS_PATTERN}); 169 } else { 170 if (MY_COUNTRY()) { 171 my $special = $LOCAL_GUESSING{MY_COUNTRY()}; 172 if ($special) { 173 my @special = @$special; 174 push(@guess, map { s/\bACME\b/$host/; $_ } 175 @special); 176 } else { 177 push(@guess, 'www.$host.' . MY_COUNTRY()); 178 } 179 } 180 push(@guess, map "www.$host.$_", 181 "com", "org", "net", "edu", "int"); 182 } 183 184 185 my $guess; 186 for $guess (@guess) { 187 print STDERR "uf_uristr: gethostbyname('$guess.')..." 188 if $DEBUG; 189 if (gethostbyname("$guess.")) { 190 print STDERR "yes\n" if $DEBUG; 191 $host = $guess; 192 last; 193 } 194 print STDERR "no\n" if $DEBUG; 195 } 196 } 197 $_ = "http://$host$_"; 198 199 } else { 200 # pure junk, just return it unchanged... 201 202 } 203 } 204 print STDERR "uf_uristr: ==> $_\n" if $DEBUG; 205 206 $_; 207} 208 209sub uf_uri ($) 210{ 211 require URI; 212 URI->new(uf_uristr($_[0])); 213} 214 215# legacy 216*uf_urlstr = \*uf_uristr; 217 218sub uf_url ($) 219{ 220 require URI::URL; 221 URI::URL->new(uf_uristr($_[0])); 222} 223 2241; 225