Detect.pm revision 1.2
1
2# Time-stamp: "2004-06-20 21:47:55 ADT"
3
4require 5;
5package I18N::LangTags::Detect;
6use strict;
7
8use vars qw( @ISA $VERSION $MATCH_SUPERS $USING_LANGUAGE_TAGS
9             $USE_LITERALS $MATCH_SUPERS_TIGHTLY);
10
11BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } }
12 # define the constant 'DEBUG' at compile-time
13
14$VERSION = "1.05_01";
15@ISA = ();
16use I18N::LangTags qw(alternate_language_tags locale2language_tag);
17
18sub _uniq { my %seen; return grep(!($seen{$_}++), @_); }
19sub _normalize {
20  my(@languages) =
21    map lc($_),
22    grep $_,
23    map {; $_, alternate_language_tags($_) } @_;
24  return _uniq(@languages) if wantarray;
25  return $languages[0];
26}
27
28#---------------------------------------------------------------------------
29# The extent of our functional interface:
30
31sub detect () { return __PACKAGE__->ambient_langprefs; }
32
33#===========================================================================
34
35sub ambient_langprefs { # always returns things untainted
36  my $base_class = $_[0];
37
38  return $base_class->http_accept_langs
39   if length( $ENV{'REQUEST_METHOD'} || '' ); # I'm a CGI
40       # it's off in its own routine because it's complicated
41
42  # Not running as a CGI: try to puzzle out from the environment
43  my @languages;
44
45  foreach my $envname (qw( LANGUAGE LC_ALL LC_MESSAGES LANG )) {
46    next unless $ENV{$envname};
47    DEBUG and print "Noting \$$envname: $ENV{$envname}\n";
48    push @languages,
49      map locale2language_tag($_),
50        # if it's a lg tag, fine, pass thru (untainted)
51        # if it's a locale ID, try converting to a lg tag (untainted),
52        # otherwise nix it.
53
54      split m/[,:]/,
55      $ENV{$envname}
56    ;
57    last; # first one wins
58  }
59
60  if($ENV{'IGNORE_WIN32_LOCALE'}) {
61    # no-op
62  } elsif(&_try_use('Win32::Locale')) {
63    # If we have that module installed...
64    push @languages, Win32::Locale::get_language() || ''
65     if defined &Win32::Locale::get_language;
66  }
67  return _normalize @languages;
68}
69
70#---------------------------------------------------------------------------
71
72sub http_accept_langs {
73  # Deal with HTTP "Accept-Language:" stuff.  Hassle.
74  # This code is more lenient than RFC 3282, which you must read.
75  # Hm.  Should I just move this into I18N::LangTags at some point?
76  no integer;
77
78  my $in = (@_ > 1) ? $_[1] : $ENV{'HTTP_ACCEPT_LANGUAGE'};
79  # (always ends up untainting)
80
81  return() unless defined $in and length $in;
82
83  $in =~ s/\([^\)]*\)//g; # nix just about any comment
84
85  if( $in =~ m/^\s*([a-zA-Z][-a-zA-Z]+)\s*$/s ) {
86    # Very common case: just one language tag
87    return _normalize $1;
88  } elsif( $in =~ m/^\s*[a-zA-Z][-a-zA-Z]+(?:\s*,\s*[a-zA-Z][-a-zA-Z]+)*\s*$/s ) {
89    # Common case these days: just "foo, bar, baz"
90    return _normalize( $in =~ m/([a-zA-Z][-a-zA-Z]+)/g );
91  }
92
93  # Else it's complicated...
94
95  $in =~ s/\s+//g;  # Yes, we can just do without the WS!
96  my @in = $in =~ m/([^,]+)/g;
97  my %pref;
98
99  my $q;
100  foreach my $tag (@in) {
101    next unless $tag =~
102     m/^([a-zA-Z][-a-zA-Z]+)
103        (?:
104         ;q=
105         (
106          \d*   # a bit too broad of a RE, but so what.
107          (?:
108            \.\d+
109          )?
110         )
111        )?
112       $
113      /sx
114    ;
115    $q = (defined $2 and length $2) ? $2 : 1;
116    #print "$1 with q=$q\n";
117    push @{ $pref{$q} }, lc $1;
118  }
119
120  return _normalize(
121    # Read off %pref, in descending key order...
122    map @{$pref{$_}},
123    sort {$b <=> $a}
124    keys %pref
125  );
126}
127
128#===========================================================================
129
130my %tried = ();
131  # memoization of whether we've used this module, or found it unusable.
132
133sub _try_use {   # Basically a wrapper around "require Modulename"
134  # "Many men have tried..."  "They tried and failed?"  "They tried and died."
135  return $tried{$_[0]} if exists $tried{$_[0]};  # memoization
136
137  my $module = $_[0];   # ASSUME sane module name!
138  { no strict 'refs';
139    no warnings 'once';
140    return($tried{$module} = 1)
141     if %{$module . "::Lexicon"} or @{$module . "::ISA"};
142    # weird case: we never use'd it, but there it is!
143  }
144
145  print " About to use $module ...\n" if DEBUG;
146  {
147    local $SIG{'__DIE__'};
148    local @INC = @INC;
149    pop @INC if $INC[-1] eq '.';
150    eval "require $module"; # used to be "use $module", but no point in that.
151  }
152  if($@) {
153    print "Error using $module \: $@\n" if DEBUG > 1;
154    return $tried{$module} = 0;
155  } else {
156    print " OK, $module is used\n" if DEBUG;
157    return $tried{$module} = 1;
158  }
159}
160
161#---------------------------------------------------------------------------
1621;
163__END__
164
165
166=head1 NAME
167
168I18N::LangTags::Detect - detect the user's language preferences
169
170=head1 SYNOPSIS
171
172  use I18N::LangTags::Detect;
173  my @user_wants = I18N::LangTags::Detect::detect();
174
175=head1 DESCRIPTION
176
177It is a common problem to want to detect what language(s) the user would
178prefer output in.
179
180=head1 FUNCTIONS
181
182This module defines one public function,
183C<I18N::LangTags::Detect::detect()>.  This function is not exported
184(nor is even exportable), and it takes no parameters.
185
186In scalar context, the function returns the most preferred language
187tag (or undef if no preference was seen).
188
189In list context (which is usually what you want),
190the function returns a
191(possibly empty) list of language tags representing (best first) what
192languages the user apparently would accept output in.  You will
193probably want to pass the output of this through
194C<I18N::LangTags::implicate_supers_tightly(...)>
195or
196C<I18N::LangTags::implicate_supers(...)>, like so:
197
198  my @languages =
199    I18N::LangTags::implicate_supers_tightly(
200      I18N::LangTags::Detect::detect()
201    );
202
203
204=head1 ENVIRONMENT
205
206This module looks for several environment variables, including
207REQUEST_METHOD, HTTP_ACCEPT_LANGUAGE,
208LANGUAGE, LC_ALL, LC_MESSAGES, and LANG.
209
210It will also use the L<Win32::Locale> module, if it's installed.
211
212
213=head1 SEE ALSO
214
215L<I18N::LangTags>, L<Win32::Locale>, L<Locale::Maketext>.
216
217(This module's core code started out as a routine in Locale::Maketext;
218but I moved it here once I realized it was more generally useful.)
219
220
221=head1 COPYRIGHT
222
223Copyright (c) 1998-2004 Sean M. Burke. All rights reserved.
224
225This library is free software; you can redistribute it and/or
226modify it under the same terms as Perl itself.
227
228The programs and documentation in this dist are distributed in
229the hope that they will be useful, but without any warranty; without
230even the implied warranty of merchantability or fitness for a
231particular purpose.
232
233
234=head1 AUTHOR
235
236Sean M. Burke C<sburke@cpan.org>
237
238=cut
239
240# a tip: Put a bit of chopped up pickled ginger in your salad. It's tasty!
241