1package open;
2use warnings;
3use Carp;
4$open::hint_bits = 0x20000; # HINT_LOCALIZE_HH
5
6our $VERSION = '1.03';
7
8my $locale_encoding;
9
10sub in_locale { $^H & ($locale::hint_bits || 0)}
11
12sub _get_locale_encoding {
13    unless (defined $locale_encoding) {
14	# I18N::Langinfo isn't available everywhere
15	eval {
16	    require I18N::Langinfo;
17	    I18N::Langinfo->import(qw(langinfo CODESET));
18	    $locale_encoding = langinfo(CODESET());
19	};
20	my $country_language;
21
22	no warnings 'uninitialized';
23
24        if (not $locale_encoding && in_locale()) {
25	    if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) {
26		($country_language, $locale_encoding) = ($1, $2);
27	    } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
28		($country_language, $locale_encoding) = ($1, $2);
29	    }
30	    # LANGUAGE affects only LC_MESSAGES only on glibc
31	} elsif (not $locale_encoding) {
32	    if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
33		$ENV{LANG}   =~ /\butf-?8\b/i) {
34		$locale_encoding = 'utf8';
35	    }
36	    # Could do more heuristics based on the country and language
37	    # parts of LC_ALL and LANG (the parts before the dot (if any)),
38	    # since we have Locale::Country and Locale::Language available.
39	    # TODO: get a database of Language -> Encoding mappings
40	    # (the Estonian database at http://www.eki.ee/letter/
41	    # would be excellent!) --jhi
42	}
43	if (defined $locale_encoding &&
44	    lc($locale_encoding) eq 'euc' &&
45	    defined $country_language) {
46	    if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
47		$locale_encoding = 'euc-jp';
48	    } elsif ($country_language =~ /^ko_KR|korean?$/i) {
49		$locale_encoding = 'euc-kr';
50	    } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
51		$locale_encoding = 'euc-cn';
52	    } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
53		$locale_encoding = 'euc-tw';
54	    } else {
55		croak "Locale encoding 'euc' too ambiguous";
56	    }
57	}
58    }
59}
60
61sub import {
62    my ($class,@args) = @_;
63    croak("`use open' needs explicit list of PerlIO layers") unless @args;
64    my $std;
65    $^H |= $open::hint_bits;
66    my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
67    while (@args) {
68	my $type = shift(@args);
69	my $dscp;
70	if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
71	    $type = 'IO';
72	    $dscp = ":$1";
73	} elsif ($type eq ':std') {
74	    $std = 1;
75	    next;
76	} else {
77	    $dscp = shift(@args) || '';
78	}
79	my @val;
80	foreach my $layer (split(/\s+/,$dscp)) {
81            $layer =~ s/^://;
82	    if ($layer eq 'locale') {
83		require Encode;
84		_get_locale_encoding()
85		    unless defined $locale_encoding;
86		(warnings::warnif("layer", "Cannot figure out an encoding to use"), last)
87		    unless defined $locale_encoding;
88		if ($locale_encoding =~ /^utf-?8$/i) {
89		    $layer = "utf8";
90		} else {
91		    $layer = "encoding($locale_encoding)";
92		}
93		$std = 1;
94	    } else {
95		my $target = $layer;		# the layer name itself
96		$target =~ s/^(\w+)\(.+\)$/$1/;	# strip parameters
97
98		unless(PerlIO::Layer::->find($target,1)) {
99		    warnings::warnif("layer", "Unknown PerlIO layer '$target'");
100		}
101	    }
102	    push(@val,":$layer");
103	    if ($layer =~ /^(crlf|raw)$/) {
104		$^H{"open_$type"} = $layer;
105	    }
106	}
107	if ($type eq 'IN') {
108	    $in  = join(' ',@val);
109	}
110	elsif ($type eq 'OUT') {
111	    $out = join(' ',@val);
112	}
113	elsif ($type eq 'IO') {
114	    $in = $out = join(' ',@val);
115	}
116	else {
117	    croak "Unknown PerlIO layer class '$type'";
118	}
119    }
120    ${^OPEN} = join("\0",$in,$out) if $in or $out;
121    if ($std) {
122	if ($in) {
123	    if ($in =~ /:utf8\b/) {
124		    binmode(STDIN,  ":utf8");
125		} elsif ($in =~ /(\w+\(.+\))/) {
126		    binmode(STDIN,  ":$1");
127		}
128	}
129	if ($out) {
130	    if ($out =~ /:utf8\b/) {
131		binmode(STDOUT,  ":utf8");
132		binmode(STDERR,  ":utf8");
133	    } elsif ($out =~ /(\w+\(.+\))/) {
134		binmode(STDOUT,  ":$1");
135		binmode(STDERR,  ":$1");
136	    }
137	}
138    }
139}
140
1411;
142__END__
143
144=head1 NAME
145
146open - perl pragma to set default PerlIO layers for input and output
147
148=head1 SYNOPSIS
149
150    use open IN  => ":crlf", OUT => ":bytes";
151    use open OUT => ':utf8';
152    use open IO  => ":encoding(iso-8859-7)";
153
154    use open IO  => ':locale';
155
156    use open ':utf8';
157    use open ':locale';
158    use open ':encoding(iso-8859-7)';
159
160    use open ':std';
161
162=head1 DESCRIPTION
163
164Full-fledged support for I/O layers is now implemented provided
165Perl is configured to use PerlIO as its IO system (which is now the
166default).
167
168The C<open> pragma serves as one of the interfaces to declare default
169"layers" (also known as "disciplines") for all I/O. Any two-argument
170open(), readpipe() (aka qx//) and similar operators found within the
171lexical scope of this pragma will use the declared defaults.
172Three-argument opens are not affected by this pragma since there you
173(can) explicitly specify the layers and are supposed to know what you
174are doing.
175
176With the C<IN> subpragma you can declare the default layers
177of input streams, and with the C<OUT> subpragma you can declare
178the default layers of output streams.  With the C<IO>  subpragma
179you can control both input and output streams simultaneously.
180
181If you have a legacy encoding, you can use the C<:encoding(...)> tag.
182
183if you want to set your encoding layers based on your
184locale environment variables, you can use the C<:locale> tag.
185For example:
186
187    $ENV{LANG} = 'ru_RU.KOI8-R';
188    # the :locale will probe the locale environment variables like LANG
189    use open OUT => ':locale';
190    open(O, ">koi8");
191    print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
192    close O;
193    open(I, "<koi8");
194    printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1
195    close I;
196
197These are equivalent
198
199    use open ':utf8';
200    use open IO => ':utf8';
201
202as are these
203
204    use open ':locale';
205    use open IO => ':locale';
206
207and these
208
209    use open ':encoding(iso-8859-7)';
210    use open IO => ':encoding(iso-8859-7)';
211
212The matching of encoding names is loose: case does not matter, and
213many encodings have several aliases.  See L<Encode::Supported> for
214details and the list of supported locales.
215
216Note that C<:utf8> PerlIO layer must always be specified exactly like
217that, it is not subject to the loose matching of encoding names.
218
219When open() is given an explicit list of layers they are appended to
220the list declared using this pragma.
221
222The C<:std> subpragma on its own has no effect, but if combined with
223the C<:utf8> or C<:encoding> subpragmas, it converts the standard
224filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected
225for input/output handles.  For example, if both input and out are
226chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and
227STDERR are also in C<:utf8>.  On the other hand, if only output is
228chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the
229STDOUT and STDERR to be in C<koi8r>.  The C<:locale> subpragma
230implicitly turns on C<:std>.
231
232The logic of C<:locale> is as follows:
233
234=over 4
235
236=item 1.
237
238If the platform supports the langinfo(CODESET) interface, the codeset
239returned is used as the default encoding for the open pragma.
240
241=item 2.
242
243If 1. didn't work but we are under the locale pragma, the environment
244variables LC_ALL and LANG (in that order) are matched for encodings
245(the part after C<.>, if any), and if any found, that is used
246as the default encoding for the open pragma.
247
248=item 3.
249
250If 1. and 2. didn't work, the environment variables LC_ALL and LANG
251(in that order) are matched for anything looking like UTF-8, and if
252any found, C<:utf8> is used as the default encoding for the open
253pragma.
254
255=back
256
257If your locale environment variables (LC_ALL, LC_CTYPE, LANG)
258contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
259the default encoding of your STDIN, STDOUT, and STDERR, and of
260B<any subsequent file open>, is UTF-8.
261
262Directory handles may also support PerlIO layers in the future.
263
264=head1 NONPERLIO FUNCTIONALITY
265
266If Perl is not built to use PerlIO as its IO system then only the two
267pseudo-layers C<:bytes> and C<:crlf> are available.
268
269The C<:bytes> layer corresponds to "binary mode" and the C<:crlf>
270layer corresponds to "text mode" on platforms that distinguish
271between the two modes when opening files (which is many DOS-like
272platforms, including Windows).  These two layers are no-ops on
273platforms where binmode() is a no-op, but perform their functions
274everywhere if PerlIO is enabled.
275
276=head1 IMPLEMENTATION DETAILS
277
278There is a class method in C<PerlIO::Layer> C<find> which is
279implemented as XS code.  It is called by C<import> to validate the
280layers:
281
282   PerlIO::Layer::->find("perlio")
283
284The return value (if defined) is a Perl object, of class
285C<PerlIO::Layer> which is created by the C code in F<perlio.c>.  As
286yet there is nothing useful you can do with the object at the perl
287level.
288
289=head1 SEE ALSO
290
291L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,
292L<encoding>
293
294=cut
295