1package open; 2use warnings; 3use Carp; 4$open::hint_bits = 0x20000; # HINT_LOCALIZE_HH 5 6our $VERSION = '1.03'; 7 8my $locale_encoding; 9 10sub in_locale { $^H & ($locale::hint_bits || 0)} 11 12sub _get_locale_encoding { 13 unless (defined $locale_encoding) { 14 # I18N::Langinfo isn't available everywhere 15 eval { 16 require I18N::Langinfo; 17 I18N::Langinfo->import(qw(langinfo CODESET)); 18 $locale_encoding = langinfo(CODESET()); 19 }; 20 my $country_language; 21 22 no warnings 'uninitialized'; 23 24 if (not $locale_encoding && in_locale()) { 25 if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) { 26 ($country_language, $locale_encoding) = ($1, $2); 27 } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) { 28 ($country_language, $locale_encoding) = ($1, $2); 29 } 30 # LANGUAGE affects only LC_MESSAGES only on glibc 31 } elsif (not $locale_encoding) { 32 if ($ENV{LC_ALL} =~ /\butf-?8\b/i || 33 $ENV{LANG} =~ /\butf-?8\b/i) { 34 $locale_encoding = 'utf8'; 35 } 36 # Could do more heuristics based on the country and language 37 # parts of LC_ALL and LANG (the parts before the dot (if any)), 38 # since we have Locale::Country and Locale::Language available. 39 # TODO: get a database of Language -> Encoding mappings 40 # (the Estonian database at http://www.eki.ee/letter/ 41 # would be excellent!) --jhi 42 } 43 if (defined $locale_encoding && 44 lc($locale_encoding) eq 'euc' && 45 defined $country_language) { 46 if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) { 47 $locale_encoding = 'euc-jp'; 48 } elsif ($country_language =~ /^ko_KR|korean?$/i) { 49 $locale_encoding = 'euc-kr'; 50 } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) { 51 $locale_encoding = 'euc-cn'; 52 } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) { 53 $locale_encoding = 'euc-tw'; 54 } else { 55 croak "Locale encoding 'euc' too ambiguous"; 56 } 57 } 58 } 59} 60 61sub import { 62 my ($class,@args) = @_; 63 croak("`use open' needs explicit list of PerlIO layers") unless @args; 64 my $std; 65 $^H |= $open::hint_bits; 66 my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1); 67 while (@args) { 68 my $type = shift(@args); 69 my $dscp; 70 if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) { 71 $type = 'IO'; 72 $dscp = ":$1"; 73 } elsif ($type eq ':std') { 74 $std = 1; 75 next; 76 } else { 77 $dscp = shift(@args) || ''; 78 } 79 my @val; 80 foreach my $layer (split(/\s+/,$dscp)) { 81 $layer =~ s/^://; 82 if ($layer eq 'locale') { 83 require Encode; 84 _get_locale_encoding() 85 unless defined $locale_encoding; 86 (warnings::warnif("layer", "Cannot figure out an encoding to use"), last) 87 unless defined $locale_encoding; 88 if ($locale_encoding =~ /^utf-?8$/i) { 89 $layer = "utf8"; 90 } else { 91 $layer = "encoding($locale_encoding)"; 92 } 93 $std = 1; 94 } else { 95 my $target = $layer; # the layer name itself 96 $target =~ s/^(\w+)\(.+\)$/$1/; # strip parameters 97 98 unless(PerlIO::Layer::->find($target,1)) { 99 warnings::warnif("layer", "Unknown PerlIO layer '$target'"); 100 } 101 } 102 push(@val,":$layer"); 103 if ($layer =~ /^(crlf|raw)$/) { 104 $^H{"open_$type"} = $layer; 105 } 106 } 107 if ($type eq 'IN') { 108 $in = join(' ',@val); 109 } 110 elsif ($type eq 'OUT') { 111 $out = join(' ',@val); 112 } 113 elsif ($type eq 'IO') { 114 $in = $out = join(' ',@val); 115 } 116 else { 117 croak "Unknown PerlIO layer class '$type'"; 118 } 119 } 120 ${^OPEN} = join("\0",$in,$out) if $in or $out; 121 if ($std) { 122 if ($in) { 123 if ($in =~ /:utf8\b/) { 124 binmode(STDIN, ":utf8"); 125 } elsif ($in =~ /(\w+\(.+\))/) { 126 binmode(STDIN, ":$1"); 127 } 128 } 129 if ($out) { 130 if ($out =~ /:utf8\b/) { 131 binmode(STDOUT, ":utf8"); 132 binmode(STDERR, ":utf8"); 133 } elsif ($out =~ /(\w+\(.+\))/) { 134 binmode(STDOUT, ":$1"); 135 binmode(STDERR, ":$1"); 136 } 137 } 138 } 139} 140 1411; 142__END__ 143 144=head1 NAME 145 146open - perl pragma to set default PerlIO layers for input and output 147 148=head1 SYNOPSIS 149 150 use open IN => ":crlf", OUT => ":bytes"; 151 use open OUT => ':utf8'; 152 use open IO => ":encoding(iso-8859-7)"; 153 154 use open IO => ':locale'; 155 156 use open ':utf8'; 157 use open ':locale'; 158 use open ':encoding(iso-8859-7)'; 159 160 use open ':std'; 161 162=head1 DESCRIPTION 163 164Full-fledged support for I/O layers is now implemented provided 165Perl is configured to use PerlIO as its IO system (which is now the 166default). 167 168The C<open> pragma serves as one of the interfaces to declare default 169"layers" (also known as "disciplines") for all I/O. Any two-argument 170open(), readpipe() (aka qx//) and similar operators found within the 171lexical scope of this pragma will use the declared defaults. 172Three-argument opens are not affected by this pragma since there you 173(can) explicitly specify the layers and are supposed to know what you 174are doing. 175 176With the C<IN> subpragma you can declare the default layers 177of input streams, and with the C<OUT> subpragma you can declare 178the default layers of output streams. With the C<IO> subpragma 179you can control both input and output streams simultaneously. 180 181If you have a legacy encoding, you can use the C<:encoding(...)> tag. 182 183if you want to set your encoding layers based on your 184locale environment variables, you can use the C<:locale> tag. 185For example: 186 187 $ENV{LANG} = 'ru_RU.KOI8-R'; 188 # the :locale will probe the locale environment variables like LANG 189 use open OUT => ':locale'; 190 open(O, ">koi8"); 191 print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1 192 close O; 193 open(I, "<koi8"); 194 printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1 195 close I; 196 197These are equivalent 198 199 use open ':utf8'; 200 use open IO => ':utf8'; 201 202as are these 203 204 use open ':locale'; 205 use open IO => ':locale'; 206 207and these 208 209 use open ':encoding(iso-8859-7)'; 210 use open IO => ':encoding(iso-8859-7)'; 211 212The matching of encoding names is loose: case does not matter, and 213many encodings have several aliases. See L<Encode::Supported> for 214details and the list of supported locales. 215 216Note that C<:utf8> PerlIO layer must always be specified exactly like 217that, it is not subject to the loose matching of encoding names. 218 219When open() is given an explicit list of layers they are appended to 220the list declared using this pragma. 221 222The C<:std> subpragma on its own has no effect, but if combined with 223the C<:utf8> or C<:encoding> subpragmas, it converts the standard 224filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected 225for input/output handles. For example, if both input and out are 226chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and 227STDERR are also in C<:utf8>. On the other hand, if only output is 228chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the 229STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma 230implicitly turns on C<:std>. 231 232The logic of C<:locale> is as follows: 233 234=over 4 235 236=item 1. 237 238If the platform supports the langinfo(CODESET) interface, the codeset 239returned is used as the default encoding for the open pragma. 240 241=item 2. 242 243If 1. didn't work but we are under the locale pragma, the environment 244variables LC_ALL and LANG (in that order) are matched for encodings 245(the part after C<.>, if any), and if any found, that is used 246as the default encoding for the open pragma. 247 248=item 3. 249 250If 1. and 2. didn't work, the environment variables LC_ALL and LANG 251(in that order) are matched for anything looking like UTF-8, and if 252any found, C<:utf8> is used as the default encoding for the open 253pragma. 254 255=back 256 257If your locale environment variables (LC_ALL, LC_CTYPE, LANG) 258contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching), 259the default encoding of your STDIN, STDOUT, and STDERR, and of 260B<any subsequent file open>, is UTF-8. 261 262Directory handles may also support PerlIO layers in the future. 263 264=head1 NONPERLIO FUNCTIONALITY 265 266If Perl is not built to use PerlIO as its IO system then only the two 267pseudo-layers C<:bytes> and C<:crlf> are available. 268 269The C<:bytes> layer corresponds to "binary mode" and the C<:crlf> 270layer corresponds to "text mode" on platforms that distinguish 271between the two modes when opening files (which is many DOS-like 272platforms, including Windows). These two layers are no-ops on 273platforms where binmode() is a no-op, but perform their functions 274everywhere if PerlIO is enabled. 275 276=head1 IMPLEMENTATION DETAILS 277 278There is a class method in C<PerlIO::Layer> C<find> which is 279implemented as XS code. It is called by C<import> to validate the 280layers: 281 282 PerlIO::Layer::->find("perlio") 283 284The return value (if defined) is a Perl object, of class 285C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As 286yet there is nothing useful you can do with the object at the perl 287level. 288 289=head1 SEE ALSO 290 291L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>, 292L<encoding> 293 294=cut 295