1# $Id: encoding.pm,v 3.00 2020/04/19 10:56:28 dankogai Exp $
2package encoding;
3our $VERSION = sprintf "%d.%02d", q$Revision: 3.00 $ =~ /(\d+)/g;
4
5use Encode;
6use strict;
7use warnings;
8use Config;
9
# Compile-time switches used throughout this file:
#   DEBUG       - true when the PERL_ENCODE_DEBUG environment variable is set
#   HAS_PERLIO  - true when PerlIO::encoding 0.02 or later can be loaded
#   PERL_5_21_7 - true on perl 5.21.7 and later
use constant {
    DEBUG => !!$ENV{PERL_ENCODE_DEBUG},

    # A failing eval {} returns an EMPTY LIST in list context, which would
    # shift every following key/value pair of this hash.  Force scalar
    # context so that a missing PerlIO::encoding gives HAS_PERLIO => undef.
    HAS_PERLIO => scalar(
        eval { require PerlIO::encoding; PerlIO::encoding->VERSION(0.02) }
    ),
    PERL_5_21_7 => $^V && $^V ge v5.21.7,    # lexically scoped
};
15
# Decide whether the encoding named by the first argument is an "exception".
#
# Returns 0 on any perl newer than 5.8.0 and for every name that is not one
# of the UTF encodings listed below.  Otherwise returns 0 when
# $Config{perl_patchlevel} is true (a maintperl build) and 1 when it is not.
sub _exception {
    my ($name) = @_;

    # 5.8.1 or higher: never an exception
    return 0 if $] > 5.008;

    my %is_utf;
    @is_utf{
        qw(utf8 UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE
          UTF-32 UTF-32BE UTF-32LE)
    } = (1) x 9;

    # only the UTFs can be exceptions
    return 0 unless $is_utf{$name};

    require Config;
    Config->import();
    our %Config;

    # maintperl: not an exception
    return 0 if $Config{perl_patchlevel};
    return 1;
}
28
# Bitwise AND of the compile-time hints word ($^H) with
# $locale::hint_bits; an unset or false $locale::hint_bits counts as 0.
sub in_locale {
    my $locale_bits = $locale::hint_bits || 0;
    return $^H & $locale_bits;
}
30
# Work out an encoding name from the platform / locale settings.
#
# Takes no arguments.  Returns an encoding name, or undef when none of the
# probes below produces one:
#   1. On MSWin32: the console output code page, then the ANSI code page
#      (65001 is reported as 'UTF-8', anything else as 'cp<N>').
#   2. The name of the encoding that find_encoding() resolves
#      I18N::Langinfo's CODESET to.
#   3. The codeset part of POSIX::setlocale(LC_CTYPE), where a bare 'euc'
#      is refined using the language/country part of the locale name.
sub _get_locale_encoding {
    my $locale_encoding;

    if ( $^O eq 'MSWin32' ) {
        my @tries = (
            # First try to get the OutputCP. This will work only if we
            # are attached to a console
            [ 'Win32.pm'         => 'Win32::GetConsoleOutputCP' ],
            [ 'Win32/Console.pm' => 'Win32::Console::OutputCP' ],
            # If above failed, this means that we are a GUI app
            # Let's assume that the ANSI codepage is what matters
            [ 'Win32.pm' => 'Win32::GetACP' ],
        );
        for my $try (@tries) {
            my ( $file, $function ) = @{$try};

            # Any failure (module missing, function missing) just means
            # "try the next candidate".
            my $cp = eval {
                require $file;
                no strict 'refs';
                &{$function}();
            };
            next unless $cp;

            # 65001 is the code page for UTF-8
            return $cp == 65001 ? 'UTF-8' : 'cp' . $cp;
        }
    }

    # I18N::Langinfo isn't available everywhere
    $locale_encoding = eval {
        require I18N::Langinfo;
        find_encoding(
            I18N::Langinfo::langinfo( I18N::Langinfo::CODESET() )
        )->name
    };
    return $locale_encoding if defined $locale_encoding;

    eval {
        require POSIX;
        # Get the current locale
        # Remember that MSVCRT impl is quite different from Unixes
        my $locale = POSIX::setlocale(POSIX::LC_CTYPE());

        # setlocale() may hand back undef; skip the match in that case
        # instead of emitting an "uninitialized value" warning.
        if ( defined $locale
            && $locale =~ /^([^.]+)\.([^.@]+)(?:@.*)?$/ )
        {
            my $country_language;
            ( $country_language, $locale_encoding ) = ( $1, $2 );

            # Could do more heuristics based on the country and language
            # since we have Locale::Country and Locale::Language available.
            # TODO: get a database of Language -> Encoding mappings
            # (the Estonian database at http://www.eki.ee/letter/
            # would be excellent!) --jhi
            if (lc($locale_encoding) eq 'euc') {
                if ( $country_language =~ /^ja_JP|japan(?:ese)?$/i ) {
                    $locale_encoding = 'euc-jp';
                }
                elsif ( $country_language =~ /^ko_KR|korean?$/i ) {
                    $locale_encoding = 'euc-kr';
                }
                elsif ( $country_language =~ /^zh_CN|chin(?:a|ese)$/i ) {
                    $locale_encoding = 'euc-cn';
                }
                elsif ( $country_language =~ /^zh_TW|taiwan(?:ese)?$/i ) {
                    $locale_encoding = 'euc-tw';
                }
                else {
                    # NOTE(review): this croak fires inside the enclosing
                    # eval, so it never reaches the caller; the function
                    # then returns the unrefined 'euc'.  Left unchanged so
                    # that callers keep seeing the same result.
                    require Carp;
                    Carp::croak(
                        "encoding: Locale encoding '$locale_encoding' too ambiguous"
                    );
                }
            }
        }
    };

    return $locale_encoding;
}
110
# Implements C<use encoding NAME, OPTIONS>.
#
# Arguments: the class name, the encoding name, then key/value options
# (the code below looks at Filter, STDIN and STDOUT).
# Croaks on EBCDIC platforms, on a missing or unknown encoding name, and,
# without the Filter option, on perl 5.25.3 and later unless
# $Config{usecperl} is set.
# Returns nothing for the ':_get_locale_encoding' request, otherwise 1.
sub import {

    # "A" is code point 193 on EBCDIC platforms
    if ( ord("A") == 193 ) {
        require Carp;
        Carp::croak("encoding: pragma does not support EBCDIC platforms");
    }

    # Deprecation text, or 0 when no deprecation warning applies
    my $deprecate = 0;
    if ( $] >= 5.017 and !$Config{usecperl} ) {
        $deprecate = "Use of the encoding pragma is deprecated";
    }

    my ( $class, $name ) = ( shift, shift );
    unless ($name) {
        require Carp;
        Carp::croak("encoding: no encoding specified.");
    }

    # used by lib/open.pm: only export the helper into the caller
    if ( $name eq ':_get_locale_encoding' ) {
        my $caller = caller();
        no strict 'refs';
        *{"${caller}::_get_locale_encoding"} = \&_get_locale_encoding;
        return;
    }

    if ( $name eq ':locale' ) {
        $name = _get_locale_encoding();
    }
    BEGIN { strict->unimport('hashpairs') if $] >= 5.027 and $^V =~ /c$/; }
    my %arg = @_;

    # The locale lookup may have come back empty; fall back on the
    # environment in that case.
    unless ( defined $name ) {
        $name = $ENV{PERL_ENCODING};
    }

    my $enc = find_encoding($name);
    if ( !defined $enc ) {
        require Carp;
        Carp::croak("encoding: Unknown encoding '$name'");
    }
    $name = $enc->name;    # canonical name from here on

    if ( $arg{Filter} ) {
        warnings::warnif( "deprecated", $deprecate ) if $deprecate;

        undef ${^ENCODING} if defined ${^ENCODING};
        undef ${^E_NCODING} if PERL_5_21_7;

        # implicitly 'use utf8'
        require utf8;    # to fetch $utf8::hint_bits;
        $^H |= $utf8::hint_bits;

        # Source filter: decode what filter_read() fetched with $enc.
        require Filter::Util::Call;
        Filter::Util::Call->import;
        filter_add(
            sub {
                my $status = filter_read();
                return $status unless $status > 0;
                $_ = $enc->decode( $_, 1 );
                DEBUG and warn $_;
                return $status;
            }
        );
    }
    else {
        if ( $] >= 5.025003 and !$Config{usecperl} ) {
            require Carp;
            Carp::croak("The encoding pragma is no longer supported. Check cperl");
        }
        warnings::warnif( "deprecated", $deprecate ) if $deprecate;

        DEBUG and warn "_exception($name) = ", _exception($name);
        unless ( _exception($name) ) {
            if (PERL_5_21_7) {

                # Starting with 5.21.7, this pragma uses a shadow variable
                # designed explicitly for it, ${^E_NCODING}, to enforce
                # lexical scope; instead of ${^ENCODING}.
                $^H{'encoding'} = 1;
                ${^E_NCODING} = $enc;
            }
            else {
                ${^ENCODING} = $enc;
            }
        }

        # Without PerlIO::encoding the handles are left alone.
        return 1 unless HAS_PERLIO;
    }

    # A non-zero ${^UNICODE} means the handle layers are left untouched.
    return 1 if defined ${^UNICODE} and ${^UNICODE} != 0;

    for my $h (qw(STDIN STDOUT)) {
        if ( $arg{$h} ) {

            # Explicit per-handle encoding: it must be a known one.
            if ( !defined find_encoding( $arg{$h} ) ) {
                require Carp;
                Carp::croak(
                    "encoding: Unknown encoding for $h, '$arg{$h}'");
            }
            binmode( $h, ":raw :encoding($arg{$h})" );
        }
        elsif ( !exists $arg{$h} ) {

            # Handle not mentioned at all: use the script encoding.
            no warnings 'uninitialized';
            binmode( $h, ":raw :encoding($name)" );
        }

        # A key that exists with a false value leaves the handle alone.
    }
    return 1;    # I doubt if we need it, though
}
212
# Implements C<no encoding>: clears the encoding variables, puts STDIN and
# STDOUT back to raw mode and, when Filter::Util::Call has been loaded,
# attempts to remove the source filter.
sub unimport {
    no warnings;

    undef ${^ENCODING};
    undef ${^E_NCODING} if PERL_5_21_7;

    for my $fh ( \*STDIN, \*STDOUT ) {
        if (HAS_PERLIO) {
            binmode( $fh, ":raw" );
        }
        else {
            binmode($fh);
        }
    }

    # Best effort: any error from filter_del() is deliberately ignored.
    eval { filter_del() } if $INC{"Filter/Util/Call.pm"};
}
229
2301;
231__END__
232
233=pod
234
235=head1 NAME
236
237encoding - allows you to write your script in non-ASCII and non-UTF-8
238
239=head1 WARNING
240
241This module has been deprecated since perl v5.18.  See L</DESCRIPTION> and
242L</BUGS>.
243
244=head1 SYNOPSIS
245
246  use encoding "greek";  # Perl like Greek to you?
247  use encoding "euc-jp"; # Jperl!
248
249  # or you can even do this if your shell supports your native encoding
250
251  perl -Mencoding=latin2 -e'...' # Feeling centrally European?
252  perl -Mencoding=euc-kr -e'...' # Or Korean?
253
254  # more control
255
256  # A simple euc-cn => utf-8 converter
257  use encoding "euc-cn", STDOUT => "utf8";  while(<>){print};
258
259  # "no encoding;" supported
260  no encoding;
261
262  # an alternate way, Filter
263  use encoding "euc-jp", Filter=>1;
264  # now you can use kanji identifiers -- in euc-jp!
265
266  # encode based on the current locale - specialized purposes only;
267  # fraught with danger!!
268  use encoding ':locale';
269
270=head1 DESCRIPTION
271
272This pragma is used to enable a Perl script to be written in encodings that
273aren't strictly ASCII nor UTF-8.  It translates all or portions of the Perl
274program script from a given encoding into UTF-8, and changes the PerlIO layers
275of C<STDIN> and C<STDOUT> to the encoding specified.
276
277This pragma dates from the days when UTF-8-enabled editors were uncommon.  But
278that was long ago, and the need for it is greatly diminished.  That, coupled
279with the fact that it doesn't work with threads, along with other problems,
280(see L</BUGS>) have led to its being deprecated.  It is planned to remove this
281pragma in a future Perl version.  New code should be written in UTF-8, and the
282C<use utf8> pragma used instead (see L<perluniintro> and L<utf8> for details).
283Old code should be converted to UTF-8, via something like the recipe in the
284L</SYNOPSIS> (though this simple approach may require manual adjustments
285afterwards).
286
287If UTF-8 is not an option, it is recommended that one use a simple source
288filter, such as that provided by L<Filter::Encoding> on CPAN or this
289pragma's own C<Filter> option (see below).
290
291The only legitimate use of this pragma is almost certainly just one per file,
292near the top, with file scope, as the file is likely going to only be written
293in one encoding.  Further restrictions apply in Perls before v5.22 (see
294L</Prior to Perl v5.22>).
295
There are two basic modes of operation (plus turning it off):
297
298=over 4
299
300=item C<use encoding ['I<ENCNAME>'] ;>
301
302Please note: This mode of operation is no longer supported as of Perl
303v5.26.
304
305This is the normal operation.  It translates various literals encountered in
306the Perl source file from the encoding I<ENCNAME> into UTF-8, and similarly
307converts character code points.  This is used when the script is a combination
308of ASCII (for the variable names and punctuation, I<etc>), but the literal
309data is in the specified encoding.
310
311I<ENCNAME> is optional.  If omitted, the encoding specified in the environment
312variable L<C<PERL_ENCODING>|perlrun/PERL_ENCODING> is used.  If this isn't
313set, or the resolved-to encoding is not known to C<L<Encode>>, the error
314C<Unknown encoding 'I<ENCNAME>'> will be thrown.
315
316Starting in Perl v5.8.6 (C<Encode> version 2.0.1), I<ENCNAME> may be the
317name C<:locale>.  This is for very specialized applications, and is documented
318in L</The C<:locale> sub-pragma> below.
319
320The literals that are converted are C<q//, qq//, qr//, qw///, qx//>, and
321starting in v5.8.1, C<tr///>.  Operations that do conversions include C<chr>,
322C<ord>, C<utf8::upgrade> (but not C<utf8::downgrade>), and C<chomp>.
323
324Also starting in v5.8.1, the C<DATA> pseudo-filehandle is translated from the
325encoding into UTF-8.
326
327For example, you can write code in EUC-JP as follows:
328
329  my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji
330               #<-char-><-char->   # 4 octets
331  s/\bCamel\b/$Rakuda/;
332
333And with C<use encoding "euc-jp"> in effect, it is the same thing as
334that code in UTF-8:
335
336  my $Rakuda = "\x{99F1}\x{99DD}"; # two Unicode Characters
337  s/\bCamel\b/$Rakuda/;
338
339See L</EXAMPLE> below for a more complete example.
340
341Unless C<${^UNICODE}> (available starting in v5.8.2) exists and is non-zero, the
342PerlIO layers of C<STDIN> and C<STDOUT> are set to "C<:encoding(I<ENCNAME>)>".
343Therefore,
344
345  use encoding "euc-jp";
346  my $message = "Camel is the symbol of perl.\n";
347  my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji
348  $message =~ s/\bCamel\b/$Rakuda/;
349  print $message;
350
351will print
352
353 "\xF1\xD1\xF1\xCC is the symbol of perl.\n"
354
355not
356
357 "\x{99F1}\x{99DD} is the symbol of perl.\n"
358
359You can override this by giving extra arguments; see below.
360
361Note that C<STDERR> WILL NOT be changed, regardless.
362
363Also note that non-STD file handles remain unaffected.  Use C<use
364open> or C<binmode> to change the layers of those.
365
366=item C<use encoding I<ENCNAME>, Filter=E<gt>1;>
367
368This operates as above, but the C<Filter> argument with a non-zero
369value causes the entire script, and not just literals, to be translated from
370the encoding into UTF-8.  This allows identifiers in the source to be in that
371encoding as well.  (Problems may occur if the encoding is not a superset of
372ASCII; imagine all your semi-colons being translated into something
373different.)  One can use this form to make
374
375 ${"\x{4eba}"}++
376
377work.  (This is equivalent to C<$I<human>++>, where I<human> is a single Han
378ideograph).
379
380This effectively means that your source code behaves as if it were written in
UTF-8 with C<use utf8> in effect.  So even if your editor only supports
382Shift_JIS, for example, you can still try examples in Chapter 15 of
383C<Programming Perl, 3rd Ed.>.
384
385This option is significantly slower than the other one.
386
387=item C<no encoding;>
388
389Unsets the script encoding. The layers of C<STDIN>, C<STDOUT> are
390reset to "C<:raw>" (the default unprocessed raw stream of bytes).
391
392=back
393
394=head1 OPTIONS
395
396=head2 Setting C<STDIN> and/or C<STDOUT> individually
397
398The encodings of C<STDIN> and C<STDOUT> are individually settable by parameters to
399the pragma:
400
401 use encoding 'euc-tw', STDIN => 'greek'  ...;
402
403In this case, you cannot omit the first I<ENCNAME>.  C<< STDIN => undef >>
404turns the I/O transcoding completely off for that filehandle.
405
406When C<${^UNICODE}> (available starting in v5.8.2) exists and is non-zero,
407these options will be completely ignored.  See L<perlvar/C<${^UNICODE}>> and
408L<"C<-C>" in perlrun|perlrun/-C [numberE<sol>list]> for details.
409
410=head2 The C<:locale> sub-pragma
411
412Starting in v5.8.6, the encoding name may be C<:locale>.  This means that the
413encoding is taken from the current locale, and not hard-coded by the pragma.
414Since a script really can only be encoded in exactly one encoding, this option
415is dangerous.  It makes sense only if the script itself is written in ASCII,
416and all the possible locales that will be in use when the script is executed
417are supersets of ASCII.  That means that the script itself doesn't get
418changed, but the I/O handles have the specified encoding added, and the
419operations like C<chr> and C<ord> use that encoding.
420
421The logic of finding which locale C<:locale> uses is as follows:
422
423=over 4
424
425=item 1.
426
427If the platform supports the C<langinfo(CODESET)> interface, the codeset
428returned is used as the default encoding for the open pragma.
429
430=item 2.
431
432If 1. didn't work but we are under the locale pragma, the environment
433variables C<LC_ALL> and C<LANG> (in that order) are matched for encodings
434(the part after "C<.>", if any), and if any found, that is used
435as the default encoding for the open pragma.
436
437=item 3.
438
439If 1. and 2. didn't work, the environment variables C<LC_ALL> and C<LANG>
440(in that order) are matched for anything looking like UTF-8, and if
441any found, C<:utf8> is used as the default encoding for the open
442pragma.
443
444=back
445
446If your locale environment variables (C<LC_ALL>, C<LC_CTYPE>, C<LANG>)
447contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
448the default encoding of your C<STDIN>, C<STDOUT>, and C<STDERR>, and of
449B<any subsequent file open>, is UTF-8.
450
451=head1 CAVEATS
452
453=head2 SIDE EFFECTS
454
455=over
456
457=item *
458
If the C<encoding> pragma is in scope then the lengths returned by C<chomp>
are calculated from the length of C<$/> in Unicode characters, which is not
always the same as the length of C<$/> in the native encoding.
462
463=item *
464
465Without this pragma, if strings operating under byte semantics and strings
466with Unicode character data are concatenated, the new string will
467be created by decoding the byte strings as I<ISO 8859-1 (Latin-1)>.
468
469The B<encoding> pragma changes this to use the specified encoding
470instead.  For example:
471
472    use encoding 'utf8';
473    my $string = chr(20000); # a Unicode string
474    utf8::encode($string);   # now it's a UTF-8 encoded byte string
475    # concatenate with another Unicode string
476    print length($string . chr(20000));
477
478Will print C<2>, because C<$string> is upgraded as UTF-8.  Without
479C<use encoding 'utf8';>, it will print C<4> instead, since C<$string>
480is three octets when interpreted as Latin-1.
481
482=back
483
484=head2 DO NOT MIX MULTIPLE ENCODINGS
485
486Notice that only literals (string or regular expression) having only
487legacy code points are affected: if you mix data like this
488
489    \x{100}\xDF
490    \xDF\x{100}
491
492the data is assumed to be in (Latin 1 and) Unicode, not in your native
493encoding.  In other words, this will match in "greek":
494
495    "\xDF" =~ /\x{3af}/
496
497but this will not
498
499    "\xDF\x{100}" =~ /\x{3af}\x{100}/
500
501since the C<\xDF> (ISO 8859-7 GREEK SMALL LETTER IOTA WITH TONOS) on
502the left will B<not> be upgraded to C<\x{3af}> (Unicode GREEK SMALL
503LETTER IOTA WITH TONOS) because of the C<\x{100}> on the left.  You
504should not be mixing your legacy data and Unicode in the same string.
505
506This pragma also affects encoding of the 0x80..0xFF code point range:
507normally characters in that range are left as eight-bit bytes (unless
508they are combined with characters with code points 0x100 or larger,
509in which case all characters need to become UTF-8 encoded), but if
510the C<encoding> pragma is present, even the 0x80..0xFF range always
511gets UTF-8 encoded.
512
513After all, the best thing about this pragma is that you don't have to
514resort to \x{....} just to spell your name in a native encoding.
515So feel free to put your strings in your encoding in quotes and
516regexes.
517
518=head2 Prior to Perl v5.22
519
520The pragma was a per script, not a per block lexical.  Only the last
521C<use encoding> or C<no encoding> mattered, and it affected
522B<the whole script>.  However, the C<no encoding> pragma was supported and
523C<use encoding> could appear as many times as you want in a given script
524(though only the last was effective).
525
526Since the scope wasn't lexical, other modules' use of C<chr>, C<ord>, I<etc.>
527were affected.  This leads to spooky, incorrect action at a distance that is
528hard to debug.
529
530This means you would have to be very careful of the load order:
531
532  # called module
533  package Module_IN_BAR;
534  use encoding "bar";
535  # stuff in "bar" encoding here
536  1;
537
538  # caller script
  use encoding "foo";
540  use Module_IN_BAR;
541  # surprise! use encoding "bar" is in effect.
542
543The best way to avoid this oddity is to use this pragma RIGHT AFTER
544other modules are loaded.  i.e.
545
546  use Module_IN_BAR;
547  use encoding "foo";
548
549=head2 Prior to Encode version 1.87
550
551=over
552
553=item *
554
555C<STDIN> and C<STDOUT> were not set under the filter option.
And C<< STDIN=>I<ENCODING> >> and C<< STDOUT=>I<ENCODING> >> didn't work like
the non-filter version.
558
559=item *
560
C<use utf8> wasn't implicitly declared, so you had to C<use utf8> to do
562
563 ${"\x{4eba}"}++
564
565=back
566
567=head2 Prior to Perl v5.8.1
568
569=over
570
571=item "NON-EUC" doublebyte encodings
572
573Because perl needs to parse the script before applying this pragma, such
574encodings as Shift_JIS and Big-5 that may contain C<'\'> (BACKSLASH;
575C<\x5c>) in the second byte fail because the second byte may
576accidentally escape the quoting character that follows.
577
578=item C<tr///>
579
580The B<encoding> pragma works by decoding string literals in
581C<q//,qq//,qr//,qw///, qx//> and so forth.  In perl v5.8.0, this
582does not apply to C<tr///>.  Therefore,
583
584  use encoding 'euc-jp';
585  #....
586  $kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/;
587  #           -------- -------- -------- --------
588
589Does not work as
590
591  $kana =~ tr/\x{3041}-\x{3093}/\x{30a1}-\x{30f3}/;
592
593=over
594
595=item Legend of characters above
596
597  utf8     euc-jp   charnames::viacode()
598  -----------------------------------------
599  \x{3041} \xA4\xA1 HIRAGANA LETTER SMALL A
600  \x{3093} \xA4\xF3 HIRAGANA LETTER N
601  \x{30a1} \xA5\xA1 KATAKANA LETTER SMALL A
602  \x{30f3} \xA5\xF3 KATAKANA LETTER N
603
604=back
605
606This counterintuitive behavior has been fixed in perl v5.8.1.
607
In perl v5.8.0, you can work around this as follows:
609
610  use encoding 'euc-jp';
611  #  ....
612  eval qq{ \$kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/ };
613
614Note the C<tr//> expression is surrounded by C<qq{}>.  The idea behind
615this is the same as the classic idiom that makes C<tr///> 'interpolate':
616
617   tr/$from/$to/;            # wrong!
618   eval qq{ tr/$from/$to/ }; # workaround.
619
620=back
621
622=head1 EXAMPLE - Greekperl
623
624    use encoding "iso 8859-7";
625
626    # \xDF in ISO 8859-7 (Greek) is \x{3af} in Unicode.
627
628    $a = "\xDF";
629    $b = "\x{100}";
630
631    printf "%#x\n", ord($a); # will print 0x3af, not 0xdf
632
633    $c = $a . $b;
634
635    # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}".
636
637    # chr() is affected, and ...
638
639    print "mega\n"  if ord(chr(0xdf)) == 0x3af;
640
641    # ... ord() is affected by the encoding pragma ...
642
643    print "tera\n" if ord(pack("C", 0xdf)) == 0x3af;
644
645    # ... as are eq and cmp ...
646
647    print "peta\n" if "\x{3af}" eq  pack("C", 0xdf);
648    print "exa\n"  if "\x{3af}" cmp pack("C", 0xdf) == 0;
649
650    # ... but pack/unpack C are not affected, in case you still
651    # want to go back to your native encoding
652
653    print "zetta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;
654
655=head1 BUGS
656
657=over
658
659=item Thread safety
660
661C<use encoding ...> is not thread-safe (i.e., do not use in threaded
662applications).
663
664=item Can't be used by more than one module in a single program.
665
666Only one encoding is allowed.  If you combine modules in a program that have
667different encodings, only one will be actually used.
668
669=item Other modules using C<STDIN> and C<STDOUT> get the encoded stream
670
671They may be expecting something completely different.
672
673=item literals in regex that are longer than 127 bytes
674
675For native multibyte encodings (either fixed or variable length),
676the current implementation of the regular expressions may introduce
677recoding errors for regular expression literals longer than 127 bytes.
678
679=item EBCDIC
680
681The encoding pragma is not supported on EBCDIC platforms.
682
683=item C<format>
684
685This pragma doesn't work well with C<format> because PerlIO does not
686get along very well with it.  When C<format> contains non-ASCII
687characters it prints funny or gets "wide character warnings".
688To understand it, try the code below.
689
690  # Save this one in utf8
691  # replace *non-ascii* with a non-ascii string
692  my $camel;
693  format STDOUT =
694  *non-ascii*@>>>>>>>
695  $camel
696  .
697  $camel = "*non-ascii*";
698  binmode(STDOUT=>':encoding(utf8)'); # bang!
699  write;              # funny
700  print $camel, "\n"; # fine
701
Without the C<binmode> call, C<write()> happens to work, but then C<print()>
fails instead.
704
705At any rate, the very use of C<format> is questionable when it comes to
706unicode characters since you have to consider such things as character
707width (i.e. double-width for ideographs) and directions (i.e. BIDI for
708Arabic and Hebrew).
709
710=item See also L</CAVEATS>
711
712=back
713
714=head1 HISTORY
715
716This pragma first appeared in Perl v5.8.0.  It has been enhanced in later
717releases as specified above.
718
719=head1 SEE ALSO
720
721L<perlunicode>, L<Encode>, L<open>, L<Filter::Util::Call>,
722
723Ch. 15 of C<Programming Perl (3rd Edition)>
724by Larry Wall, Tom Christiansen, Jon Orwant;
725O'Reilly & Associates; ISBN 0-596-00027-8
726
727=cut
728