1#!./perl -wT
2
3use strict;
4use warnings;
5use Config;
6
7# This tests plain 'use locale' and adorned 'use locale ":not_characters"'
8# Because these pragmas are compile time, and I (khw) am trying to test
9# without using 'eval' as much as possible, which might cloud the issue,  the
10# crucial parts of the code are duplicated in a block for each pragma.
11
12# Unfortunately, many systems have defective locale definitions.  This test
13# file looks for both perl bugs and bugs in the system's locale definitions.
14# It can be difficult to tease apart which is which.  For the latter, there
15# are tests that are based on the POSIX standard.  A character isn't supposed
16# to be both a space and graphic, for example.  Another example is if a
17# character is the uppercase of another, that other should be the lowercase of
18# the first.  Including tests for these allows you to test for defective
19# locales, as described in perllocale.  The way this file distinguishes
20# between defective locales, and perl bugs is to see what percentage of
21# locales fail a given test.  If it's a lot, then it's more likely to be a
22# perl bug; only a few, those particular locales are likely defective.  In
23# that case the failing tests are marked TODO.  (They should be reported to
24# the vendor, however; but it's not perl's problem.)  In some cases, this
25# script has caused tickets to be filed against perl which turn out to be the
26# platform's bug, but a higher percentage of locales are failing than the
27# built-in cut-off point.  For those platforms, code has been added to
28# increase the cut-off, so those platforms don't trigger failing test reports.
29# Ideally, the platforms would get fixed and that code would be changed to
30# only kick-in when run on versions that are earlier than the fixed one.  But,
31# this rarely happens in practice.
32
33# To make a TODO test, add the string 'TODO' to its %test_names value
34
35my $is_ebcdic = ord("A") == 193;
36my $os = lc $^O;
37
38# Configure now lets you build a perl that silently ignores taint features
39my $NoTaintSupport = exists($Config{taint_support}) && !$Config{taint_support};
40
41no warnings 'locale';  # We test even weird locales; and do some scary things
42                       # in ok locales
43
44binmode STDOUT, ':utf8';
45binmode STDERR, ':utf8';
46
47BEGIN {
48    chdir 't' if -d 't';
49    @INC = '../lib';
50    unshift @INC, '.';
51    require './loc_tools.pl';
52    unless (locales_enabled('LC_CTYPE')) {
53	print "1..0\n";
54	exit;
55    }
56    $| = 1;
57    require Config; import Config;
58}
59
60use feature 'fc';
61use I18N::Langinfo qw(langinfo CODESET CRNCYSTR RADIXCHAR);
62
63# =1 adds debugging output; =2 increases the verbosity somewhat
64our $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
65
66# Certain tests have been shown to be problematical for a few locales.  Don't
67# fail them unless at least this percentage of the tested locales fail.
68# EBCDIC os390 has more locales fail than normal, because it has locales that
69# move various critical characters like '['.
70my $acceptable_failure_percentage = ($os =~ / ^ ( os390 ) $ /x)
71                                    ? 10
72                                    : 5;
73
74# The list of test numbers of the problematic tests.
75my %problematical_tests;
76
77# If any %problematical_tests fails in one of these locales, it is
78# considered a TODO.
79my %known_bad_locales = (
80                          irix => qr/ ^ (?: cs | hu | sk ) $/x,
81                          darwin => qr/ ^ lt_LT.ISO8859 /ix,
82                          os390 => qr/ ^ italian /ix,
83                          netbsd => qr/\bISO8859-2\b/i,
84
85                          # This may be the same bug as the cygwin below; it's
86                          # generating malformed UTF-8 on the radix being
87                          # mulit-byte
88                          solaris => qr/ ^ ( ar_ | pa_ ) /x,
89                        );
90
91# cygwin isn't returning proper radix length in this locale, but supposedly to
92# be fixed in later versions.
93if ($os eq 'cygwin' && version->new(($Config{osvers} =~ /^(\d+(?:\.\d+)+)/)[0]) le v2.4.1) {
94    $known_bad_locales{'cygwin'} = qr/ ^ ps_AF /ix;
95}
96
97use Dumpvalue;
98
99my $dumper = Dumpvalue->new(
100                            tick => qq{"},
101                            quoteHighBit => 0,
102                            unctrl => "quote"
103                           );
104
105sub debug {
106  return unless $debug;
107  my($mess) = join "", '# ', @_;
108  chomp $mess;
109  print STDERR $dumper->stringify($mess,1), "\n";
110}
111
112sub note {
113    local $debug = 1;
114    debug @_;
115}
116
117sub debug_more {
118  return unless $debug > 1;
119  return debug(@_);
120}
121
122sub debugf {
123    printf STDERR @_ if $debug;
124}
125
126$a = 'abc %9';
127
128my $test_num = 0;
129
130sub ok {
131    my ($result, $message) = @_;
132    $message = "" unless defined $message;
133
134    print 'not ' unless ($result);
135    print "ok " . ++$test_num;
136    print " $message";
137    print "\n";
138    return ($result) ? 1 : 0;
139}
140
141sub skip {
142    return ok 1, "skipped: " . shift;
143}
144
145sub fail {
146    return ok 0, shift;
147}
148
149# First we'll do a lot of taint checking for locales.
150# This is the easiest to test, actually, as any locale,
151# even the default locale will taint under 'use locale'.
152
153sub is_tainted { # hello, camel two.
154    no warnings 'uninitialized' ;
155    my $dummy;
156    local $@;
157    not eval { $dummy = join("", @_), kill 0; 1 }
158}
159
160sub check_taint ($;$) {
161    my $message_tail = $_[1] // "";
162
163    # Extra blanks are so aligns with taint_not output
164    $message_tail = ":     $message_tail" if $message_tail;
165    if ($NoTaintSupport) {
166        skip("your perl was built without taint support");
167    }
168    else {
169        ok is_tainted($_[0]), "verify that is tainted$message_tail";
170    }
171}
172
173sub check_taint_not ($;$) {
174    my $message_tail = $_[1] // "";
175    $message_tail = ":  $message_tail" if $message_tail;
176    ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
177}
178
179foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) {
180    my $short_result = locales_enabled($category);
181    ok ($short_result == 0 || $short_result == 1,
182        "Verify locales_enabled('$category') returns 0 or 1");
183    debug("locales_enabled('$category') returned '$short_result'");
184    my $long_result = locales_enabled("LC_$category");
185    if (! ok ($long_result == $short_result,
186              "   and locales_enabled('LC_$category') returns "
187            . "the same value")
188    ) {
189        debug("locales_enabled('LC_$category') returned $long_result");
190    }
191}
192
193"\tb\t" =~ /^m?(\s)(.*)\1$/;
194check_taint_not   $&, "not tainted outside 'use locale'";
195;
196
197use locale;	# engage locale and therefore locale taint.
198
199# BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for
200# ":notcharacters"
201
202check_taint_not   $a, '$a';
203
204check_taint       uc($a), 'uc($a)';
205check_taint       "\U$a", '"\U$a"';
206check_taint       ucfirst($a), 'ucfirst($a)';
207check_taint       "\u$a", '"\u$a"';
208check_taint       lc($a), 'lc($a)';
209check_taint       fc($a), 'fc($a)';
210check_taint       "\L$a", '"\L$a"';
211check_taint       "\F$a", '"\F$a"';
212check_taint       lcfirst($a), 'lcfirst($a)';
213check_taint       "\l$a", '"\l$a"';
214
215check_taint_not  sprintf('%e', 123.456), "sprintf('%e', 123.456)";
216check_taint_not  sprintf('%f', 123.456), "sprintf('%f', 123.456)";
217check_taint_not  sprintf('%g', 123.456), "sprintf('%g', 123.456)";
218check_taint_not  sprintf('%d', 123.456), "sprintf('%d', 123.456)";
219check_taint_not  sprintf('%x', 123.456), "sprintf('%x', 123.456)";
220
221$_ = $a;	# untaint $_
222
223$_ = uc($a);	# taint $_
224
225check_taint      $_, '$_ = uc($a)';
226
227/(\w)/;	# taint $&, $`, $', $+, $1.
228check_taint      $&, "\$& from /(\\w)/";
229check_taint      $`, "\t\$`";
230check_taint      $', "\t\$'";
231check_taint      $+, "\t\$+";
232check_taint      $1, "\t\$1";
233check_taint_not  $2, "\t\$2";
234
235/(.)/;	# untaint $&, $`, $', $+, $1.
236check_taint_not  $&, "\$& from /(.)/";
237check_taint_not  $`, "\t\$`";
238check_taint_not  $', "\t\$'";
239check_taint_not  $+, "\t\$+";
240check_taint_not  $1, "\t\$1";
241check_taint_not  $2, "\t\$2";
242
243/(\W)/;	# taint $&, $`, $', $+, $1.
244check_taint      $&, "\$& from /(\\W)/";
245check_taint      $`, "\t\$`";
246check_taint      $', "\t\$'";
247check_taint      $+, "\t\$+";
248check_taint      $1, "\t\$1";
249check_taint_not  $2, "\t\$2";
250
251/(.)/;	# untaint $&, $`, $', $+, $1.
252check_taint_not  $&, "\$& from /(.)/";
253check_taint_not  $`, "\t\$`";
254check_taint_not  $', "\t\$'";
255check_taint_not  $+, "\t\$+";
256check_taint_not  $1, "\t\$1";
257check_taint_not  $2, "\t\$2";
258
259/(\s)/;	# taint $&, $`, $', $+, $1.
260check_taint      $&, "\$& from /(\\s)/";
261check_taint      $`, "\t\$`";
262check_taint      $', "\t\$'";
263check_taint      $+, "\t\$+";
264check_taint      $1, "\t\$1";
265check_taint_not  $2, "\t\$2";
266
267/(.)/;	# untaint $&, $`, $', $+, $1.
268check_taint_not  $&, "\$& from /(.)/";
269
270/(\S)/;	# taint $&, $`, $', $+, $1.
271check_taint      $&, "\$& from /(\\S)/";
272check_taint      $`, "\t\$`";
273check_taint      $', "\t\$'";
274check_taint      $+, "\t\$+";
275check_taint      $1, "\t\$1";
276check_taint_not  $2, "\t\$2";
277
278/(.)/;	# untaint $&, $`, $', $+, $1.
279check_taint_not  $&, "\$& from /(.)/";
280
281"0" =~ /(\d)/;	# taint $&, $`, $', $+, $1.
282check_taint      $&, "\$& from /(\\d)/";
283check_taint      $`, "\t\$`";
284check_taint      $', "\t\$'";
285check_taint      $+, "\t\$+";
286check_taint      $1, "\t\$1";
287check_taint_not  $2, "\t\$2";
288
289/(.)/;	# untaint $&, $`, $', $+, $1.
290check_taint_not  $&, "\$& from /(.)/";
291
292/(\D)/;	# taint $&, $`, $', $+, $1.
293check_taint      $&, "\$& from /(\\D)/";
294check_taint      $`, "\t\$`";
295check_taint      $', "\t\$'";
296check_taint      $+, "\t\$+";
297check_taint      $1, "\t\$1";
298check_taint_not  $2, "\t\$2";
299
300/(.)/;	# untaint $&, $`, $', $+, $1.
301check_taint_not  $&, "\$& from /(.)/";
302
303/([[:alnum:]])/;	# taint $&, $`, $', $+, $1.
304check_taint      $&, "\$& from /([[:alnum:]])/";
305check_taint      $`, "\t\$`";
306check_taint      $', "\t\$'";
307check_taint      $+, "\t\$+";
308check_taint      $1, "\t\$1";
309check_taint_not  $2, "\t\$2";
310
311/(.)/;	# untaint $&, $`, $', $+, $1.
312check_taint_not  $&, "\$& from /(.)/";
313
314/([[:^alnum:]])/;	# taint $&, $`, $', $+, $1.
315check_taint      $&, "\$& from /([[:^alnum:]])/";
316check_taint      $`, "\t\$`";
317check_taint      $', "\t\$'";
318check_taint      $+, "\t\$+";
319check_taint      $1, "\t\$1";
320check_taint_not  $2, "\t\$2";
321
322"a" =~ /(a)|(\w)/;	# taint $&, $`, $', $+, $1.
323check_taint      $&, "\$& from /(a)|(\\w)/";
324check_taint      $`, "\t\$`";
325check_taint      $', "\t\$'";
326check_taint      $+, "\t\$+";
327check_taint      $1, "\t\$1";
328ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
329ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
330check_taint_not  $2, "\t\$2";
331check_taint_not  $3, "\t\$3";
332
333/(.)/;	# untaint $&, $`, $', $+, $1.
334check_taint_not  $&, "\$& from /(.)/";
335
336"\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;	# no tainting because no locale dependence
337check_taint_not      $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
338check_taint_not      $`, "\t\$`";
339check_taint_not      $', "\t\$'";
340check_taint_not      $+, "\t\$+";
341check_taint_not      $1, "\t\$1";
342ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
343check_taint_not      $2, "\t\$2";
344
345/(.)/;	# untaint $&, $`, $', $+, $1.
346check_taint_not  $&, "\$& from /./";
347
348"(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;	# taints because depends on locale
349check_taint      $&, "\$& from /(\\N{KELVIN SIGN})/i";
350check_taint      $`, "\t\$`";
351check_taint      $', "\t\$'";
352check_taint      $+, "\t\$+";
353check_taint      $1, "\t\$1";
354check_taint_not      $2, "\t\$2";
355
356/(.)/;	# untaint $&, $`, $', $+, $1.
357check_taint_not  $&, "\$& from /(.)/";
358
359"a:" =~ /(.)\b(.)/;	# taint $&, $`, $', $+, $1.
360check_taint      $&, "\$& from /(.)\\b(.)/";
361check_taint      $`, "\t\$`";
362check_taint      $', "\t\$'";
363check_taint      $+, "\t\$+";
364check_taint      $1, "\t\$1";
365check_taint      $2, "\t\$2";
366check_taint_not  $3, "\t\$3";
367
368/(.)/;	# untaint $&, $`, $', $+, $1.
369check_taint_not  $&, "\$& from /./";
370
371"aa" =~ /(.)\B(.)/;	# taint $&, $`, $', $+, $1.
372check_taint      $&, "\$& from /(.)\\B(.)/";
373check_taint      $`, "\t\$`";
374check_taint      $', "\t\$'";
375check_taint      $+, "\t\$+";
376check_taint      $1, "\t\$1";
377check_taint      $2, "\t\$2";
378check_taint_not  $3, "\t\$3";
379
380/(.)/;	# untaint $&, $`, $', $+, $1.
381check_taint_not  $&, "\$& from /./";
382
383"aaa" =~ /(.).(\1)/i;	# notaint because not locale dependent
384check_taint_not      $&, "\$ & from /(.).(\\1)/";
385check_taint_not      $`, "\t\$`";
386check_taint_not      $', "\t\$'";
387check_taint_not      $+, "\t\$+";
388check_taint_not      $1, "\t\$1";
389check_taint_not      $2, "\t\$2";
390check_taint_not      $3, "\t\$3";
391
392/(.)/;	# untaint $&, $`, $', $+, $1.
393check_taint_not  $&, "\$ & from /./";
394
395$_ = $a;	# untaint $_
396
397check_taint_not  $_, 'untainting $_ works';
398
399/(b)/;		# this must not taint
400check_taint_not  $&, "\$ & from /(b)/";
401check_taint_not  $`, "\t\$`";
402check_taint_not  $', "\t\$'";
403check_taint_not  $+, "\t\$+";
404check_taint_not  $1, "\t\$1";
405check_taint_not  $2, "\t\$2";
406
407$_ = $a;	# untaint $_
408
409check_taint_not  $_, 'untainting $_ works';
410
411$b = uc($a);	# taint $b
412s/(.+)/$b/;	# this must taint only the $_
413
414check_taint      $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted';
415check_taint_not  $&, "\t\$&";
416check_taint_not  $`, "\t\$`";
417check_taint_not  $', "\t\$'";
418check_taint_not  $+, "\t\$+";
419check_taint_not  $1, "\t\$1";
420check_taint_not  $2, "\t\$2";
421
422$_ = $a;	# untaint $_
423
424s/(.+)/b/;	# this must not taint
425check_taint_not  $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
426check_taint_not  $&, "\t\$&";
427check_taint_not  $`, "\t\$`";
428check_taint_not  $', "\t\$'";
429check_taint_not  $+, "\t\$+";
430check_taint_not  $1, "\t\$1";
431check_taint_not  $2, "\t\$2";
432
433$b = $a;	# untaint $b
434
435($b = $a) =~ s/\w/$&/;
436check_taint      $b, '$b from ($b = $a) =~ s/\w/$&/';	# $b should be tainted.
437check_taint_not  $a, '$a from ($b = $a) =~ s/\w/$&/';	# $a should be not.
438
439$_ = $a;	# untaint $_
440
441s/(\w)/\l$1/;	# this must taint
442check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,';	# this must taint
443check_taint      $&, "\t\$&";
444check_taint      $`, "\t\$`";
445check_taint      $', "\t\$'";
446check_taint      $+, "\t\$+";
447check_taint      $1, "\t\$1";
448check_taint_not  $2, "\t\$2";
449
450$_ = $a;	# untaint $_
451
452s/(\w)/\L$1/;	# this must taint
453check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
454check_taint      $&, "\t\$&";
455check_taint      $`, "\t\$`";
456check_taint      $', "\t\$'";
457check_taint      $+, "\t\$+";
458check_taint      $1, "\t\$1";
459check_taint_not  $2, "\t\$2";
460
461$_ = $a;	# untaint $_
462
463s/(\w)/\u$1/;	# this must taint
464check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
465check_taint      $&, "\t\$&";
466check_taint      $`, "\t\$`";
467check_taint      $', "\t\$'";
468check_taint      $+, "\t\$+";
469check_taint      $1, "\t\$1";
470check_taint_not  $2, "\t\$2";
471
472$_ = $a;	# untaint $_
473
474s/(\w)/\U$1/;	# this must taint
475check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
476check_taint      $&, "\t\$&";
477check_taint      $`, "\t\$`";
478check_taint      $', "\t\$'";
479check_taint      $+, "\t\$+";
480check_taint      $1, "\t\$1";
481check_taint_not  $2, "\t\$2";
482
483# After all this tainting $a should be cool.
484
485check_taint_not  $a, '$a still not tainted';
486
487"a" =~ /([a-z])/;
488check_taint_not $1, '"a" =~ /([a-z])/';
489"foo.bar_baz" =~ /^(.*)[._](.*?)$/;  # Bug 120675
490check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
491
492# BE SURE TO COPY ANYTHING YOU ADD to the block below
493
494{   # This is just the previous tests copied here with a different
495    # compile-time pragma.
496
497    use locale ':not_characters'; # engage restricted locale with different
498                                  # tainting rules
499    check_taint_not   $a, '$a';
500
501    check_taint_not   uc($a), 'uc($a)';
502    check_taint_not   "\U$a", '"\U$a"';
503    check_taint_not   ucfirst($a), 'ucfirst($a)';
504    check_taint_not   "\u$a", '"\u$a"';
505    check_taint_not   lc($a), 'lc($a)';
506    check_taint_not   fc($a), 'fc($a)';
507    check_taint_not   "\L$a", '"\L$a"';
508    check_taint_not   "\F$a", '"\F$a"';
509    check_taint_not   lcfirst($a), 'lcfirst($a)';
510    check_taint_not   "\l$a", '"\l$a"';
511
512    check_taint_not  sprintf('%e', 123.456), "sprintf('%e', 123.456)";
513    check_taint_not  sprintf('%f', 123.456), "sprintf('%f', 123.456)";
514    check_taint_not  sprintf('%g', 123.456), "sprintf('%g', 123.456)";
515    check_taint_not  sprintf('%d', 123.456), "sprintf('%d', 123.456)";
516    check_taint_not  sprintf('%x', 123.456), "sprintf('%x', 123.456)";
517
518    $_ = $a;	# untaint $_
519
520    $_ = uc($a);
521
522    check_taint_not  $_, '$_ = uc($a)';
523
524    /(\w)/;
525    check_taint_not  $&, "\$& from /(\\w)/";
526    check_taint_not  $`, "\t\$`";
527    check_taint_not  $', "\t\$'";
528    check_taint_not  $+, "\t\$+";
529    check_taint_not  $1, "\t\$1";
530    check_taint_not  $2, "\t\$2";
531
532    /(.)/;	# untaint $&, $`, $', $+, $1.
533    check_taint_not  $&, "\$& from /(.)/";
534    check_taint_not  $`, "\t\$`";
535    check_taint_not  $', "\t\$'";
536    check_taint_not  $+, "\t\$+";
537    check_taint_not  $1, "\t\$1";
538    check_taint_not  $2, "\t\$2";
539
540    /(\W)/;
541    check_taint_not  $&, "\$& from /(\\W)/";
542    check_taint_not  $`, "\t\$`";
543    check_taint_not  $', "\t\$'";
544    check_taint_not  $+, "\t\$+";
545    check_taint_not  $1, "\t\$1";
546    check_taint_not  $2, "\t\$2";
547
548    /(.)/;	# untaint $&, $`, $', $+, $1.
549    check_taint_not  $&, "\$& from /(.)/";
550    check_taint_not  $`, "\t\$`";
551    check_taint_not  $', "\t\$'";
552    check_taint_not  $+, "\t\$+";
553    check_taint_not  $1, "\t\$1";
554    check_taint_not  $2, "\t\$2";
555
556    /(\s)/;
557    check_taint_not  $&, "\$& from /(\\s)/";
558    check_taint_not  $`, "\t\$`";
559    check_taint_not  $', "\t\$'";
560    check_taint_not  $+, "\t\$+";
561    check_taint_not  $1, "\t\$1";
562    check_taint_not  $2, "\t\$2";
563
564    /(.)/;	# untaint $&, $`, $', $+, $1.
565    check_taint_not  $&, "\$& from /(.)/";
566
567    /(\S)/;
568    check_taint_not  $&, "\$& from /(\\S)/";
569    check_taint_not  $`, "\t\$`";
570    check_taint_not  $', "\t\$'";
571    check_taint_not  $+, "\t\$+";
572    check_taint_not  $1, "\t\$1";
573    check_taint_not  $2, "\t\$2";
574
575    /(.)/;	# untaint $&, $`, $', $+, $1.
576    check_taint_not  $&, "\$& from /(.)/";
577
578    "0" =~ /(\d)/;
579    check_taint_not  $&, "\$& from /(\\d)/";
580    check_taint_not  $`, "\t\$`";
581    check_taint_not  $', "\t\$'";
582    check_taint_not  $+, "\t\$+";
583    check_taint_not  $1, "\t\$1";
584    check_taint_not  $2, "\t\$2";
585
586    /(.)/;	# untaint $&, $`, $', $+, $1.
587    check_taint_not  $&, "\$& from /(.)/";
588
589    /(\D)/;
590    check_taint_not  $&, "\$& from /(\\D)/";
591    check_taint_not  $`, "\t\$`";
592    check_taint_not  $', "\t\$'";
593    check_taint_not  $+, "\t\$+";
594    check_taint_not  $1, "\t\$1";
595    check_taint_not  $2, "\t\$2";
596
597    /(.)/;	# untaint $&, $`, $', $+, $1.
598    check_taint_not  $&, "\$& from /(.)/";
599
600    /([[:alnum:]])/;
601    check_taint_not  $&, "\$& from /([[:alnum:]])/";
602    check_taint_not  $`, "\t\$`";
603    check_taint_not  $', "\t\$'";
604    check_taint_not  $+, "\t\$+";
605    check_taint_not  $1, "\t\$1";
606    check_taint_not  $2, "\t\$2";
607
608    /(.)/;	# untaint $&, $`, $', $+, $1.
609    check_taint_not  $&, "\$& from /(.)/";
610
611    /([[:^alnum:]])/;
612    check_taint_not  $&, "\$& from /([[:^alnum:]])/";
613    check_taint_not  $`, "\t\$`";
614    check_taint_not  $', "\t\$'";
615    check_taint_not  $+, "\t\$+";
616    check_taint_not  $1, "\t\$1";
617    check_taint_not  $2, "\t\$2";
618
619    "a" =~ /(a)|(\w)/;
620    check_taint_not  $&, "\$& from /(a)|(\\w)/";
621    check_taint_not  $`, "\t\$`";
622    check_taint_not  $', "\t\$'";
623    check_taint_not  $+, "\t\$+";
624    check_taint_not  $1, "\t\$1";
625    ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
626    ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
627    check_taint_not  $2, "\t\$2";
628    check_taint_not  $3, "\t\$3";
629
630    /(.)/;	# untaint $&, $`, $', $+, $1.
631    check_taint_not  $&, "\$& from /(.)/";
632
633    "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;
634    check_taint_not      $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
635    check_taint_not      $`, "\t\$`";
636    check_taint_not      $', "\t\$'";
637    check_taint_not      $+, "\t\$+";
638    check_taint_not      $1, "\t\$1";
639    ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
640    check_taint_not      $2, "\t\$2";
641
642    /(.)/;	# untaint $&, $`, $', $+, $1.
643    check_taint_not  $&, "\$& from /./";
644
645    "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;
646    check_taint_not  $&, "\$& from /(\\N{KELVIN SIGN})/i";
647    check_taint_not  $`, "\t\$`";
648    check_taint_not  $', "\t\$'";
649    check_taint_not  $+, "\t\$+";
650    check_taint_not  $1, "\t\$1";
651    check_taint_not      $2, "\t\$2";
652
653    /(.)/;	# untaint $&, $`, $', $+, $1.
654    check_taint_not  $&, "\$& from /(.)/";
655
656    "a:" =~ /(.)\b(.)/;
657    check_taint_not  $&, "\$& from /(.)\\b(.)/";
658    check_taint_not  $`, "\t\$`";
659    check_taint_not  $', "\t\$'";
660    check_taint_not  $+, "\t\$+";
661    check_taint_not  $1, "\t\$1";
662    check_taint_not  $2, "\t\$2";
663    check_taint_not  $3, "\t\$3";
664
665    /(.)/;	# untaint $&, $`, $', $+, $1.
666    check_taint_not  $&, "\$& from /./";
667
668    "aa" =~ /(.)\B(.)/;
669    check_taint_not  $&, "\$& from /(.)\\B(.)/";
670    check_taint_not  $`, "\t\$`";
671    check_taint_not  $', "\t\$'";
672    check_taint_not  $+, "\t\$+";
673    check_taint_not  $1, "\t\$1";
674    check_taint_not  $2, "\t\$2";
675    check_taint_not  $3, "\t\$3";
676
677    /(.)/;	# untaint $&, $`, $', $+, $1.
678    check_taint_not  $&, "\$& from /./";
679
680    "aaa" =~ /(.).(\1)/i;	# notaint because not locale dependent
681    check_taint_not      $&, "\$ & from /(.).(\\1)/";
682    check_taint_not      $`, "\t\$`";
683    check_taint_not      $', "\t\$'";
684    check_taint_not      $+, "\t\$+";
685    check_taint_not      $1, "\t\$1";
686    check_taint_not      $2, "\t\$2";
687    check_taint_not      $3, "\t\$3";
688
689    /(.)/;	# untaint $&, $`, $', $+, $1.
690    check_taint_not  $&, "\$ & from /./";
691
692    $_ = $a;	# untaint $_
693
694    check_taint_not  $_, 'untainting $_ works';
695
696    /(b)/;
697    check_taint_not  $&, "\$ & from /(b)/";
698    check_taint_not  $`, "\t\$`";
699    check_taint_not  $', "\t\$'";
700    check_taint_not  $+, "\t\$+";
701    check_taint_not  $1, "\t\$1";
702    check_taint_not  $2, "\t\$2";
703
704    $_ = $a;	# untaint $_
705
706    check_taint_not  $_, 'untainting $_ works';
707
708    s/(.+)/b/;
709    check_taint_not  $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
710    check_taint_not  $&, "\t\$&";
711    check_taint_not  $`, "\t\$`";
712    check_taint_not  $', "\t\$'";
713    check_taint_not  $+, "\t\$+";
714    check_taint_not  $1, "\t\$1";
715    check_taint_not  $2, "\t\$2";
716
717    $b = $a;	# untaint $b
718
719    ($b = $a) =~ s/\w/$&/;
720    check_taint_not     $b, '$b from ($b = $a) =~ s/\w/$&/';
721    check_taint_not  $a, '$a from ($b = $a) =~ s/\w/$&/';
722
723    $_ = $a;	# untaint $_
724
725    s/(\w)/\l$1/;
726    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,';	# this must taint
727    check_taint_not     $&, "\t\$&";
728    check_taint_not     $`, "\t\$`";
729    check_taint_not     $', "\t\$'";
730    check_taint_not     $+, "\t\$+";
731    check_taint_not     $1, "\t\$1";
732    check_taint_not  $2, "\t\$2";
733
734    $_ = $a;	# untaint $_
735
736    s/(\w)/\L$1/;
737    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
738    check_taint_not     $&, "\t\$&";
739    check_taint_not     $`, "\t\$`";
740    check_taint_not     $', "\t\$'";
741    check_taint_not     $+, "\t\$+";
742    check_taint_not     $1, "\t\$1";
743    check_taint_not  $2, "\t\$2";
744
745    $_ = $a;	# untaint $_
746
747    s/(\w)/\u$1/;
748    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
749    check_taint_not     $&, "\t\$&";
750    check_taint_not     $`, "\t\$`";
751    check_taint_not     $', "\t\$'";
752    check_taint_not     $+, "\t\$+";
753    check_taint_not     $1, "\t\$1";
754    check_taint_not  $2, "\t\$2";
755
756    $_ = $a;	# untaint $_
757
758    s/(\w)/\U$1/;
759    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
760    check_taint_not     $&, "\t\$&";
761    check_taint_not     $`, "\t\$`";
762    check_taint_not     $', "\t\$'";
763    check_taint_not     $+, "\t\$+";
764    check_taint_not     $1, "\t\$1";
765    check_taint_not  $2, "\t\$2";
766
767    # After all this tainting $a should be cool.
768
769    check_taint_not  $a, '$a still not tainted';
770
771    "a" =~ /([a-z])/;
772    check_taint_not $1, '"a" =~ /([a-z])/';
773    "foo.bar_baz" =~ /^(.*)[._](.*?)$/;  # Bug 120675
774    check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
775
776}
777
778# Here are in scope of 'use locale'
779
780# I think we've seen quite enough of taint.
781# Let us do some *real* locale work now,
782# unless setlocale() is missing (i.e. minitest).
783
784# The test number before our first setlocale()
785my $final_without_setlocale = $test_num;
786
787# Find locales.
788
789debug "Scanning for locales...\n";
790
791require POSIX; import POSIX ':locale_h';
792my $categories = [ 'LC_CTYPE', 'LC_NUMERIC', 'LC_ALL' ];
793my @Locale;
794my @include_incompatible_locales;
795if ($^O eq "aix"
796    and version->new(($Config{osvers} =~ /^(\d+(\.\d+))/)[0]) < 7) {
797    # https://www.ibm.com/support/pages/apar/IV22097
798    skip("setlocale broken on old AIX");
799}
800else {
801    debug "Scanning for just compatible";
802    @Locale = find_locales($categories);
803    debug "Scanning for even incompatible";
804    @include_incompatible_locales = find_locales($categories,
805                                              'even incompatible locales');
806}
807# The locales included in the incompatible list that aren't in the compatible
808# one.
809my @incompatible_locales;
810if (@Locale < @include_incompatible_locales) {
811    my %seen;
812    @seen{@Locale} = ();
813
814    foreach my $item (@include_incompatible_locales) {
815        push @incompatible_locales, $item unless exists $seen{$item};
816    }
817
818    # For each bad locale, switch into it to find out why it's incompatible
819    for my $bad_locale (@incompatible_locales) {
820        my @warnings;
821
822        use warnings 'locale';
823
824        local $SIG{__WARN__} = sub {
825            my $warning = $_[0];
826            chomp $warning;
827            push @warnings, ($warning =~ s/\n/\n# /sgr);
828        };
829
830        debug "Trying incompatible $bad_locale";
831        my $ret = setlocale(&POSIX::LC_CTYPE, $bad_locale);
832
833        my $message = "testing of locale '$bad_locale' is skipped";
834        if (@warnings) {
835            skip $message . ":\n# " . join "\n# ", @warnings;
836        }
837        elsif (! $ret) {
838            skip("$message:\n#"
839               . " setlocale(&POSIX::LC_CTYPE, '$bad_locale') failed");
840        }
841        else {
842            fail $message . ", because it is was found to be incompatible with"
843                          . " Perl, but could not discern reason";
844        }
845    }
846}
847
848debug "Locales =\n";
849for ( @Locale ) {
850    debug "$_\n";
851}
852
853unless (@Locale) {
854    print "1..$test_num\n";
855    exit;
856}
857
858
859setlocale(&POSIX::LC_ALL, "C");
860
861my %posixes;
862
863my %Problem;
864my %Okay;
865my %Known_bad_locale;   # Failed test for a locale known to be bad
866my %Testing;
867my @Added_alpha;   # Alphas that aren't in the C locale.
868my %test_names;
869
870sub disp_chars {
871    # This returns a display string denoting the input parameter @_, each
872    # entry of which is a single character in the range 0-255.  The first part
873    # of the output is a string of the characters in @_ that are ASCII
874    # graphics, and hence unambiguously displayable.  They are given by code
875    # point order.  The second part is the remaining code points, the ordinals
876    # of which are each displayed as 2-digit hex.  Blanks are inserted so as
877    # to keep anything from the first part looking like a 2-digit hex number.
878
879    no locale;
880    my @chars = sort { ord $a <=> ord $b } @_;
881    my $output = "";
882    my $range_start;
883    my $start_class;
884    push @chars, chr(258);  # This sentinel simplifies the loop termination
885                            # logic
886    foreach my $i (0 .. @chars - 1) {
887        my $char = $chars[$i];
888        my $range_end;
889        my $class;
890
891        # We avoid using [:posix:] classes, as these are being tested in this
892        # file.  Each equivalence class below is for things that can appear in
893        # a range; those that can't be in a range have class -1.  0 for those
894        # which should be output in hex; and >0 for the other ranges
895        if ($char =~ /[A-Z]/) {
896            $class = 2;
897        }
898        elsif ($char =~ /[a-z]/) {
899            $class = 3;
900        }
901        elsif ($char =~ /[0-9]/) {
902            $class = 4;
903        }
904        # Uncomment to get literal punctuation displayed instead of hex
905        #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) {
906        #    $class = -1;    # Punct never appears in a range
907        #}
908        else {
909            $class = 0;     # Output in hex
910        }
911
912        if (! defined $range_start) {
913            if ($class < 0) {
914                $output .= " " . $char;
915            }
916            else {
917                $range_start = ord $char;
918                $start_class = $class;
919            }
920        } # A range ends if not consecutive, or the class-type changes
921        elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1
922              || $class != $start_class)
923        {
924
925            # Here, the current character is not in the range.  This means the
926            # previous character must have been.  Output the range up through
927            # that one.
928            my $range_length = $range_end - $range_start + 1;
929            if ($start_class > 0) {
930                $output .= " " . chr($range_start);
931                $output .= "-" . chr($range_end) if $range_length > 1;
932            }
933            else {
934                $output .= sprintf(" %02X", $range_start);
935                $output .= sprintf("-%02X", $range_end) if $range_length > 1;
936            }
937
938            # Handle the new current character, as potentially beginning a new
939            # range
940            undef $range_start;
941            redo;
942        }
943    }
944
945    $output =~ s/^ //;
946    return $output;
947}
948
949sub disp_str ($) {
950    my $string = shift;
951
952    # Displays the string unambiguously.  ASCII printables are always output
953    # as-is, though perhaps separated by blanks from other characters.  If
954    # entirely printable ASCII, just returns the string.  Otherwise if valid
955    # UTF-8 it uses the character names for non-printable-ASCII.  Otherwise it
956    # outputs hex for each non-ASCII-printable byte.
957
958    return $string if $string =~ / ^ [[:print:]]* $/xa;
959
960    my $result = "";
961    my $prev_was_punct = 1; # Beginning is considered punct
962    if (utf8::valid($string) && utf8::is_utf8($string)) {
963        use charnames ();
964        foreach my $char (split "", $string) {
965
966            # Keep punctuation adjacent to other characters; otherwise
967            # separate them with a blank
968            if ($char =~ /[[:punct:]]/a) {
969                $result .= $char;
970                $prev_was_punct = 1;
971            }
972            elsif ($char =~ /[[:print:]]/a) {
973                $result .= "  " unless $prev_was_punct;
974                $result .= $char;
975                $prev_was_punct = 0;
976            }
977            else {
978                $result .= "  " unless $prev_was_punct;
979                my $name = charnames::viacode(ord $char);
980                $result .= (defined $name) ? $name : ':unknown:';
981                $prev_was_punct = 0;
982            }
983        }
984    }
985    else {
986        use bytes;
987        foreach my $char (split "", $string) {
988            if ($char =~ /[[:punct:]]/a) {
989                $result .= $char;
990                $prev_was_punct = 1;
991            }
992            elsif ($char =~ /[[:print:]]/a) {
993                $result .= " " unless $prev_was_punct;
994                $result .= $char;
995                $prev_was_punct = 0;
996            }
997            else {
998                $result .= " " unless $prev_was_punct;
999                $result .= sprintf("%02X", ord $char);
1000                $prev_was_punct = 0;
1001            }
1002        }
1003    }
1004
1005    return $result;
1006}
1007
1008sub report_result {
1009    my ($Locale, $i, $pass_fail, $message) = @_;
1010    if ($pass_fail) {
1011	push @{$Okay{$i}}, $Locale;
1012    }
1013    else {
1014        $message //= "";
1015        $message = "  ($message)" if $message;
1016	$Known_bad_locale{$i}{$Locale} = 1 if exists $known_bad_locales{$os}
1017                                         && $Locale =~ $known_bad_locales{$os};
1018	$Problem{$i}{$Locale} = 1;
1019	debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n";
1020    }
1021}
1022
1023sub report_multi_result {
1024    my ($Locale, $i, $results_ref) = @_;
1025
1026    # $results_ref points to an array, each element of which is a character that was
1027    # in error for this test numbered '$i'.  If empty, the test passed
1028
1029    my $message = "";
1030    if (@$results_ref) {
1031        $message = join " ", "for", disp_chars(@$results_ref);
1032    }
1033    report_result($Locale, $i, @$results_ref == 0, $message);
1034}
1035
1036my $first_locales_test_number = $final_without_setlocale
1037                              + 1 + @incompatible_locales;
1038my $locales_test_number;
1039my $not_necessarily_a_problem_test_number;
1040my $first_casing_test_number;
1041my %setlocale_failed;   # List of locales that setlocale() didn't work on
1042
1043foreach my $Locale (@Locale) {
1044    $locales_test_number = $first_locales_test_number - 1;
1045    debug "\n";
1046    debug "Locale = $Locale\n";
1047
1048    unless (setlocale(&POSIX::LC_ALL, $Locale)) {
1049        $setlocale_failed{$Locale} = $Locale;
1050	next;
1051    }
1052
1053    # We test UTF-8 locales only under ':not_characters';  It is easier to
1054    # test them in other test files than here.  Non- UTF-8 locales are tested
1055    # only under plain 'use locale', as otherwise we would have to convert
1056    # everything in them to Unicode.
1057
1058    my %UPPER = ();     # All alpha X for which uc(X) == X and lc(X) != X
1059    my %lower = ();     # All alpha X for which lc(X) == X and uc(X) != X
1060    my %BoThCaSe = ();  # All alpha X for which uc(X) == lc(X) == X
1061
1062    my $is_utf8_locale = is_locale_utf8($Locale);
1063
1064    if ($debug) {
1065        debug "code set = " . langinfo(CODESET);
1066        debug "is utf8 locale? = $is_utf8_locale\n";
1067        debug "radix = " . disp_str(langinfo(RADIXCHAR)) . "\n";
1068        debug "currency = " . disp_str(langinfo(CRNCYSTR));
1069    }
1070
1071    if (! $is_utf8_locale) {
1072        use locale;
1073        @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
1074        @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
1075        @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
1076        @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
1077        @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
1078        @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
1079        @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
1080        @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
1081        @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
1082        @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
1083        @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
1084        @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
1085        @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
1086        @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
1087        @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
1088
1089        # Sieve the uppercase and the lowercase.
1090
1091        for (@{$posixes{'word'}}) {
1092            if (/[^\d_]/) { # skip digits and the _
1093                if (uc($_) eq $_) {
1094                    $UPPER{$_} = $_;
1095                }
1096                if (lc($_) eq $_) {
1097                    $lower{$_} = $_;
1098                }
1099            }
1100        }
1101    }
1102    else {
1103        use locale ':not_characters';
1104        @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
1105        @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
1106        @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
1107        @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
1108        @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
1109        @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
1110        @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
1111        @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
1112        @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
1113        @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
1114        @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
1115        @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
1116        @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
1117        @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
1118        @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
1119        for (@{$posixes{'word'}}) {
1120            if (/[^\d_]/) { # skip digits and the _
1121                if (uc($_) eq $_) {
1122                    $UPPER{$_} = $_;
1123                }
1124                if (lc($_) eq $_) {
1125                    $lower{$_} = $_;
1126                }
1127            }
1128        }
1129    }
1130
1131    # Ordered, where possible,  in groups of "this is a subset of the next
1132    # one"
1133    debug ":upper:  = ", disp_chars(@{$posixes{'upper'}}), "\n";
1134    debug ":lower:  = ", disp_chars(@{$posixes{'lower'}}), "\n";
1135    debug ":cased:  = ", disp_chars(@{$posixes{'cased'}}), "\n";
1136    debug ":alpha:  = ", disp_chars(@{$posixes{'alpha'}}), "\n";
1137    debug ":alnum:  = ", disp_chars(@{$posixes{'alnum'}}), "\n";
1138    debug ' \w      = ', disp_chars(@{$posixes{'word'}}), "\n";
1139    debug ":graph:  = ", disp_chars(@{$posixes{'graph'}}), "\n";
1140    debug ":print:  = ", disp_chars(@{$posixes{'print'}}), "\n";
1141    debug ' \d      = ', disp_chars(@{$posixes{'digit'}}), "\n";
1142    debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n";
1143    debug ":blank:  = ", disp_chars(@{$posixes{'blank'}}), "\n";
1144    debug ' \s      = ', disp_chars(@{$posixes{'space'}}), "\n";
1145    debug ":punct:  = ", disp_chars(@{$posixes{'punct'}}), "\n";
1146    debug ":cntrl:  = ", disp_chars(@{$posixes{'cntrl'}}), "\n";
1147    debug ":ascii:  = ", disp_chars(@{$posixes{'ascii'}}), "\n";
1148
1149    foreach (keys %UPPER) {
1150
1151	$BoThCaSe{$_}++ if exists $lower{$_};
1152    }
1153    foreach (keys %lower) {
1154	$BoThCaSe{$_}++ if exists $UPPER{$_};
1155    }
1156    foreach (keys %BoThCaSe) {
1157	delete $UPPER{$_};
1158	delete $lower{$_};
1159    }
1160
1161    my %Unassigned;
1162    foreach my $ord ( 0 .. 255 ) {
1163        $Unassigned{chr $ord} = 1;
1164    }
1165    foreach my $class (keys %posixes) {
1166        foreach my $char (@{$posixes{$class}}) {
1167            delete $Unassigned{$char};
1168        }
1169    }
1170
1171    debug "UPPER    = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n";
1172    debug "lower    = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n";
1173    debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n";
1174    debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n";
1175
1176    my @failures;
1177    my @fold_failures;
1178    foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
1179        my $ok;
1180        my $fold_ok;
1181        if ($is_utf8_locale) {
1182            use locale ':not_characters';
1183            $ok = $x =~ /[[:upper:]]/;
1184            $fold_ok = $x =~ /[[:lower:]]/i;
1185        }
1186        else {
1187            use locale;
1188            $ok = $x =~ /[[:upper:]]/;
1189            $fold_ok = $x =~ /[[:lower:]]/i;
1190        }
1191        push @failures, $x unless $ok;
1192        push @fold_failures, $x unless $fold_ok;
1193    }
1194    $locales_test_number++;
1195    $first_casing_test_number = $locales_test_number;
1196    $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X';
1197    report_multi_result($Locale, $locales_test_number, \@failures);
1198
1199    $locales_test_number++;
1200
1201    $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X';
1202    report_multi_result($Locale, $locales_test_number, \@fold_failures);
1203
1204    undef @failures;
1205    undef @fold_failures;
1206
1207    foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
1208        my $ok;
1209        my $fold_ok;
1210        if ($is_utf8_locale) {
1211            use locale ':not_characters';
1212            $ok = $x =~ /[[:lower:]]/;
1213            $fold_ok = $x =~ /[[:upper:]]/i;
1214        }
1215        else {
1216            use locale;
1217            $ok = $x =~ /[[:lower:]]/;
1218            $fold_ok = $x =~ /[[:upper:]]/i;
1219        }
1220        push @failures, $x unless $ok;
1221        push @fold_failures, $x unless $fold_ok;
1222    }
1223
1224    $locales_test_number++;
1225    $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X';
1226    report_multi_result($Locale, $locales_test_number, \@failures);
1227
1228    $locales_test_number++;
1229    $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X';
1230    report_multi_result($Locale, $locales_test_number, \@fold_failures);
1231
1232    {   # Find the alphabetic characters that are not considered alphabetics
1233        # in the default (C) locale.
1234
1235	no locale;
1236
1237	@Added_alpha = ();
1238	for (keys %UPPER, keys %lower, keys %BoThCaSe) {
1239	    push(@Added_alpha, $_) if (/\W/);
1240	}
1241    }
1242
1243    @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha;
1244
1245    debug "Added_alpha = ", disp_chars(@Added_alpha), "\n";
1246
1247    # Cross-check the whole 8-bit character set.
1248
1249    ++$locales_test_number;
1250    my @f;
1251    $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical';
1252    for (map { chr } 0..255) {
1253        if ($is_utf8_locale) {
1254            use locale ':not_characters';
1255            push @f, $_ unless /[[:word:]]/ == /\w/;
1256        }
1257        else {
1258            push @f, $_ unless /[[:word:]]/ == /\w/;
1259        }
1260    }
1261    report_multi_result($Locale, $locales_test_number, \@f);
1262
1263    ++$locales_test_number;
1264    undef @f;
1265    $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical';
1266    for (map { chr } 0..255) {
1267        if ($is_utf8_locale) {
1268            use locale ':not_characters';
1269            push @f, $_ unless /[[:digit:]]/ == /\d/;
1270        }
1271        else {
1272            push @f, $_ unless /[[:digit:]]/ == /\d/;
1273        }
1274    }
1275    report_multi_result($Locale, $locales_test_number, \@f);
1276
1277    ++$locales_test_number;
1278    undef @f;
1279    $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical';
1280    for (map { chr } 0..255) {
1281        if ($is_utf8_locale) {
1282            use locale ':not_characters';
1283            push @f, $_ unless /[[:space:]]/ == /\s/;
1284        }
1285        else {
1286            push @f, $_ unless /[[:space:]]/ == /\s/;
1287        }
1288    }
1289    report_multi_result($Locale, $locales_test_number, \@f);
1290
1291    ++$locales_test_number;
1292    undef @f;
1293    $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive';
1294    for (map { chr } 0..255) {
1295        if ($is_utf8_locale) {
1296            use locale ':not_characters';
1297            push @f, $_ unless   (/[[:alpha:]]/ xor /[[:^alpha:]]/)   ||
1298                    (/[[:alnum:]]/ xor /[[:^alnum:]]/)   ||
1299                    (/[[:ascii:]]/ xor /[[:^ascii:]]/)   ||
1300                    (/[[:blank:]]/ xor /[[:^blank:]]/)   ||
1301                    (/[[:cntrl:]]/ xor /[[:^cntrl:]]/)   ||
1302                    (/[[:digit:]]/ xor /[[:^digit:]]/)   ||
1303                    (/[[:graph:]]/ xor /[[:^graph:]]/)   ||
1304                    (/[[:lower:]]/ xor /[[:^lower:]]/)   ||
1305                    (/[[:print:]]/ xor /[[:^print:]]/)   ||
1306                    (/[[:space:]]/ xor /[[:^space:]]/)   ||
1307                    (/[[:upper:]]/ xor /[[:^upper:]]/)   ||
1308                    (/[[:word:]]/  xor /[[:^word:]]/)    ||
1309                    (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1310
1311                    # effectively is what [:cased:] would be if it existed.
1312                    (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
1313        }
1314        else {
1315            push @f, $_ unless   (/[[:alpha:]]/ xor /[[:^alpha:]]/)   ||
1316                    (/[[:alnum:]]/ xor /[[:^alnum:]]/)   ||
1317                    (/[[:ascii:]]/ xor /[[:^ascii:]]/)   ||
1318                    (/[[:blank:]]/ xor /[[:^blank:]]/)   ||
1319                    (/[[:cntrl:]]/ xor /[[:^cntrl:]]/)   ||
1320                    (/[[:digit:]]/ xor /[[:^digit:]]/)   ||
1321                    (/[[:graph:]]/ xor /[[:^graph:]]/)   ||
1322                    (/[[:lower:]]/ xor /[[:^lower:]]/)   ||
1323                    (/[[:print:]]/ xor /[[:^print:]]/)   ||
1324                    (/[[:space:]]/ xor /[[:^space:]]/)   ||
1325                    (/[[:upper:]]/ xor /[[:^upper:]]/)   ||
1326                    (/[[:word:]]/  xor /[[:^word:]]/)    ||
1327                    (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1328                    (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
1329        }
1330    }
1331    report_multi_result($Locale, $locales_test_number, \@f);
1332
1333    # The rules for the relationships are given in:
1334    # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
1335
1336
1337    ++$locales_test_number;
1338    undef @f;
1339    $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z';
1340    for ('a' .. 'z') {
1341        if ($is_utf8_locale) {
1342            use locale ':not_characters';
1343            push @f, $_  unless /[[:lower:]]/;
1344        }
1345        else {
1346            push @f, $_  unless /[[:lower:]]/;
1347        }
1348    }
1349    report_multi_result($Locale, $locales_test_number, \@f);
1350
1351    ++$locales_test_number;
1352    undef @f;
1353    $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]';
1354    for (map { chr } 0..255) {
1355        if ($is_utf8_locale) {
1356            use locale ':not_characters';
1357            push @f, $_  if /[[:lower:]]/ and ! /[[:alpha:]]/;
1358        }
1359        else {
1360            push @f, $_  if /[[:lower:]]/ and ! /[[:alpha:]]/;
1361        }
1362    }
1363    report_multi_result($Locale, $locales_test_number, \@f);
1364
1365    ++$locales_test_number;
1366    undef @f;
1367    $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z';
1368    for ('A' .. 'Z') {
1369        if ($is_utf8_locale) {
1370            use locale ':not_characters';
1371            push @f, $_  unless /[[:upper:]]/;
1372        }
1373        else {
1374            push @f, $_  unless /[[:upper:]]/;
1375        }
1376    }
1377    report_multi_result($Locale, $locales_test_number, \@f);
1378
1379    ++$locales_test_number;
1380    undef @f;
1381    $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]';
1382    for (map { chr } 0..255) {
1383        if ($is_utf8_locale) {
1384            use locale ':not_characters';
1385            push @f, $_  if /[[:upper:]]/ and ! /[[:alpha:]]/;
1386        }
1387        else {
1388            push @f, $_ if /[[:upper:]]/  and ! /[[:alpha:]]/;
1389        }
1390    }
1391    report_multi_result($Locale, $locales_test_number, \@f);
1392
1393    ++$locales_test_number;
1394    undef @f;
1395    $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]';
1396    for (map { chr } 0..255) {
1397        if ($is_utf8_locale) {
1398            use locale ':not_characters';
1399            push @f, $_ if /[[:lower:]]/i  and ! /[[:alpha:]]/;
1400        }
1401        else {
1402            push @f, $_ if /[[:lower:]]/i  and ! /[[:alpha:]]/;
1403        }
1404    }
1405    report_multi_result($Locale, $locales_test_number, \@f);
1406
1407    ++$locales_test_number;
1408    undef @f;
1409    $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]';
1410    for (map { chr } 0..255) {
1411        if ($is_utf8_locale) {
1412            use locale ':not_characters';
1413            push @f, $_ if /[[:alpha:]]/  and ! /[[:alnum:]]/;
1414        }
1415        else {
1416            push @f, $_ if /[[:alpha:]]/  and ! /[[:alnum:]]/;
1417        }
1418    }
1419    report_multi_result($Locale, $locales_test_number, \@f);
1420
1421    ++$locales_test_number;
1422    undef @f;
1423    $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9';
1424    for ('0' .. '9') {
1425        if ($is_utf8_locale) {
1426            use locale ':not_characters';
1427            push @f, $_  unless /[[:digit:]]/;
1428        }
1429        else {
1430            push @f, $_  unless /[[:digit:]]/;
1431        }
1432    }
1433    report_multi_result($Locale, $locales_test_number, \@f);
1434
1435    ++$locales_test_number;
1436    undef @f;
1437    $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]';
1438    for (map { chr } 0..255) {
1439        if ($is_utf8_locale) {
1440            use locale ':not_characters';
1441            push @f, $_ if /[[:digit:]]/  and ! /[[:alnum:]]/;
1442        }
1443        else {
1444            push @f, $_ if /[[:digit:]]/  and ! /[[:alnum:]]/;
1445        }
1446    }
1447    report_multi_result($Locale, $locales_test_number, \@f);
1448
1449    ++$locales_test_number;
1450    undef @f;
1451    $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points';
1452    report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20);
1453
1454    ++$locales_test_number;
1455    undef @f;
1456    $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive';
1457    if (@{$posixes{'digit'}} == 20) {
1458        my $previous_ord;
1459        for (map { chr } 0..255) {
1460            next unless /[[:digit:]]/;
1461            next if /[0-9]/;
1462            if (defined $previous_ord) {
1463                if ($is_utf8_locale) {
1464                    use locale ':not_characters';
1465                    push @f, $_ if ord $_ != $previous_ord + 1;
1466                }
1467                else {
1468                    push @f, $_ if ord $_ != $previous_ord + 1;
1469                }
1470            }
1471            $previous_ord = ord $_;
1472        }
1473    }
1474    report_multi_result($Locale, $locales_test_number, \@f);
1475
1476    ++$locales_test_number;
1477    undef @f;
1478    my @xdigit_digits;  # :digit: & :xdigit:
1479    $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars';
1480    for (map { chr } 0..255) {
1481        if ($is_utf8_locale) {
1482            use locale ':not_characters';
1483            # For utf8 locales, we actually use a stricter test: that :digit:
1484            # is a subset of :xdigit:, as we know that only 0-9 should match
1485            push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
1486        }
1487        else {
1488            push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/;
1489        }
1490    }
1491    if (! $is_utf8_locale) {
1492
1493        # For non-utf8 locales, @xdigit_digits is a list of the characters
1494        # that are both :xdigit: and :digit:.  Because :digit: is stored in
1495        # increasing code point order (unless the tests above failed),
1496        # @xdigit_digits is as well.  There should be exactly 10 or
1497        # 20 of these.
1498        if (@xdigit_digits != 10 && @xdigit_digits != 20) {
1499            @f = @xdigit_digits;
1500        }
1501        else {
1502
1503            # Look for contiguity in the series, adding any wrong ones to @f
1504            my @temp = @xdigit_digits;
1505            while (@temp > 1) {
1506                push @f, $temp[1] if ($temp[0] != $temp[1] - 1)
1507
1508                                     # Skip this test for the 0th character of
1509                                     # the second block of 10, as it won't be
1510                                     # contiguous with the previous block
1511                                     && (! defined $xdigit_digits[10]
1512                                         || $temp[1] != $xdigit_digits[10]);
1513                shift @temp;
1514            }
1515        }
1516    }
1517
1518    report_multi_result($Locale, $locales_test_number, \@f);
1519
1520    ++$locales_test_number;
1521    undef @f;
1522    $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f';
1523    for ('A' .. 'F', 'a' .. 'f') {
1524        if ($is_utf8_locale) {
1525            use locale ':not_characters';
1526            push @f, $_  unless /[[:xdigit:]]/;
1527        }
1528        else {
1529            push @f, $_  unless /[[:xdigit:]]/;
1530        }
1531    }
1532    report_multi_result($Locale, $locales_test_number, \@f);
1533
1534    ++$locales_test_number;
1535    undef @f;
1536    $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points';
1537    my $previous_ord;
1538    my $count = 0;
1539    for my $chr (map { chr } 0..255) {
1540        next unless $chr =~ /[[:xdigit:]]/;
1541        if ($is_utf8_locale) {
1542            next if $chr =~ /[[:digit:]]/;
1543        }
1544        else {
1545            next if grep { $chr eq $_ } @xdigit_digits;
1546        }
1547        next if $chr =~ /[A-Fa-f]/;
1548        if (defined $previous_ord) {
1549            if ($is_utf8_locale) {
1550                use locale ':not_characters';
1551                push @f, $chr if ord $chr != $previous_ord + 1;
1552            }
1553            else {
1554                push @f, $chr if ord $chr != $previous_ord + 1;
1555            }
1556        }
1557        $count++;
1558        if ($count == 6) {
1559            undef $previous_ord;
1560        }
1561        else {
1562            $previous_ord = ord $chr;
1563        }
1564    }
1565    report_multi_result($Locale, $locales_test_number, \@f);
1566
1567    ++$locales_test_number;
1568    undef @f;
1569    $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]';
1570    for (map { chr } 0..255) {
1571        if ($is_utf8_locale) {
1572            use locale ':not_characters';
1573            push @f, $_ if /[[:xdigit:]]/  and ! /[[:graph:]]/;
1574        }
1575        else {
1576            push @f, $_ if /[[:xdigit:]]/  and ! /[[:graph:]]/;
1577        }
1578    }
1579    report_multi_result($Locale, $locales_test_number, \@f);
1580
1581    # Note that xdigit doesn't have to be a subset of alnum
1582
1583    ++$locales_test_number;
1584    undef @f;
1585    $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]';
1586    for (map { chr } 0..255) {
1587        if ($is_utf8_locale) {
1588            use locale ':not_characters';
1589            push @f, $_ if /[[:punct:]]/  and ! /[[:graph:]]/;
1590        }
1591        else {
1592            push @f, $_ if /[[:punct:]]/  and ! /[[:graph:]]/;
1593        }
1594    }
1595    report_multi_result($Locale, $locales_test_number, \@f);
1596
1597    ++$locales_test_number;
1598    undef @f;
1599    $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]';
1600    if ($is_utf8_locale) {
1601        use locale ':not_characters';
1602        push @f, " " if " " =~ /[[:graph:]]/;
1603    }
1604    else {
1605        push @f, " " if " " =~ /[[:graph:]]/;
1606    }
1607    report_multi_result($Locale, $locales_test_number, \@f);
1608
1609    ++$locales_test_number;
1610    undef @f;
1611    $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]';
1612    for (' ', "\f", "\n", "\r", "\t", "\cK") {
1613        if ($is_utf8_locale) {
1614            use locale ':not_characters';
1615            push @f, $_  unless /[[:space:]]/;
1616        }
1617        else {
1618            push @f, $_  unless /[[:space:]]/;
1619        }
1620    }
1621    report_multi_result($Locale, $locales_test_number, \@f);
1622
1623    ++$locales_test_number;
1624    undef @f;
1625    $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]';
1626    for (' ', "\t") {
1627        if ($is_utf8_locale) {
1628            use locale ':not_characters';
1629            push @f, $_  unless /[[:blank:]]/;
1630        }
1631        else {
1632            push @f, $_  unless /[[:blank:]]/;
1633        }
1634    }
1635    report_multi_result($Locale, $locales_test_number, \@f);
1636
1637    ++$locales_test_number;
1638    undef @f;
1639    $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]';
1640    for (map { chr } 0..255) {
1641        if ($is_utf8_locale) {
1642            use locale ':not_characters';
1643            push @f, $_ if /[[:blank:]]/  and ! /[[:space:]]/;
1644        }
1645        else {
1646            push @f, $_ if /[[:blank:]]/  and ! /[[:space:]]/;
1647        }
1648    }
1649    report_multi_result($Locale, $locales_test_number, \@f);
1650
1651    ++$locales_test_number;
1652    undef @f;
1653    $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]';
1654    for (map { chr } 0..255) {
1655        if ($is_utf8_locale) {
1656            use locale ':not_characters';
1657            push @f, $_ if /[[:graph:]]/  and ! /[[:print:]]/;
1658        }
1659        else {
1660            push @f, $_ if /[[:graph:]]/  and ! /[[:print:]]/;
1661        }
1662    }
1663    report_multi_result($Locale, $locales_test_number, \@f);
1664
1665    ++$locales_test_number;
1666    undef @f;
1667    $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]';
1668    if ($is_utf8_locale) {
1669        use locale ':not_characters';
1670        push @f, " " if " " !~ /[[:print:]]/;
1671    }
1672    else {
1673        push @f, " " if " " !~ /[[:print:]]/;
1674    }
1675    report_multi_result($Locale, $locales_test_number, \@f);
1676
1677    ++$locales_test_number;
1678    undef @f;
1679    $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]';
1680    for (map { chr } 0..255) {
1681        if ($is_utf8_locale) {
1682            use locale ':not_characters';
1683            push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1684        }
1685        else {
1686            push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1687        }
1688    }
1689    report_multi_result($Locale, $locales_test_number, \@f);
1690
1691    ++$locales_test_number;
1692    undef @f;
1693    $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]';
1694    for (map { chr } 0..255) {
1695        if ($is_utf8_locale) {
1696            use locale ':not_characters';
1697            push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1698        }
1699        else {
1700            push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1701        }
1702    }
1703    report_multi_result($Locale, $locales_test_number, \@f);
1704
1705    ++$locales_test_number;
1706    undef @f;
1707    $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]';
1708    for (map { chr } 0..255) {
1709        if ($is_utf8_locale) {
1710            use locale ':not_characters';
1711            push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1712        }
1713        else {
1714            push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1715        }
1716    }
1717    report_multi_result($Locale, $locales_test_number, \@f);
1718
1719    ++$locales_test_number;
1720    undef @f;
1721    $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]';
1722    for (map { chr } 0..255) {
1723        if ($is_utf8_locale) {
1724            use locale ':not_characters';
1725            push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1726        }
1727        else {
1728            push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1729        }
1730    }
1731    report_multi_result($Locale, $locales_test_number, \@f);
1732
1733    ++$locales_test_number;
1734    undef @f;
1735    $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]';
1736    for (map { chr } 0..255) {
1737        if ($is_utf8_locale) {
1738            use locale ':not_characters';
1739            push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1740        }
1741        else {
1742            push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1743        }
1744    }
1745    report_multi_result($Locale, $locales_test_number, \@f);
1746
1747    foreach ($first_casing_test_number..$locales_test_number) {
1748        $problematical_tests{$_} = 1;
1749    }
1750
1751
1752    # Test for read-only scalars' locale vs non-locale comparisons.
1753
1754    {
1755        no locale;
1756        my $ok;
1757        $a = "qwerty";
1758        if ($is_utf8_locale) {
1759            use locale ':not_characters';
1760            $ok = ($a cmp "qwerty") == 0;
1761        }
1762        else {
1763            use locale;
1764            $ok = ($a cmp "qwerty") == 0;
1765        }
1766        report_result($Locale, ++$locales_test_number, $ok);
1767        $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale';
1768    }
1769
1770    {
1771        my ($from, $to, $lesser, $greater,
1772            @test, %test, $test, $yes, $no, $sign);
1773
1774        ++$locales_test_number;
1775        $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work';
1776        $not_necessarily_a_problem_test_number = $locales_test_number;
1777        for (0..9) {
1778            # Select a slice.
1779            $from = int(($_*@{$posixes{'word'}})/10);
1780            $to = $from + int(@{$posixes{'word'}}/10);
1781            $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1782            $lesser  = join('', @{$posixes{'word'}}[$from..$to]);
1783            # Select a slice one character on.
1784            $from++; $to++;
1785            $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1786            $greater = join('', @{$posixes{'word'}}[$from..$to]);
1787            if ($is_utf8_locale) {
1788                use locale ':not_characters';
1789                ($yes, $no, $sign) = ($lesser lt $greater
1790                                    ? ("    ", "not ", 1)
1791                                    : ("not ", "    ", -1));
1792            }
1793            else {
1794                use locale;
1795                ($yes, $no, $sign) = ($lesser lt $greater
1796                                    ? ("    ", "not ", 1)
1797                                    : ("not ", "    ", -1));
1798            }
1799            # all these tests should FAIL (return 0).  Exact lt or gt cannot
1800            # be tested because in some locales, say, eacute and E may test
1801            # equal.
1802            @test =
1803                (
1804                    $no.'    ($lesser  le $greater)',  # 1
1805                    'not      ($lesser  ne $greater)', # 2
1806                    '         ($lesser  eq $greater)', # 3
1807                    $yes.'    ($lesser  ge $greater)', # 4
1808                    $yes.'    ($lesser  ge $greater)', # 5
1809                    $yes.'    ($greater le $lesser )', # 7
1810                    'not      ($greater ne $lesser )', # 8
1811                    '         ($greater eq $lesser )', # 9
1812                    $no.'     ($greater ge $lesser )', # 10
1813                    'not (($lesser cmp $greater) == -($sign))' # 11
1814                    );
1815            @test{@test} = 0 x @test;
1816            $test = 0;
1817            for my $ti (@test) {
1818                if ($is_utf8_locale) {
1819                    use locale ':not_characters';
1820                    $test{$ti} = eval $ti;
1821                }
1822                else {
1823                    # Already in 'use locale';
1824                    $test{$ti} = eval $ti;
1825                }
1826                $test ||= $test{$ti}
1827            }
1828            report_result($Locale, $locales_test_number, $test == 0);
1829            if ($test) {
1830                debug "lesser  = '$lesser'\n";
1831                debug "greater = '$greater'\n";
1832                debug "lesser cmp greater = ",
1833                        $lesser cmp $greater, "\n";
1834                debug "greater cmp lesser = ",
1835                        $greater cmp $lesser, "\n";
1836                debug "(greater) from = $from, to = $to\n";
1837                for my $ti (@test) {
1838                    debugf("# %-40s %-4s", $ti,
1839                            $test{$ti} ? 'FAIL' : 'ok');
1840                    if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
1841                        debugf("(%s == %4d)", $1, eval $1);
1842                    }
1843                    debugf("\n#");
1844                }
1845
1846                last;
1847            }
1848        }
1849
1850        use locale;
1851
1852        my @sorted_controls;
1853
1854        ++$locales_test_number;
1855        $test_names{$locales_test_number}
1856                = 'Skip in locales where there are no controls;'
1857                . ' otherwise verify that \0 sorts before any (other) control';
1858        if (! $posixes{'cntrl'}) {
1859            report_result($Locale, $locales_test_number, 1);
1860
1861            # We use all code points for the tests below since there aren't
1862            # any controls
1863            push @sorted_controls, chr $_ for 1..255;
1864            @sorted_controls = sort @sorted_controls;
1865        }
1866        else {
1867            @sorted_controls = @{$posixes{'cntrl'}};
1868            push @sorted_controls, "\0",
1869                                unless grep { $_ eq "\0" } @sorted_controls;
1870            @sorted_controls = sort @sorted_controls;
1871            my $output = "";
1872            for my $control (@sorted_controls) {
1873                $output .= " " . disp_chars($control);
1874            }
1875            debug "sorted :cntrl: (plus NUL) = $output\n";
1876            my $ok = $sorted_controls[0] eq "\0";
1877            report_result($Locale, $locales_test_number, $ok);
1878
1879            shift @sorted_controls if $ok;
1880        }
1881
1882        my $lowest_control = $sorted_controls[0];
1883
1884        ++$locales_test_number;
1885        $test_names{$locales_test_number}
1886            = 'Skip in locales where all controls have primary sorting weight; '
1887            . 'otherwise verify that \0 doesn\'t have primary sorting weight';
1888        if ("a${lowest_control}c" lt "ab") {
1889            report_result($Locale, $locales_test_number, 1);
1890        }
1891        else {
1892            my $ok = "ab" lt "a\0c";
1893            report_result($Locale, $locales_test_number, $ok);
1894        }
1895
1896        ++$locales_test_number;
1897        $test_names{$locales_test_number}
1898                            = 'Verify that strings with embedded NUL collate';
1899        my $ok = "a\0a\0a" lt "a${lowest_control}a${lowest_control}a";
1900        report_result($Locale, $locales_test_number, $ok);
1901
1902        ++$locales_test_number;
1903        $test_names{$locales_test_number}
1904                            = 'Verify that strings with embedded NUL and '
1905                            . 'extra trailing NUL collate';
1906        $ok = "a\0a\0" lt "a${lowest_control}a${lowest_control}";
1907        report_result($Locale, $locales_test_number, $ok);
1908
1909        ++$locales_test_number;
1910        $test_names{$locales_test_number}
1911                            = 'Verify that empty strings collate';
1912        $ok = "" le "";
1913        report_result($Locale, $locales_test_number, $ok);
1914
1915        ++$locales_test_number;
1916        $test_names{$locales_test_number}
1917            = "Skip in non-UTF-8 locales; otherwise verify that UTF8ness "
1918            . "doesn't matter with collation";
1919        if (! $is_utf8_locale) {
1920            report_result($Locale, $locales_test_number, 1);
1921        }
1922        else {
1923
1924            # khw can't think of anything better.  Start with a string that is
1925            # higher than its UTF-8 representation in both EBCDIC and ASCII
1926            my $string = chr utf8::unicode_to_native(0xff);
1927            my $utf8_string = $string;
1928            utf8::upgrade($utf8_string);
1929
1930            # 8 should be lt 9 in all locales (except ones that aren't
1931            # ASCII-based, which might fail this)
1932            $ok = ("a${string}8") lt ("a${utf8_string}9");
1933            report_result($Locale, $locales_test_number, $ok);
1934        }
1935
1936        ++$locales_test_number;
1937        $test_names{$locales_test_number}
1938            = "Skip in UTF-8 locales; otherwise verify that single byte "
1939            . "collates before 0x100 and above";
1940        if ($is_utf8_locale) {
1941            report_result($Locale, $locales_test_number, 1);
1942        }
1943        else {
1944            my $max_collating = chr 0;  # Find byte that collates highest
1945            for my $i (0 .. 255) {
1946                my $char = chr $i;
1947                $max_collating = $char if $char gt $max_collating;
1948            }
1949            $ok = $max_collating lt chr 0x100;
1950            report_result($Locale, $locales_test_number, $ok);
1951        }
1952
1953        ++$locales_test_number;
1954        $test_names{$locales_test_number}
1955            = "Skip in UTF-8 locales; otherwise verify that 0x100 and "
1956            . "above collate in code point order";
1957        if ($is_utf8_locale) {
1958            report_result($Locale, $locales_test_number, 1);
1959        }
1960        else {
1961            $ok = chr 0x100 lt chr 0x101;
1962            report_result($Locale, $locales_test_number, $ok);
1963        }
1964    }
1965
1966    my $ok1;
1967    my $ok2;
1968    my $ok3;
1969    my $ok4;
1970    my $ok5;
1971    my $ok6;
1972    my $ok7;
1973    my $ok8;
1974    my $ok9;
1975    my $ok10;
1976    my $ok11;
1977    my $ok12;
1978    my $ok13;
1979    my $ok14;
1980    my $ok14_5;
1981    my $ok15;
1982    my $ok16;
1983    my $ok17;
1984    my $ok18;
1985    my $ok19;
1986    my $ok20;
1987    my $ok21;
1988
1989    my $c;
1990    my $d;
1991    my $e;
1992    my $f;
1993    my $g;
1994    my $h;
1995    my $i;
1996    my $j;
1997
1998    if (! $is_utf8_locale) {
1999        use locale;
2000
2001        my ($x, $y) = (1.23, 1.23);
2002
2003        $a = "$x";
2004        printf ''; # printf used to reset locale to "C"
2005        $b = "$y";
2006        $ok1 = $a eq $b;
2007
2008        $c = "$x";
2009        my $z = sprintf ''; # sprintf used to reset locale to "C"
2010        $d = "$y";
2011        $ok2 = $c eq $d;
2012        {
2013
2014            use warnings;
2015            my $w = 0;
2016            local $SIG{__WARN__} =
2017                sub {
2018                    print "# @_\n";
2019                    $w++;
2020                };
2021
2022            # The == (among other ops) used to warn for locales
2023            # that had something else than "." as the radix character.
2024
2025            $ok3 = $c == 1.23;
2026            $ok4 = $c == $x;
2027            $ok5 = $c == $d;
2028            {
2029                no locale;
2030
2031                $e = "$x";
2032
2033                $ok6 = $e == 1.23;
2034                $ok7 = $e == $x;
2035                $ok8 = $e == $c;
2036            }
2037
2038            $f = "1.23";
2039            $g = 2.34;
2040            $h = 1.5;
2041            $i = 1.25;
2042            $j = "$h:$i";
2043
2044            $ok9 = $f == 1.23;
2045            $ok10 = $f == $x;
2046            $ok11 = $f == $c;
2047            $ok12 = abs(($f + $g) - 3.57) < 0.01;
2048            $ok13 = $w == 0;
2049            $ok14 = $ok14_5 = $ok15 = $ok16 = 1;  # Skip for non-utf8 locales
2050        }
2051        {
2052            no locale;
2053            $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
2054        }
2055        $ok18 = $j eq sprintf("%g:%g", $h, $i);
2056    }
2057    else {
2058        use locale ':not_characters';
2059
2060        my ($x, $y) = (1.23, 1.23);
2061        $a = "$x";
2062        printf ''; # printf used to reset locale to "C"
2063        $b = "$y";
2064        $ok1 = $a eq $b;
2065
2066        $c = "$x";
2067        my $z = sprintf ''; # sprintf used to reset locale to "C"
2068        $d = "$y";
2069        $ok2 = $c eq $d;
2070        {
2071            use warnings;
2072            my $w = 0;
2073            local $SIG{__WARN__} =
2074                sub {
2075                    print "# @_\n";
2076                    $w++;
2077                };
2078            $ok3 = $c == 1.23;
2079            $ok4 = $c == $x;
2080            $ok5 = $c == $d;
2081            {
2082                no locale;
2083                $e = "$x";
2084
2085                $ok6 = $e == 1.23;
2086                $ok7 = $e == $x;
2087                $ok8 = $e == $c;
2088            }
2089
2090            $f = "1.23";
2091            $g = 2.34;
2092            $h = 1.5;
2093            $i = 1.25;
2094            $j = "$h:$i";
2095
2096            $ok9 = $f == 1.23;
2097            $ok10 = $f == $x;
2098            $ok11 = $f == $c;
2099            $ok12 = abs(($f + $g) - 3.57) < 0.01;
2100            $ok13 = $w == 0;
2101
2102            # Look for non-ASCII error messages, and verify that the first
2103            # such is in UTF-8 (the others almost certainly will be like the
2104            # first).  This is only done if the current locale has LC_MESSAGES
2105            $ok14 = 1;
2106            $ok14_5 = 1;
2107            if (   locales_enabled('LC_MESSAGES')
2108                && setlocale(&POSIX::LC_MESSAGES, $Locale))
2109            {
2110                foreach my $err (keys %!) {
2111                    use Errno;
2112                    $! = eval "&Errno::$err";   # Convert to strerror() output
2113                    my $errnum = 0+$!;
2114                    my $strerror = "$!";
2115                    if ("$strerror" =~ /\P{ASCII}/) {
2116                        $ok14 = utf8::is_utf8($strerror);
2117                        no locale;
2118                        $ok14_5 = "$!" !~ /\P{ASCII}/;
2119                        debug( disp_str(
2120                        "non-ASCII \$! for error $errnum='$strerror'"))
2121                                                                   if ! $ok14_5;
2122                        last;
2123                    }
2124                }
2125            }
2126
2127            # Similarly, we verify that a non-ASCII radix is in UTF-8.  This
2128            # also catches if there is a disparity between sprintf and
2129            # stringification.
2130
2131            my $string_g = "$g";
2132            my $sprintf_g = sprintf("%g", $g);
2133
2134            $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g);
2135            $ok16 = $sprintf_g eq $string_g;
2136        }
2137        {
2138            no locale;
2139            $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
2140        }
2141        $ok18 = $j eq sprintf("%g:%g", $h, $i);
2142    }
2143
2144    $ok19 = $ok20 = 1;
2145    if (locales_enabled('LC_TIME')) {
2146        if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't
2147                                                   # affected by
2148                                                   # :not_characters
2149            my @times = CORE::localtime();
2150
2151            use locale;
2152            $ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425]
2153            my $date = POSIX::strftime("'%A'  '%B'  '%Z'  '%p'", @times);
2154            debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date));
2155
2156            # If there is any non-ascii, it better be UTF-8 in a UTF-8 locale,
2157            # and not UTF-8 if the locale isn't UTF-8.
2158            $ok20 = $date =~ / ^ \p{ASCII}+ $ /x
2159                    || $is_utf8_locale == utf8::is_utf8($date);
2160        }
2161    }
2162
2163    $ok21 = 1;
2164    if (locales_enabled('LC_MESSAGES')) {
2165        foreach my $err (keys %!) {
2166            no locale;
2167            use Errno;
2168            $! = eval "&Errno::$err";   # Convert to strerror() output
2169            my $strerror = "$!";
2170            if ($strerror =~ /\P{ASCII}/) {
2171                $ok21 = 0;
2172                debug(disp_str("non-ASCII strerror=$strerror"));
2173                last;
2174            }
2175        }
2176    }
2177
2178    report_result($Locale, ++$locales_test_number, $ok1);
2179    $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results';
2180    my $first_a_test = $locales_test_number;
2181
2182    debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n";
2183
2184    report_result($Locale, ++$locales_test_number, $ok2);
2185    $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results';
2186
2187    my $first_c_test = $locales_test_number;
2188
2189    $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant';
2190    report_result($Locale, $locales_test_number, $ok3);
2191    $problematical_tests{$locales_test_number} = 1;
2192
2193    $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar';
2194    report_result($Locale, $locales_test_number, $ok4);
2195    $problematical_tests{$locales_test_number} = 1;
2196
2197    report_result($Locale, ++$locales_test_number, $ok5);
2198    $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf';
2199    $problematical_tests{$locales_test_number} = 1;
2200
2201    debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n";
2202
2203    report_result($Locale, ++$locales_test_number, $ok6);
2204    $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block';
2205    my $first_e_test = $locales_test_number;
2206
2207    report_result($Locale, ++$locales_test_number, $ok7);
2208    $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale';
2209
2210    $test_names{++$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale';
2211    report_result($Locale, $locales_test_number, $ok8);
2212    $problematical_tests{$locales_test_number} = 1;
2213
2214    debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n";
2215
2216    report_result($Locale, ++$locales_test_number, $ok9);
2217    $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant';
2218    $problematical_tests{$locales_test_number} = 1;
2219    my $first_f_test = $locales_test_number;
2220
2221    report_result($Locale, ++$locales_test_number, $ok10);
2222    $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar';
2223    $problematical_tests{$locales_test_number} = 1;
2224
2225    $test_names{++$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf';
2226    report_result($Locale, $locales_test_number, $ok11);
2227    $problematical_tests{$locales_test_number} = 1;
2228
2229    report_result($Locale, ++$locales_test_number, $ok12);
2230    $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric';
2231    $problematical_tests{$locales_test_number} = 1;
2232
2233    report_result($Locale, ++$locales_test_number, $ok13);
2234    $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot';
2235    $problematical_tests{$locales_test_number} = 1;
2236
2237    report_result($Locale, ++$locales_test_number, $ok14);
2238    $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8';
2239
2240    report_result($Locale, ++$locales_test_number, $ok14_5);
2241    $test_names{$locales_test_number} = '... and are ASCII outside "use locale"';
2242
2243    report_result($Locale, ++$locales_test_number, $ok15);
2244    $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification';
2245    $problematical_tests{$locales_test_number} = 1;
2246
2247    report_result($Locale, ++$locales_test_number, $ok16);
2248    $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8';
2249    $problematical_tests{$locales_test_number} = 1;
2250
2251    report_result($Locale, ++$locales_test_number, $ok17);
2252    $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix';
2253
2254    report_result($Locale, ++$locales_test_number, $ok18);
2255    $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix';
2256    $problematical_tests{$locales_test_number} = 1;
2257
2258    report_result($Locale, ++$locales_test_number, $ok19);
2259    $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty';
2260
2261    report_result($Locale, ++$locales_test_number, $ok20);
2262    $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set';
2263    $problematical_tests{$locales_test_number} = 1;   # This is broken in
2264                                                      # OS X 10.9.3
2265
2266    report_result($Locale, ++$locales_test_number, $ok21);
2267    $test_names{$locales_test_number} = '"$!" is ASCII-only outside of locale scope';
2268
2269    debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n";
2270
2271    # Does taking lc separately differ from taking
2272    # the lc "in-line"?  (This was the bug 19990704.002 (#965), change #3568.)
2273    # The bug was in the caching of the 'o'-magic.
2274    if (! $is_utf8_locale) {
2275	use locale;
2276
2277	sub lcA {
2278	    my $lc0 = lc $_[0];
2279	    my $lc1 = lc $_[1];
2280	    return $lc0 cmp $lc1;
2281	}
2282
2283        sub lcB {
2284	    return lc($_[0]) cmp lc($_[1]);
2285	}
2286
2287        my $x = "ab";
2288        my $y = "aa";
2289        my $z = "AB";
2290
2291        report_result($Locale, ++$locales_test_number,
2292		    lcA($x, $y) == 1 && lcB($x, $y) == 1 ||
2293		    lcA($x, $z) == 0 && lcB($x, $z) == 0);
2294    }
2295    else {
2296	use locale ':not_characters';
2297
2298	sub lcC {
2299	    my $lc0 = lc $_[0];
2300	    my $lc1 = lc $_[1];
2301	    return $lc0 cmp $lc1;
2302	}
2303
2304        sub lcD {
2305	    return lc($_[0]) cmp lc($_[1]);
2306	}
2307
2308        my $x = "ab";
2309        my $y = "aa";
2310        my $z = "AB";
2311
2312        report_result($Locale, ++$locales_test_number,
2313		    lcC($x, $y) == 1 && lcD($x, $y) == 1 ||
2314		    lcC($x, $z) == 0 && lcD($x, $z) == 0);
2315    }
2316    $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp';
2317
2318    # Does lc of an UPPER (if different from the UPPER) match
2319    # case-insensitively the UPPER, and does the UPPER match
2320    # case-insensitively the lc of the UPPER.  And vice versa.
2321    {
2322        use locale;
2323        no utf8;
2324        my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
2325
2326        my @f = ();
2327        ++$locales_test_number;
2328        $test_names{$locales_test_number} = 'Verify case insensitive matching works';
2329        foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
2330            if (! $is_utf8_locale) {
2331                my $y = lc $x;
2332                next unless uc $y eq $x;
2333                debug_more( "UPPER=", disp_chars(($x)),
2334                            "; lc=", disp_chars(($y)), "; ",
2335                            "; fc=", disp_chars((fc $x)), "; ",
2336                            disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2337                            $x =~ /\Q$y/i ? 1 : 0,
2338                            "; ",
2339                            disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2340                            $y =~ /\Q$x/i ? 1 : 0,
2341                            "\n");
2342                #
2343                # If $x and $y contain regular expression characters
2344                # AND THEY lowercase (/i) to regular expression characters,
2345                # regcomp() will be mightily confused.  No, the \Q doesn't
2346                # help here (maybe regex engine internal lowercasing
2347                # is done after the \Q?)  An example of this happening is
2348                # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
2349                # the chr(173) (the "[") is the lowercase of the chr(235).
2350                #
2351                # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
2352                # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
2353                # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
2354                # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
2355                # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
2356                # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
2357                #
2358                # Similar things can happen even under (bastardised)
2359                # non-EBCDIC locales: in many European countries before the
2360                # advent of ISO 8859-x nationally customised versions of
2361                # ISO 646 were devised, reusing certain punctuation
2362                # characters for modified characters needed by the
2363                # country/language.  For example, the "|" might have
2364                # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
2365                #
2366                if ($x =~ $re || $y =~ $re) {
2367                    print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2368                    next;
2369                }
2370                push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2371
2372                # fc is not a locale concept, so Perl uses lc for it.
2373                push @f, $x unless lc $x eq fc $x;
2374            }
2375            else {
2376                use locale ':not_characters';
2377                my $y = lc $x;
2378                next unless uc $y eq $x;
2379                debug_more( "UPPER=", disp_chars(($x)),
2380                            "; lc=", disp_chars(($y)), "; ",
2381                            "; fc=", disp_chars((fc $x)), "; ",
2382                            disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2383                            $x =~ /\Q$y/i ? 1 : 0,
2384                            "; ",
2385                            disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2386                            $y =~ /\Q$x/i ? 1 : 0,
2387                            "\n");
2388
2389                push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2390
2391                # The places where Unicode's lc is different from fc are
2392                # skipped here by virtue of the 'next unless uc...' line above
2393                push @f, $x unless lc $x eq fc $x;
2394            }
2395        }
2396
2397	foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
2398            if (! $is_utf8_locale) {
2399                my $y = uc $x;
2400                next unless lc $y eq $x;
2401                debug_more( "lower=", disp_chars(($x)),
2402                            "; uc=", disp_chars(($y)), "; ",
2403                            "; fc=", disp_chars((fc $x)), "; ",
2404                            disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2405                            $x =~ /\Q$y/i ? 1 : 0,
2406                            "; ",
2407                            disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2408                            $y =~ /\Q$x/i ? 1 : 0,
2409                            "\n");
2410                if ($x =~ $re || $y =~ $re) { # See above.
2411                    print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2412                    next;
2413                }
2414                push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2415
2416                push @f, $x unless lc $x eq fc $x;
2417            }
2418            else {
2419                use locale ':not_characters';
2420                my $y = uc $x;
2421                next unless lc $y eq $x;
2422                debug_more( "lower=", disp_chars(($x)),
2423                            "; uc=", disp_chars(($y)), "; ",
2424                            "; fc=", disp_chars((fc $x)), "; ",
2425                            disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2426                            $x =~ /\Q$y/i ? 1 : 0,
2427                            "; ",
2428                            disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2429                            $y =~ /\Q$x/i ? 1 : 0,
2430                            "\n");
2431                push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2432
2433                push @f, $x unless lc $x eq fc $x;
2434            }
2435	}
2436	report_multi_result($Locale, $locales_test_number, \@f);
2437        $problematical_tests{$locales_test_number} = 1;
2438    }
2439
2440    # [perl #109318]
2441    {
2442        my @f = ();
2443        ++$locales_test_number;
2444        $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent';
2445        $problematical_tests{$locales_test_number} = 1;
2446
2447        my $radix = langinfo(RADIXCHAR);
2448        my @nums = (
2449             "3.14e+9",  "3${radix}14e+9",  "3.14e-9",  "3${radix}14e-9",
2450            "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9",
2451        );
2452
2453        if (! $is_utf8_locale) {
2454            use locale;
2455            for my $num (@nums) {
2456                push @f, $num
2457                    unless sprintf("%g", $num) =~ /3.+14/;
2458            }
2459        }
2460        else {
2461            use locale ':not_characters';
2462            for my $num (@nums) {
2463                push @f, $num
2464                    unless sprintf("%g", $num) =~ /3.+14/;
2465            }
2466        }
2467
2468        report_result($Locale, $locales_test_number, @f == 0);
2469        if (@f) {
2470            print "# failed $locales_test_number locale '$Locale' numbers @f\n"
2471	}
2472    }
2473}
2474
2475my $final_locales_test_number = $locales_test_number;
2476
2477# Recount the errors.
2478
2479TEST_NUM:
2480foreach $test_num ($first_locales_test_number..$final_locales_test_number) {
2481    my $has_non_global_failure = $Problem{$test_num}
2482                            || ! defined $Okay{$test_num}
2483                            || ! @{$Okay{$test_num}};
2484    print "not " if %setlocale_failed || $has_non_global_failure;
2485    print "ok $test_num";
2486    $test_names{$test_num} = "" unless defined $test_names{$test_num};
2487
2488    # If TODO is in the test name, make it thus
2489    my $todo = $test_names{$test_num} =~ s/\s*TODO\s*//;
2490    print " $test_names{$test_num}";
2491    if ($todo) {
2492        print " # TODO\n";
2493    }
2494    elsif (%setlocale_failed || ! $has_non_global_failure) {
2495        print "\n";
2496    }
2497    elsif ($has_non_global_failure) {
2498
2499        # If there are any locales that pass this test, or are known-bad, it
2500        # may be that there are enough passes that we TODO the failure, but
2501        # only for tests that we have decided can be problematical.
2502        if (  ($Okay{$test_num} || $Known_bad_locale{$test_num})
2503            && grep { $_ == $test_num } keys %problematical_tests)
2504        {
2505            # Don't count the known-bad failures when calculating the
2506            # percentage that fail.
2507            my $known_failures = (exists $Known_bad_locale{$test_num})
2508                                  ? scalar(keys $Known_bad_locale{$test_num}->%*)
2509                                  : 0;
2510            my $adjusted_failures = scalar(keys $Problem{$test_num}->%*)
2511                                    - $known_failures;
2512
2513            # Specially handle failures where only known-bad locales fail.
2514            # This makes the diagnositics clearer.
2515            if ($adjusted_failures <= 0) {
2516                print " # TODO fails only on known bad locales: ",
2517                      join " ", keys $Known_bad_locale{$test_num}->%*, "\n";
2518                next TEST_NUM;
2519            }
2520
2521            # Round to nearest .1%
2522            my $percent_fail = (int(.5 + (1000 * $adjusted_failures
2523                                          / scalar(@Locale))))
2524                               / 10;
2525            $todo = $percent_fail < $acceptable_failure_percentage;
2526            print " # TODO" if $todo;
2527            print "\n";
2528
2529            if ($debug) {
2530                print "# $percent_fail% of locales (",
2531                      scalar(keys $Problem{$test_num}->%*),
2532                      " of ",
2533                      scalar(@Locale),
2534                      ") fail the above test (TODO cut-off is ",
2535                      $acceptable_failure_percentage,
2536                      "%)\n";
2537            }
2538            elsif ($todo) {
2539                print "# ", 100 - $percent_fail, "% of locales not known to be problematic on this platform\n";
2540                print "# pass the above test, so it is likely that the failures\n";
2541                print "# are errors in the locale definitions.  The test is marked TODO, as the\n";
2542                print "# problem is not likely to be Perl's\n";
2543            }
2544        }
2545
2546        if ($debug) {
2547            print "# The code points that had this failure are given above.  Look for lines\n";
2548            print "# that match 'failed $test_num'\n";
2549        }
2550        else {
2551            print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2552            print "# Then look at that output for lines that match 'failed $test_num'\n";
2553        }
2554	if (defined $not_necessarily_a_problem_test_number
2555            && $test_num == $not_necessarily_a_problem_test_number)
2556        {
2557	    print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n";
2558	    print "# It usually indicates a problem in the environment,\n";
2559	    print "# not in Perl itself.\n";
2560	}
2561    }
2562}
2563
2564$test_num = $final_locales_test_number;
2565
2566if ( ! defined $Config{d_setlocale_accepts_any_locale_name}) {
2567    # perl #115808
2568    use warnings;
2569    my $warned = 0;
2570    local $SIG{__WARN__} = sub {
2571        $warned = $_[0] =~ /uninitialized/;
2572    };
2573    my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy");
2574    ok($warned, "variable set to setlocale(\"invalid locale name\") is considered uninitialized");
2575}
2576
2577# Test that tainting and case changing works on utf8 strings.  These tests are
2578# placed last to avoid disturbing the hard-coded test numbers that existed at
2579# the time these were added above this in this file.
2580# This also tests that locale overrides unicode_strings in the same scope for
2581# non-utf8 strings.
2582setlocale(&POSIX::LC_ALL, "C");
2583{
2584    use locale;
2585    use feature 'unicode_strings';
2586
2587    foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") {
2588        my @list;   # List of code points to test for $function
2589
2590        # Used to calculate the changed case for ASCII characters by using the
2591        # ord, instead of using one of the functions under test.
2592        my $ascii_case_change_delta;
2593        my $above_latin1_case_change_delta; # Same for the specific ords > 255
2594                                            # that we use
2595
2596        # We test an ASCII character, which should change case;
2597        # a Latin1 character, which shouldn't change case under this C locale,
2598        # an above-Latin1 character that when the case is changed would cross
2599        #   the 255/256 boundary, so doesn't change case
2600        #   (the \x{149} is one of these, but changes into 2 characters, the
2601        #   first one of which doesn't cross the boundary.
2602        # the final one in each list is an above-Latin1 character whose case
2603        #   does change.  The code below uses its position in its list as a
2604        #   marker to indicate that it, unlike the other code points above
2605        #   ASCII, has a successful case change
2606        #
2607        # All casing operations under locale (but not :not_characters) should
2608        # taint
2609        if ($function =~ /^u/) {
2610            @list = ("", "a",
2611                     chr(utf8::unicode_to_native(0xe0)),
2612                     chr(utf8::unicode_to_native(0xff)),
2613                     "\x{fb00}", "\x{149}", "\x{101}");
2614            $ascii_case_change_delta = ($is_ebcdic) ? +64 : -32;
2615            $above_latin1_case_change_delta = -1;
2616        }
2617        else {
2618            @list = ("", "A",
2619                     chr(utf8::unicode_to_native(0xC0)),
2620                     "\x{17F}", "\x{100}");
2621            $ascii_case_change_delta = ($is_ebcdic) ? -64 : +32;
2622            $above_latin1_case_change_delta = +1;
2623        }
2624        foreach my $is_utf8_locale (0 .. 1) {
2625            foreach my $j (0 .. $#list) {
2626                my $char = $list[$j];
2627
2628                for my $encoded_in_utf8 (0 .. 1) {
2629                    my $should_be;
2630                    my $changed;
2631                    if (! $is_utf8_locale) {
2632                        no warnings 'locale';
2633                        $should_be = ($j == $#list)
2634                            ? chr(ord($char) + $above_latin1_case_change_delta)
2635                            : (length $char == 0 || utf8::native_to_unicode(ord($char)) > 127)
2636                              ? $char
2637                              : chr(ord($char) + $ascii_case_change_delta);
2638
2639                        # This monstrosity is in order to avoid using an eval,
2640                        # which might perturb the results
2641                        $changed = ($function eq "uc")
2642                                    ? uc($char)
2643                                    : ($function eq "ucfirst")
2644                                      ? ucfirst($char)
2645                                      : ($function eq "lc")
2646                                        ? lc($char)
2647                                        : ($function eq "lcfirst")
2648                                          ? lcfirst($char)
2649                                          : ($function eq "fc")
2650                                            ? fc($char)
2651                                            : die("Unexpected function \"$function\"");
2652                    }
2653                    else {
2654                        {
2655                            no locale;
2656
2657                            # For utf8-locales the case changing functions
2658                            # should work just like they do outside of locale.
2659                            # Can use eval here because not testing it when
2660                            # not in locale.
2661                            $should_be = eval "$function('$char')";
2662                            die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if  $@;
2663
2664                        }
2665                        use locale ':not_characters';
2666                        $changed = ($function eq "uc")
2667                                    ? uc($char)
2668                                    : ($function eq "ucfirst")
2669                                      ? ucfirst($char)
2670                                      : ($function eq "lc")
2671                                        ? lc($char)
2672                                        : ($function eq "lcfirst")
2673                                          ? lcfirst($char)
2674                                          : ($function eq "fc")
2675                                            ? fc($char)
2676                                            : die("Unexpected function \"$function\"");
2677                    }
2678                    ok($changed eq $should_be,
2679                        "$function(\"$char\") in C locale "
2680                        . (($is_utf8_locale)
2681                            ? "(use locale ':not_characters'"
2682                            : "(use locale")
2683                        . (($encoded_in_utf8)
2684                            ? "; encoded in utf8)"
2685                            : "; not encoded in utf8)")
2686                        . " should be \"$should_be\", got \"$changed\"");
2687
2688                    # Tainting shouldn't happen for use locale :not_character
2689                    # (a utf8 locale)
2690                    (! $is_utf8_locale)
2691                    ? check_taint($changed)
2692                    : check_taint_not($changed);
2693
2694                    # Use UTF-8 next time through the loop
2695                    utf8::upgrade($char);
2696                }
2697            }
2698        }
2699    }
2700}
2701
2702# Give final advice.
2703
2704my $didwarn = 0;
2705
2706foreach ($first_locales_test_number..$final_locales_test_number) {
2707    if ($Problem{$_}) {
2708	my @f = sort keys %{ $Problem{$_} };
2709
2710        # Don't list the failures caused by known-bad locales.
2711        if (exists $known_bad_locales{$os}) {
2712            @f = grep { $_ !~ $known_bad_locales{$os} } @f;
2713            next unless @f;
2714        }
2715	my $f = join(" ", @f);
2716	$f =~ s/(.{50,60}) /$1\n#\t/g;
2717	print
2718	    "#\n",
2719            "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
2720	    "#\t", $f, "\n#\n",
2721	    "# on your system may have errors because the locale test $_\n",
2722	    "# \"$test_names{$_}\"\n",
2723            "# failed in ", (@f == 1 ? "that locale" : "those locales"),
2724            ".\n";
2725	print <<EOW;
2726#
2727# If your users are not using these locales you are safe for the moment,
2728# but please report this failure first to perlbug\@perl.org using the
2729# perlbug script (as described in the INSTALL file) so that the exact
2730# details of the failures can be sorted out first and then your operating
2731# system supplier can be alerted about these anomalies.
2732#
2733EOW
2734	$didwarn = 1;
2735    }
2736}
2737
2738# Tell which locales were okay and which were not.
2739
2740if ($didwarn) {
2741    my (@s, @F);
2742
2743    foreach my $l (@Locale) {
2744	my $p = 0;
2745        if ($setlocale_failed{$l}) {
2746            $p++;
2747        }
2748        else {
2749            foreach my $t
2750                        ($first_locales_test_number..$final_locales_test_number)
2751            {
2752                $p++ if $Problem{$t}{$l};
2753            }
2754	}
2755	push @s, $l if $p == 0;
2756        push @F, $l unless $p == 0;
2757    }
2758
2759    if (@s) {
2760        my $s = join(" ", @s);
2761        $s =~ s/(.{50,60}) /$1\n#\t/g;
2762
2763        print
2764            "# The following locales\n#\n",
2765            "#\t", $s, "\n#\n",
2766	    "# tested okay.\n#\n",
2767    } else {
2768        print "# None of your locales were fully okay.\n";
2769    }
2770
2771    if (@F) {
2772        my $F = join(" ", @F);
2773        $F =~ s/(.{50,60}) /$1\n#\t/g;
2774
2775        my $details = "";
2776        unless ($debug) {
2777            $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2778        }
2779        elsif ($debug == 1) {
2780            $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n";
2781        }
2782
2783        print
2784          "# The following locales\n#\n",
2785          "#\t", $F, "\n#\n",
2786          "# had problems.\n#\n",
2787          $details;
2788    } else {
2789        print "# None of your locales were broken.\n";
2790    }
2791}
2792
2793if (exists $known_bad_locales{$os} && ! %Known_bad_locale) {
2794    $test_num++;
2795    print "ok $test_num $^O no longer has known bad locales # TODO\n";
2796}
2797
2798print "1..$test_num\n";
2799
2800# eof
2801