1#!./perl -wT 2 3use strict; 4use warnings; 5use Config; 6 7# This tests plain 'use locale' and adorned 'use locale ":not_characters"' 8# Because these pragmas are compile time, and I (khw) am trying to test 9# without using 'eval' as much as possible, which might cloud the issue, the 10# crucial parts of the code are duplicated in a block for each pragma. 11 12# Unfortunately, many systems have defective locale definitions. This test 13# file looks for both perl bugs and bugs in the system's locale definitions. 14# It can be difficult to tease apart which is which. For the latter, there 15# are tests that are based on the POSIX standard. A character isn't supposed 16# to be both a space and graphic, for example. Another example is if a 17# character is the uppercase of another, that other should be the lowercase of 18# the first. Including tests for these allows you to test for defective 19# locales, as described in perllocale. The way this file distinguishes 20# between defective locales, and perl bugs is to see what percentage of 21# locales fail a given test. If it's a lot, then it's more likely to be a 22# perl bug; only a few, those particular locales are likely defective. In 23# that case the failing tests are marked TODO. (They should be reported to 24# the vendor, however; but it's not perl's problem.) In some cases, this 25# script has caused tickets to be filed against perl which turn out to be the 26# platform's bug, but a higher percentage of locales are failing than the 27# built-in cut-off point. For those platforms, code has been added to 28# increase the cut-off, so those platforms don't trigger failing test reports. 29# Ideally, the platforms would get fixed and that code would be changed to 30# only kick-in when run on versions that are earlier than the fixed one. But, 31# this rarely happens in practice. 32 33# To make a TODO test, add the string 'TODO' to its %test_names value 34 35my $is_ebcdic = ord("A") == 193; 36my $os = lc $^O; 37 38# Configure now lets you build a perl that silently ignores taint features 39my $NoTaintSupport = exists($Config{taint_support}) && !$Config{taint_support}; 40 41no warnings 'locale'; # We test even weird locales; and do some scary things 42 # in ok locales 43 44binmode STDOUT, ':utf8'; 45binmode STDERR, ':utf8'; 46 47BEGIN { 48 chdir 't' if -d 't'; 49 @INC = '../lib'; 50 unshift @INC, '.'; 51 require './loc_tools.pl'; 52 unless (locales_enabled('LC_CTYPE')) { 53 print "1..0\n"; 54 exit; 55 } 56 $| = 1; 57 require Config; import Config; 58} 59 60use feature 'fc'; 61use I18N::Langinfo qw(langinfo CODESET CRNCYSTR RADIXCHAR); 62 63# =1 adds debugging output; =2 increases the verbosity somewhat 64our $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0; 65 66# Certain tests have been shown to be problematical for a few locales. Don't 67# fail them unless at least this percentage of the tested locales fail. 68# EBCDIC os390 has more locales fail than normal, because it has locales that 69# move various critical characters like '['. 70my $acceptable_failure_percentage = ($os =~ / ^ ( os390 ) $ /x) 71 ? 10 72 : 5; 73 74# The list of test numbers of the problematic tests. 75my %problematical_tests; 76 77# If any %problematical_tests fails in one of these locales, it is 78# considered a TODO. 79my %known_bad_locales = ( 80 irix => qr/ ^ (?: cs | hu | sk ) $/x, 81 darwin => qr/ ^ lt_LT.ISO8859 /ix, 82 os390 => qr/ ^ italian /ix, 83 netbsd => qr/\bISO8859-2\b/i, 84 85 # This may be the same bug as the cygwin below; it's 86 # generating malformed UTF-8 on the radix being 87 # mulit-byte 88 solaris => qr/ ^ ( ar_ | pa_ ) /x, 89 ); 90 91# cygwin isn't returning proper radix length in this locale, but supposedly to 92# be fixed in later versions. 93if ($os eq 'cygwin' && version->new(($Config{osvers} =~ /^(\d+(?:\.\d+)+)/)[0]) le v2.4.1) { 94 $known_bad_locales{'cygwin'} = qr/ ^ ps_AF /ix; 95} 96 97use Dumpvalue; 98 99my $dumper = Dumpvalue->new( 100 tick => qq{"}, 101 quoteHighBit => 0, 102 unctrl => "quote" 103 ); 104 105sub debug { 106 return unless $debug; 107 my($mess) = join "", '# ', @_; 108 chomp $mess; 109 print STDERR $dumper->stringify($mess,1), "\n"; 110} 111 112sub note { 113 local $debug = 1; 114 debug @_; 115} 116 117sub debug_more { 118 return unless $debug > 1; 119 return debug(@_); 120} 121 122sub debugf { 123 printf STDERR @_ if $debug; 124} 125 126$a = 'abc %9'; 127 128my $test_num = 0; 129 130sub ok { 131 my ($result, $message) = @_; 132 $message = "" unless defined $message; 133 134 print 'not ' unless ($result); 135 print "ok " . ++$test_num; 136 print " $message"; 137 print "\n"; 138 return ($result) ? 1 : 0; 139} 140 141sub skip { 142 return ok 1, "skipped: " . shift; 143} 144 145sub fail { 146 return ok 0, shift; 147} 148 149# First we'll do a lot of taint checking for locales. 150# This is the easiest to test, actually, as any locale, 151# even the default locale will taint under 'use locale'. 152 153sub is_tainted { # hello, camel two. 154 no warnings 'uninitialized' ; 155 my $dummy; 156 local $@; 157 not eval { $dummy = join("", @_), kill 0; 1 } 158} 159 160sub check_taint ($;$) { 161 my $message_tail = $_[1] // ""; 162 163 # Extra blanks are so aligns with taint_not output 164 $message_tail = ": $message_tail" if $message_tail; 165 if ($NoTaintSupport) { 166 skip("your perl was built without taint support"); 167 } 168 else { 169 ok is_tainted($_[0]), "verify that is tainted$message_tail"; 170 } 171} 172 173sub check_taint_not ($;$) { 174 my $message_tail = $_[1] // ""; 175 $message_tail = ": $message_tail" if $message_tail; 176 ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail"); 177} 178 179foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) { 180 my $short_result = locales_enabled($category); 181 ok ($short_result == 0 || $short_result == 1, 182 "Verify locales_enabled('$category') returns 0 or 1"); 183 debug("locales_enabled('$category') returned '$short_result'"); 184 my $long_result = locales_enabled("LC_$category"); 185 if (! ok ($long_result == $short_result, 186 " and locales_enabled('LC_$category') returns " 187 . "the same value") 188 ) { 189 debug("locales_enabled('LC_$category') returned $long_result"); 190 } 191} 192 193"\tb\t" =~ /^m?(\s)(.*)\1$/; 194check_taint_not $&, "not tainted outside 'use locale'"; 195; 196 197use locale; # engage locale and therefore locale taint. 198 199# BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for 200# ":notcharacters" 201 202check_taint_not $a, '$a'; 203 204check_taint uc($a), 'uc($a)'; 205check_taint "\U$a", '"\U$a"'; 206check_taint ucfirst($a), 'ucfirst($a)'; 207check_taint "\u$a", '"\u$a"'; 208check_taint lc($a), 'lc($a)'; 209check_taint fc($a), 'fc($a)'; 210check_taint "\L$a", '"\L$a"'; 211check_taint "\F$a", '"\F$a"'; 212check_taint lcfirst($a), 'lcfirst($a)'; 213check_taint "\l$a", '"\l$a"'; 214 215check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)"; 216check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)"; 217check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)"; 218check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)"; 219check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)"; 220 221$_ = $a; # untaint $_ 222 223$_ = uc($a); # taint $_ 224 225check_taint $_, '$_ = uc($a)'; 226 227/(\w)/; # taint $&, $`, $', $+, $1. 228check_taint $&, "\$& from /(\\w)/"; 229check_taint $`, "\t\$`"; 230check_taint $', "\t\$'"; 231check_taint $+, "\t\$+"; 232check_taint $1, "\t\$1"; 233check_taint_not $2, "\t\$2"; 234 235/(.)/; # untaint $&, $`, $', $+, $1. 236check_taint_not $&, "\$& from /(.)/"; 237check_taint_not $`, "\t\$`"; 238check_taint_not $', "\t\$'"; 239check_taint_not $+, "\t\$+"; 240check_taint_not $1, "\t\$1"; 241check_taint_not $2, "\t\$2"; 242 243/(\W)/; # taint $&, $`, $', $+, $1. 244check_taint $&, "\$& from /(\\W)/"; 245check_taint $`, "\t\$`"; 246check_taint $', "\t\$'"; 247check_taint $+, "\t\$+"; 248check_taint $1, "\t\$1"; 249check_taint_not $2, "\t\$2"; 250 251/(.)/; # untaint $&, $`, $', $+, $1. 252check_taint_not $&, "\$& from /(.)/"; 253check_taint_not $`, "\t\$`"; 254check_taint_not $', "\t\$'"; 255check_taint_not $+, "\t\$+"; 256check_taint_not $1, "\t\$1"; 257check_taint_not $2, "\t\$2"; 258 259/(\s)/; # taint $&, $`, $', $+, $1. 260check_taint $&, "\$& from /(\\s)/"; 261check_taint $`, "\t\$`"; 262check_taint $', "\t\$'"; 263check_taint $+, "\t\$+"; 264check_taint $1, "\t\$1"; 265check_taint_not $2, "\t\$2"; 266 267/(.)/; # untaint $&, $`, $', $+, $1. 268check_taint_not $&, "\$& from /(.)/"; 269 270/(\S)/; # taint $&, $`, $', $+, $1. 271check_taint $&, "\$& from /(\\S)/"; 272check_taint $`, "\t\$`"; 273check_taint $', "\t\$'"; 274check_taint $+, "\t\$+"; 275check_taint $1, "\t\$1"; 276check_taint_not $2, "\t\$2"; 277 278/(.)/; # untaint $&, $`, $', $+, $1. 279check_taint_not $&, "\$& from /(.)/"; 280 281"0" =~ /(\d)/; # taint $&, $`, $', $+, $1. 282check_taint $&, "\$& from /(\\d)/"; 283check_taint $`, "\t\$`"; 284check_taint $', "\t\$'"; 285check_taint $+, "\t\$+"; 286check_taint $1, "\t\$1"; 287check_taint_not $2, "\t\$2"; 288 289/(.)/; # untaint $&, $`, $', $+, $1. 290check_taint_not $&, "\$& from /(.)/"; 291 292/(\D)/; # taint $&, $`, $', $+, $1. 293check_taint $&, "\$& from /(\\D)/"; 294check_taint $`, "\t\$`"; 295check_taint $', "\t\$'"; 296check_taint $+, "\t\$+"; 297check_taint $1, "\t\$1"; 298check_taint_not $2, "\t\$2"; 299 300/(.)/; # untaint $&, $`, $', $+, $1. 301check_taint_not $&, "\$& from /(.)/"; 302 303/([[:alnum:]])/; # taint $&, $`, $', $+, $1. 304check_taint $&, "\$& from /([[:alnum:]])/"; 305check_taint $`, "\t\$`"; 306check_taint $', "\t\$'"; 307check_taint $+, "\t\$+"; 308check_taint $1, "\t\$1"; 309check_taint_not $2, "\t\$2"; 310 311/(.)/; # untaint $&, $`, $', $+, $1. 312check_taint_not $&, "\$& from /(.)/"; 313 314/([[:^alnum:]])/; # taint $&, $`, $', $+, $1. 315check_taint $&, "\$& from /([[:^alnum:]])/"; 316check_taint $`, "\t\$`"; 317check_taint $', "\t\$'"; 318check_taint $+, "\t\$+"; 319check_taint $1, "\t\$1"; 320check_taint_not $2, "\t\$2"; 321 322"a" =~ /(a)|(\w)/; # taint $&, $`, $', $+, $1. 323check_taint $&, "\$& from /(a)|(\\w)/"; 324check_taint $`, "\t\$`"; 325check_taint $', "\t\$'"; 326check_taint $+, "\t\$+"; 327check_taint $1, "\t\$1"; 328ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'"); 329ok(! defined $2, ("\t" x 5) . "\$2 is undefined"); 330check_taint_not $2, "\t\$2"; 331check_taint_not $3, "\t\$3"; 332 333/(.)/; # untaint $&, $`, $', $+, $1. 334check_taint_not $&, "\$& from /(.)/"; 335 336"\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; # no tainting because no locale dependence 337check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i"; 338check_taint_not $`, "\t\$`"; 339check_taint_not $', "\t\$'"; 340check_taint_not $+, "\t\$+"; 341check_taint_not $1, "\t\$1"; 342ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'"); 343check_taint_not $2, "\t\$2"; 344 345/(.)/; # untaint $&, $`, $', $+, $1. 346check_taint_not $&, "\$& from /./"; 347 348"(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; # taints because depends on locale 349check_taint $&, "\$& from /(\\N{KELVIN SIGN})/i"; 350check_taint $`, "\t\$`"; 351check_taint $', "\t\$'"; 352check_taint $+, "\t\$+"; 353check_taint $1, "\t\$1"; 354check_taint_not $2, "\t\$2"; 355 356/(.)/; # untaint $&, $`, $', $+, $1. 357check_taint_not $&, "\$& from /(.)/"; 358 359"a:" =~ /(.)\b(.)/; # taint $&, $`, $', $+, $1. 360check_taint $&, "\$& from /(.)\\b(.)/"; 361check_taint $`, "\t\$`"; 362check_taint $', "\t\$'"; 363check_taint $+, "\t\$+"; 364check_taint $1, "\t\$1"; 365check_taint $2, "\t\$2"; 366check_taint_not $3, "\t\$3"; 367 368/(.)/; # untaint $&, $`, $', $+, $1. 369check_taint_not $&, "\$& from /./"; 370 371"aa" =~ /(.)\B(.)/; # taint $&, $`, $', $+, $1. 372check_taint $&, "\$& from /(.)\\B(.)/"; 373check_taint $`, "\t\$`"; 374check_taint $', "\t\$'"; 375check_taint $+, "\t\$+"; 376check_taint $1, "\t\$1"; 377check_taint $2, "\t\$2"; 378check_taint_not $3, "\t\$3"; 379 380/(.)/; # untaint $&, $`, $', $+, $1. 381check_taint_not $&, "\$& from /./"; 382 383"aaa" =~ /(.).(\1)/i; # notaint because not locale dependent 384check_taint_not $&, "\$ & from /(.).(\\1)/"; 385check_taint_not $`, "\t\$`"; 386check_taint_not $', "\t\$'"; 387check_taint_not $+, "\t\$+"; 388check_taint_not $1, "\t\$1"; 389check_taint_not $2, "\t\$2"; 390check_taint_not $3, "\t\$3"; 391 392/(.)/; # untaint $&, $`, $', $+, $1. 393check_taint_not $&, "\$ & from /./"; 394 395$_ = $a; # untaint $_ 396 397check_taint_not $_, 'untainting $_ works'; 398 399/(b)/; # this must not taint 400check_taint_not $&, "\$ & from /(b)/"; 401check_taint_not $`, "\t\$`"; 402check_taint_not $', "\t\$'"; 403check_taint_not $+, "\t\$+"; 404check_taint_not $1, "\t\$1"; 405check_taint_not $2, "\t\$2"; 406 407$_ = $a; # untaint $_ 408 409check_taint_not $_, 'untainting $_ works'; 410 411$b = uc($a); # taint $b 412s/(.+)/$b/; # this must taint only the $_ 413 414check_taint $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted'; 415check_taint_not $&, "\t\$&"; 416check_taint_not $`, "\t\$`"; 417check_taint_not $', "\t\$'"; 418check_taint_not $+, "\t\$+"; 419check_taint_not $1, "\t\$1"; 420check_taint_not $2, "\t\$2"; 421 422$_ = $a; # untaint $_ 423 424s/(.+)/b/; # this must not taint 425check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/'; 426check_taint_not $&, "\t\$&"; 427check_taint_not $`, "\t\$`"; 428check_taint_not $', "\t\$'"; 429check_taint_not $+, "\t\$+"; 430check_taint_not $1, "\t\$1"; 431check_taint_not $2, "\t\$2"; 432 433$b = $a; # untaint $b 434 435($b = $a) =~ s/\w/$&/; 436check_taint $b, '$b from ($b = $a) =~ s/\w/$&/'; # $b should be tainted. 437check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; # $a should be not. 438 439$_ = $a; # untaint $_ 440 441s/(\w)/\l$1/; # this must taint 442check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint 443check_taint $&, "\t\$&"; 444check_taint $`, "\t\$`"; 445check_taint $', "\t\$'"; 446check_taint $+, "\t\$+"; 447check_taint $1, "\t\$1"; 448check_taint_not $2, "\t\$2"; 449 450$_ = $a; # untaint $_ 451 452s/(\w)/\L$1/; # this must taint 453check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,'; 454check_taint $&, "\t\$&"; 455check_taint $`, "\t\$`"; 456check_taint $', "\t\$'"; 457check_taint $+, "\t\$+"; 458check_taint $1, "\t\$1"; 459check_taint_not $2, "\t\$2"; 460 461$_ = $a; # untaint $_ 462 463s/(\w)/\u$1/; # this must taint 464check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/'; 465check_taint $&, "\t\$&"; 466check_taint $`, "\t\$`"; 467check_taint $', "\t\$'"; 468check_taint $+, "\t\$+"; 469check_taint $1, "\t\$1"; 470check_taint_not $2, "\t\$2"; 471 472$_ = $a; # untaint $_ 473 474s/(\w)/\U$1/; # this must taint 475check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/'; 476check_taint $&, "\t\$&"; 477check_taint $`, "\t\$`"; 478check_taint $', "\t\$'"; 479check_taint $+, "\t\$+"; 480check_taint $1, "\t\$1"; 481check_taint_not $2, "\t\$2"; 482 483# After all this tainting $a should be cool. 484 485check_taint_not $a, '$a still not tainted'; 486 487"a" =~ /([a-z])/; 488check_taint_not $1, '"a" =~ /([a-z])/'; 489"foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 490check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; 491 492# BE SURE TO COPY ANYTHING YOU ADD to the block below 493 494{ # This is just the previous tests copied here with a different 495 # compile-time pragma. 496 497 use locale ':not_characters'; # engage restricted locale with different 498 # tainting rules 499 check_taint_not $a, '$a'; 500 501 check_taint_not uc($a), 'uc($a)'; 502 check_taint_not "\U$a", '"\U$a"'; 503 check_taint_not ucfirst($a), 'ucfirst($a)'; 504 check_taint_not "\u$a", '"\u$a"'; 505 check_taint_not lc($a), 'lc($a)'; 506 check_taint_not fc($a), 'fc($a)'; 507 check_taint_not "\L$a", '"\L$a"'; 508 check_taint_not "\F$a", '"\F$a"'; 509 check_taint_not lcfirst($a), 'lcfirst($a)'; 510 check_taint_not "\l$a", '"\l$a"'; 511 512 check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)"; 513 check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)"; 514 check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)"; 515 check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)"; 516 check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)"; 517 518 $_ = $a; # untaint $_ 519 520 $_ = uc($a); 521 522 check_taint_not $_, '$_ = uc($a)'; 523 524 /(\w)/; 525 check_taint_not $&, "\$& from /(\\w)/"; 526 check_taint_not $`, "\t\$`"; 527 check_taint_not $', "\t\$'"; 528 check_taint_not $+, "\t\$+"; 529 check_taint_not $1, "\t\$1"; 530 check_taint_not $2, "\t\$2"; 531 532 /(.)/; # untaint $&, $`, $', $+, $1. 533 check_taint_not $&, "\$& from /(.)/"; 534 check_taint_not $`, "\t\$`"; 535 check_taint_not $', "\t\$'"; 536 check_taint_not $+, "\t\$+"; 537 check_taint_not $1, "\t\$1"; 538 check_taint_not $2, "\t\$2"; 539 540 /(\W)/; 541 check_taint_not $&, "\$& from /(\\W)/"; 542 check_taint_not $`, "\t\$`"; 543 check_taint_not $', "\t\$'"; 544 check_taint_not $+, "\t\$+"; 545 check_taint_not $1, "\t\$1"; 546 check_taint_not $2, "\t\$2"; 547 548 /(.)/; # untaint $&, $`, $', $+, $1. 549 check_taint_not $&, "\$& from /(.)/"; 550 check_taint_not $`, "\t\$`"; 551 check_taint_not $', "\t\$'"; 552 check_taint_not $+, "\t\$+"; 553 check_taint_not $1, "\t\$1"; 554 check_taint_not $2, "\t\$2"; 555 556 /(\s)/; 557 check_taint_not $&, "\$& from /(\\s)/"; 558 check_taint_not $`, "\t\$`"; 559 check_taint_not $', "\t\$'"; 560 check_taint_not $+, "\t\$+"; 561 check_taint_not $1, "\t\$1"; 562 check_taint_not $2, "\t\$2"; 563 564 /(.)/; # untaint $&, $`, $', $+, $1. 565 check_taint_not $&, "\$& from /(.)/"; 566 567 /(\S)/; 568 check_taint_not $&, "\$& from /(\\S)/"; 569 check_taint_not $`, "\t\$`"; 570 check_taint_not $', "\t\$'"; 571 check_taint_not $+, "\t\$+"; 572 check_taint_not $1, "\t\$1"; 573 check_taint_not $2, "\t\$2"; 574 575 /(.)/; # untaint $&, $`, $', $+, $1. 576 check_taint_not $&, "\$& from /(.)/"; 577 578 "0" =~ /(\d)/; 579 check_taint_not $&, "\$& from /(\\d)/"; 580 check_taint_not $`, "\t\$`"; 581 check_taint_not $', "\t\$'"; 582 check_taint_not $+, "\t\$+"; 583 check_taint_not $1, "\t\$1"; 584 check_taint_not $2, "\t\$2"; 585 586 /(.)/; # untaint $&, $`, $', $+, $1. 587 check_taint_not $&, "\$& from /(.)/"; 588 589 /(\D)/; 590 check_taint_not $&, "\$& from /(\\D)/"; 591 check_taint_not $`, "\t\$`"; 592 check_taint_not $', "\t\$'"; 593 check_taint_not $+, "\t\$+"; 594 check_taint_not $1, "\t\$1"; 595 check_taint_not $2, "\t\$2"; 596 597 /(.)/; # untaint $&, $`, $', $+, $1. 598 check_taint_not $&, "\$& from /(.)/"; 599 600 /([[:alnum:]])/; 601 check_taint_not $&, "\$& from /([[:alnum:]])/"; 602 check_taint_not $`, "\t\$`"; 603 check_taint_not $', "\t\$'"; 604 check_taint_not $+, "\t\$+"; 605 check_taint_not $1, "\t\$1"; 606 check_taint_not $2, "\t\$2"; 607 608 /(.)/; # untaint $&, $`, $', $+, $1. 609 check_taint_not $&, "\$& from /(.)/"; 610 611 /([[:^alnum:]])/; 612 check_taint_not $&, "\$& from /([[:^alnum:]])/"; 613 check_taint_not $`, "\t\$`"; 614 check_taint_not $', "\t\$'"; 615 check_taint_not $+, "\t\$+"; 616 check_taint_not $1, "\t\$1"; 617 check_taint_not $2, "\t\$2"; 618 619 "a" =~ /(a)|(\w)/; 620 check_taint_not $&, "\$& from /(a)|(\\w)/"; 621 check_taint_not $`, "\t\$`"; 622 check_taint_not $', "\t\$'"; 623 check_taint_not $+, "\t\$+"; 624 check_taint_not $1, "\t\$1"; 625 ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'"); 626 ok(! defined $2, ("\t" x 5) . "\$2 is undefined"); 627 check_taint_not $2, "\t\$2"; 628 check_taint_not $3, "\t\$3"; 629 630 /(.)/; # untaint $&, $`, $', $+, $1. 631 check_taint_not $&, "\$& from /(.)/"; 632 633 "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; 634 check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i"; 635 check_taint_not $`, "\t\$`"; 636 check_taint_not $', "\t\$'"; 637 check_taint_not $+, "\t\$+"; 638 check_taint_not $1, "\t\$1"; 639 ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'"); 640 check_taint_not $2, "\t\$2"; 641 642 /(.)/; # untaint $&, $`, $', $+, $1. 643 check_taint_not $&, "\$& from /./"; 644 645 "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; 646 check_taint_not $&, "\$& from /(\\N{KELVIN SIGN})/i"; 647 check_taint_not $`, "\t\$`"; 648 check_taint_not $', "\t\$'"; 649 check_taint_not $+, "\t\$+"; 650 check_taint_not $1, "\t\$1"; 651 check_taint_not $2, "\t\$2"; 652 653 /(.)/; # untaint $&, $`, $', $+, $1. 654 check_taint_not $&, "\$& from /(.)/"; 655 656 "a:" =~ /(.)\b(.)/; 657 check_taint_not $&, "\$& from /(.)\\b(.)/"; 658 check_taint_not $`, "\t\$`"; 659 check_taint_not $', "\t\$'"; 660 check_taint_not $+, "\t\$+"; 661 check_taint_not $1, "\t\$1"; 662 check_taint_not $2, "\t\$2"; 663 check_taint_not $3, "\t\$3"; 664 665 /(.)/; # untaint $&, $`, $', $+, $1. 666 check_taint_not $&, "\$& from /./"; 667 668 "aa" =~ /(.)\B(.)/; 669 check_taint_not $&, "\$& from /(.)\\B(.)/"; 670 check_taint_not $`, "\t\$`"; 671 check_taint_not $', "\t\$'"; 672 check_taint_not $+, "\t\$+"; 673 check_taint_not $1, "\t\$1"; 674 check_taint_not $2, "\t\$2"; 675 check_taint_not $3, "\t\$3"; 676 677 /(.)/; # untaint $&, $`, $', $+, $1. 678 check_taint_not $&, "\$& from /./"; 679 680 "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent 681 check_taint_not $&, "\$ & from /(.).(\\1)/"; 682 check_taint_not $`, "\t\$`"; 683 check_taint_not $', "\t\$'"; 684 check_taint_not $+, "\t\$+"; 685 check_taint_not $1, "\t\$1"; 686 check_taint_not $2, "\t\$2"; 687 check_taint_not $3, "\t\$3"; 688 689 /(.)/; # untaint $&, $`, $', $+, $1. 690 check_taint_not $&, "\$ & from /./"; 691 692 $_ = $a; # untaint $_ 693 694 check_taint_not $_, 'untainting $_ works'; 695 696 /(b)/; 697 check_taint_not $&, "\$ & from /(b)/"; 698 check_taint_not $`, "\t\$`"; 699 check_taint_not $', "\t\$'"; 700 check_taint_not $+, "\t\$+"; 701 check_taint_not $1, "\t\$1"; 702 check_taint_not $2, "\t\$2"; 703 704 $_ = $a; # untaint $_ 705 706 check_taint_not $_, 'untainting $_ works'; 707 708 s/(.+)/b/; 709 check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/'; 710 check_taint_not $&, "\t\$&"; 711 check_taint_not $`, "\t\$`"; 712 check_taint_not $', "\t\$'"; 713 check_taint_not $+, "\t\$+"; 714 check_taint_not $1, "\t\$1"; 715 check_taint_not $2, "\t\$2"; 716 717 $b = $a; # untaint $b 718 719 ($b = $a) =~ s/\w/$&/; 720 check_taint_not $b, '$b from ($b = $a) =~ s/\w/$&/'; 721 check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; 722 723 $_ = $a; # untaint $_ 724 725 s/(\w)/\l$1/; 726 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint 727 check_taint_not $&, "\t\$&"; 728 check_taint_not $`, "\t\$`"; 729 check_taint_not $', "\t\$'"; 730 check_taint_not $+, "\t\$+"; 731 check_taint_not $1, "\t\$1"; 732 check_taint_not $2, "\t\$2"; 733 734 $_ = $a; # untaint $_ 735 736 s/(\w)/\L$1/; 737 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,'; 738 check_taint_not $&, "\t\$&"; 739 check_taint_not $`, "\t\$`"; 740 check_taint_not $', "\t\$'"; 741 check_taint_not $+, "\t\$+"; 742 check_taint_not $1, "\t\$1"; 743 check_taint_not $2, "\t\$2"; 744 745 $_ = $a; # untaint $_ 746 747 s/(\w)/\u$1/; 748 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/'; 749 check_taint_not $&, "\t\$&"; 750 check_taint_not $`, "\t\$`"; 751 check_taint_not $', "\t\$'"; 752 check_taint_not $+, "\t\$+"; 753 check_taint_not $1, "\t\$1"; 754 check_taint_not $2, "\t\$2"; 755 756 $_ = $a; # untaint $_ 757 758 s/(\w)/\U$1/; 759 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/'; 760 check_taint_not $&, "\t\$&"; 761 check_taint_not $`, "\t\$`"; 762 check_taint_not $', "\t\$'"; 763 check_taint_not $+, "\t\$+"; 764 check_taint_not $1, "\t\$1"; 765 check_taint_not $2, "\t\$2"; 766 767 # After all this tainting $a should be cool. 768 769 check_taint_not $a, '$a still not tainted'; 770 771 "a" =~ /([a-z])/; 772 check_taint_not $1, '"a" =~ /([a-z])/'; 773 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 774 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; 775 776} 777 778# Here are in scope of 'use locale' 779 780# I think we've seen quite enough of taint. 781# Let us do some *real* locale work now, 782# unless setlocale() is missing (i.e. minitest). 783 784# The test number before our first setlocale() 785my $final_without_setlocale = $test_num; 786 787# Find locales. 788 789debug "Scanning for locales...\n"; 790 791require POSIX; import POSIX ':locale_h'; 792my $categories = [ 'LC_CTYPE', 'LC_NUMERIC', 'LC_ALL' ]; 793my @Locale; 794my @include_incompatible_locales; 795if ($^O eq "aix" 796 and version->new(($Config{osvers} =~ /^(\d+(\.\d+))/)[0]) < 7) { 797 # https://www.ibm.com/support/pages/apar/IV22097 798 skip("setlocale broken on old AIX"); 799} 800else { 801 debug "Scanning for just compatible"; 802 @Locale = find_locales($categories); 803 debug "Scanning for even incompatible"; 804 @include_incompatible_locales = find_locales($categories, 805 'even incompatible locales'); 806} 807# The locales included in the incompatible list that aren't in the compatible 808# one. 809my @incompatible_locales; 810if (@Locale < @include_incompatible_locales) { 811 my %seen; 812 @seen{@Locale} = (); 813 814 foreach my $item (@include_incompatible_locales) { 815 push @incompatible_locales, $item unless exists $seen{$item}; 816 } 817 818 # For each bad locale, switch into it to find out why it's incompatible 819 for my $bad_locale (@incompatible_locales) { 820 my @warnings; 821 822 use warnings 'locale'; 823 824 local $SIG{__WARN__} = sub { 825 my $warning = $_[0]; 826 chomp $warning; 827 push @warnings, ($warning =~ s/\n/\n# /sgr); 828 }; 829 830 debug "Trying incompatible $bad_locale"; 831 my $ret = setlocale(&POSIX::LC_CTYPE, $bad_locale); 832 833 my $message = "testing of locale '$bad_locale' is skipped"; 834 if (@warnings) { 835 skip $message . ":\n# " . join "\n# ", @warnings; 836 } 837 elsif (! $ret) { 838 skip("$message:\n#" 839 . " setlocale(&POSIX::LC_CTYPE, '$bad_locale') failed"); 840 } 841 else { 842 fail $message . ", because it is was found to be incompatible with" 843 . " Perl, but could not discern reason"; 844 } 845 } 846} 847 848debug "Locales =\n"; 849for ( @Locale ) { 850 debug "$_\n"; 851} 852 853unless (@Locale) { 854 print "1..$test_num\n"; 855 exit; 856} 857 858 859setlocale(&POSIX::LC_ALL, "C"); 860 861my %posixes; 862 863my %Problem; 864my %Okay; 865my %Known_bad_locale; # Failed test for a locale known to be bad 866my %Testing; 867my @Added_alpha; # Alphas that aren't in the C locale. 868my %test_names; 869 870sub disp_chars { 871 # This returns a display string denoting the input parameter @_, each 872 # entry of which is a single character in the range 0-255. The first part 873 # of the output is a string of the characters in @_ that are ASCII 874 # graphics, and hence unambiguously displayable. They are given by code 875 # point order. The second part is the remaining code points, the ordinals 876 # of which are each displayed as 2-digit hex. Blanks are inserted so as 877 # to keep anything from the first part looking like a 2-digit hex number. 878 879 no locale; 880 my @chars = sort { ord $a <=> ord $b } @_; 881 my $output = ""; 882 my $range_start; 883 my $start_class; 884 push @chars, chr(258); # This sentinel simplifies the loop termination 885 # logic 886 foreach my $i (0 .. @chars - 1) { 887 my $char = $chars[$i]; 888 my $range_end; 889 my $class; 890 891 # We avoid using [:posix:] classes, as these are being tested in this 892 # file. Each equivalence class below is for things that can appear in 893 # a range; those that can't be in a range have class -1. 0 for those 894 # which should be output in hex; and >0 for the other ranges 895 if ($char =~ /[A-Z]/) { 896 $class = 2; 897 } 898 elsif ($char =~ /[a-z]/) { 899 $class = 3; 900 } 901 elsif ($char =~ /[0-9]/) { 902 $class = 4; 903 } 904 # Uncomment to get literal punctuation displayed instead of hex 905 #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) { 906 # $class = -1; # Punct never appears in a range 907 #} 908 else { 909 $class = 0; # Output in hex 910 } 911 912 if (! defined $range_start) { 913 if ($class < 0) { 914 $output .= " " . $char; 915 } 916 else { 917 $range_start = ord $char; 918 $start_class = $class; 919 } 920 } # A range ends if not consecutive, or the class-type changes 921 elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1 922 || $class != $start_class) 923 { 924 925 # Here, the current character is not in the range. This means the 926 # previous character must have been. Output the range up through 927 # that one. 928 my $range_length = $range_end - $range_start + 1; 929 if ($start_class > 0) { 930 $output .= " " . chr($range_start); 931 $output .= "-" . chr($range_end) if $range_length > 1; 932 } 933 else { 934 $output .= sprintf(" %02X", $range_start); 935 $output .= sprintf("-%02X", $range_end) if $range_length > 1; 936 } 937 938 # Handle the new current character, as potentially beginning a new 939 # range 940 undef $range_start; 941 redo; 942 } 943 } 944 945 $output =~ s/^ //; 946 return $output; 947} 948 949sub disp_str ($) { 950 my $string = shift; 951 952 # Displays the string unambiguously. ASCII printables are always output 953 # as-is, though perhaps separated by blanks from other characters. If 954 # entirely printable ASCII, just returns the string. Otherwise if valid 955 # UTF-8 it uses the character names for non-printable-ASCII. Otherwise it 956 # outputs hex for each non-ASCII-printable byte. 957 958 return $string if $string =~ / ^ [[:print:]]* $/xa; 959 960 my $result = ""; 961 my $prev_was_punct = 1; # Beginning is considered punct 962 if (utf8::valid($string) && utf8::is_utf8($string)) { 963 use charnames (); 964 foreach my $char (split "", $string) { 965 966 # Keep punctuation adjacent to other characters; otherwise 967 # separate them with a blank 968 if ($char =~ /[[:punct:]]/a) { 969 $result .= $char; 970 $prev_was_punct = 1; 971 } 972 elsif ($char =~ /[[:print:]]/a) { 973 $result .= " " unless $prev_was_punct; 974 $result .= $char; 975 $prev_was_punct = 0; 976 } 977 else { 978 $result .= " " unless $prev_was_punct; 979 my $name = charnames::viacode(ord $char); 980 $result .= (defined $name) ? $name : ':unknown:'; 981 $prev_was_punct = 0; 982 } 983 } 984 } 985 else { 986 use bytes; 987 foreach my $char (split "", $string) { 988 if ($char =~ /[[:punct:]]/a) { 989 $result .= $char; 990 $prev_was_punct = 1; 991 } 992 elsif ($char =~ /[[:print:]]/a) { 993 $result .= " " unless $prev_was_punct; 994 $result .= $char; 995 $prev_was_punct = 0; 996 } 997 else { 998 $result .= " " unless $prev_was_punct; 999 $result .= sprintf("%02X", ord $char); 1000 $prev_was_punct = 0; 1001 } 1002 } 1003 } 1004 1005 return $result; 1006} 1007 1008sub report_result { 1009 my ($Locale, $i, $pass_fail, $message) = @_; 1010 if ($pass_fail) { 1011 push @{$Okay{$i}}, $Locale; 1012 } 1013 else { 1014 $message //= ""; 1015 $message = " ($message)" if $message; 1016 $Known_bad_locale{$i}{$Locale} = 1 if exists $known_bad_locales{$os} 1017 && $Locale =~ $known_bad_locales{$os}; 1018 $Problem{$i}{$Locale} = 1; 1019 debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n"; 1020 } 1021} 1022 1023sub report_multi_result { 1024 my ($Locale, $i, $results_ref) = @_; 1025 1026 # $results_ref points to an array, each element of which is a character that was 1027 # in error for this test numbered '$i'. If empty, the test passed 1028 1029 my $message = ""; 1030 if (@$results_ref) { 1031 $message = join " ", "for", disp_chars(@$results_ref); 1032 } 1033 report_result($Locale, $i, @$results_ref == 0, $message); 1034} 1035 1036my $first_locales_test_number = $final_without_setlocale 1037 + 1 + @incompatible_locales; 1038my $locales_test_number; 1039my $not_necessarily_a_problem_test_number; 1040my $first_casing_test_number; 1041my %setlocale_failed; # List of locales that setlocale() didn't work on 1042 1043foreach my $Locale (@Locale) { 1044 $locales_test_number = $first_locales_test_number - 1; 1045 debug "\n"; 1046 debug "Locale = $Locale\n"; 1047 1048 unless (setlocale(&POSIX::LC_ALL, $Locale)) { 1049 $setlocale_failed{$Locale} = $Locale; 1050 next; 1051 } 1052 1053 # We test UTF-8 locales only under ':not_characters'; It is easier to 1054 # test them in other test files than here. Non- UTF-8 locales are tested 1055 # only under plain 'use locale', as otherwise we would have to convert 1056 # everything in them to Unicode. 1057 1058 my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X 1059 my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X 1060 my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X 1061 1062 my $is_utf8_locale = is_locale_utf8($Locale); 1063 1064 if ($debug) { 1065 debug "code set = " . langinfo(CODESET); 1066 debug "is utf8 locale? = $is_utf8_locale\n"; 1067 debug "radix = " . disp_str(langinfo(RADIXCHAR)) . "\n"; 1068 debug "currency = " . disp_str(langinfo(CRNCYSTR)); 1069 } 1070 1071 if (! $is_utf8_locale) { 1072 use locale; 1073 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; 1074 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; 1075 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; 1076 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; 1077 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; 1078 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; 1079 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; 1080 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; 1081 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; 1082 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; 1083 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; 1084 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; 1085 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; 1086 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; 1087 @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255; 1088 1089 # Sieve the uppercase and the lowercase. 1090 1091 for (@{$posixes{'word'}}) { 1092 if (/[^\d_]/) { # skip digits and the _ 1093 if (uc($_) eq $_) { 1094 $UPPER{$_} = $_; 1095 } 1096 if (lc($_) eq $_) { 1097 $lower{$_} = $_; 1098 } 1099 } 1100 } 1101 } 1102 else { 1103 use locale ':not_characters'; 1104 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; 1105 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; 1106 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; 1107 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; 1108 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; 1109 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; 1110 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; 1111 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; 1112 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; 1113 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; 1114 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; 1115 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; 1116 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; 1117 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; 1118 @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255; 1119 for (@{$posixes{'word'}}) { 1120 if (/[^\d_]/) { # skip digits and the _ 1121 if (uc($_) eq $_) { 1122 $UPPER{$_} = $_; 1123 } 1124 if (lc($_) eq $_) { 1125 $lower{$_} = $_; 1126 } 1127 } 1128 } 1129 } 1130 1131 # Ordered, where possible, in groups of "this is a subset of the next 1132 # one" 1133 debug ":upper: = ", disp_chars(@{$posixes{'upper'}}), "\n"; 1134 debug ":lower: = ", disp_chars(@{$posixes{'lower'}}), "\n"; 1135 debug ":cased: = ", disp_chars(@{$posixes{'cased'}}), "\n"; 1136 debug ":alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n"; 1137 debug ":alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n"; 1138 debug ' \w = ', disp_chars(@{$posixes{'word'}}), "\n"; 1139 debug ":graph: = ", disp_chars(@{$posixes{'graph'}}), "\n"; 1140 debug ":print: = ", disp_chars(@{$posixes{'print'}}), "\n"; 1141 debug ' \d = ', disp_chars(@{$posixes{'digit'}}), "\n"; 1142 debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n"; 1143 debug ":blank: = ", disp_chars(@{$posixes{'blank'}}), "\n"; 1144 debug ' \s = ', disp_chars(@{$posixes{'space'}}), "\n"; 1145 debug ":punct: = ", disp_chars(@{$posixes{'punct'}}), "\n"; 1146 debug ":cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n"; 1147 debug ":ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n"; 1148 1149 foreach (keys %UPPER) { 1150 1151 $BoThCaSe{$_}++ if exists $lower{$_}; 1152 } 1153 foreach (keys %lower) { 1154 $BoThCaSe{$_}++ if exists $UPPER{$_}; 1155 } 1156 foreach (keys %BoThCaSe) { 1157 delete $UPPER{$_}; 1158 delete $lower{$_}; 1159 } 1160 1161 my %Unassigned; 1162 foreach my $ord ( 0 .. 255 ) { 1163 $Unassigned{chr $ord} = 1; 1164 } 1165 foreach my $class (keys %posixes) { 1166 foreach my $char (@{$posixes{$class}}) { 1167 delete $Unassigned{$char}; 1168 } 1169 } 1170 1171 debug "UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n"; 1172 debug "lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n"; 1173 debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n"; 1174 debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n"; 1175 1176 my @failures; 1177 my @fold_failures; 1178 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) { 1179 my $ok; 1180 my $fold_ok; 1181 if ($is_utf8_locale) { 1182 use locale ':not_characters'; 1183 $ok = $x =~ /[[:upper:]]/; 1184 $fold_ok = $x =~ /[[:lower:]]/i; 1185 } 1186 else { 1187 use locale; 1188 $ok = $x =~ /[[:upper:]]/; 1189 $fold_ok = $x =~ /[[:lower:]]/i; 1190 } 1191 push @failures, $x unless $ok; 1192 push @fold_failures, $x unless $fold_ok; 1193 } 1194 $locales_test_number++; 1195 $first_casing_test_number = $locales_test_number; 1196 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X'; 1197 report_multi_result($Locale, $locales_test_number, \@failures); 1198 1199 $locales_test_number++; 1200 1201 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X'; 1202 report_multi_result($Locale, $locales_test_number, \@fold_failures); 1203 1204 undef @failures; 1205 undef @fold_failures; 1206 1207 foreach my $x (sort { ord $a <=> ord $b } keys %lower) { 1208 my $ok; 1209 my $fold_ok; 1210 if ($is_utf8_locale) { 1211 use locale ':not_characters'; 1212 $ok = $x =~ /[[:lower:]]/; 1213 $fold_ok = $x =~ /[[:upper:]]/i; 1214 } 1215 else { 1216 use locale; 1217 $ok = $x =~ /[[:lower:]]/; 1218 $fold_ok = $x =~ /[[:upper:]]/i; 1219 } 1220 push @failures, $x unless $ok; 1221 push @fold_failures, $x unless $fold_ok; 1222 } 1223 1224 $locales_test_number++; 1225 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X'; 1226 report_multi_result($Locale, $locales_test_number, \@failures); 1227 1228 $locales_test_number++; 1229 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X'; 1230 report_multi_result($Locale, $locales_test_number, \@fold_failures); 1231 1232 { # Find the alphabetic characters that are not considered alphabetics 1233 # in the default (C) locale. 1234 1235 no locale; 1236 1237 @Added_alpha = (); 1238 for (keys %UPPER, keys %lower, keys %BoThCaSe) { 1239 push(@Added_alpha, $_) if (/\W/); 1240 } 1241 } 1242 1243 @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha; 1244 1245 debug "Added_alpha = ", disp_chars(@Added_alpha), "\n"; 1246 1247 # Cross-check the whole 8-bit character set. 1248 1249 ++$locales_test_number; 1250 my @f; 1251 $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical'; 1252 for (map { chr } 0..255) { 1253 if ($is_utf8_locale) { 1254 use locale ':not_characters'; 1255 push @f, $_ unless /[[:word:]]/ == /\w/; 1256 } 1257 else { 1258 push @f, $_ unless /[[:word:]]/ == /\w/; 1259 } 1260 } 1261 report_multi_result($Locale, $locales_test_number, \@f); 1262 1263 ++$locales_test_number; 1264 undef @f; 1265 $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical'; 1266 for (map { chr } 0..255) { 1267 if ($is_utf8_locale) { 1268 use locale ':not_characters'; 1269 push @f, $_ unless /[[:digit:]]/ == /\d/; 1270 } 1271 else { 1272 push @f, $_ unless /[[:digit:]]/ == /\d/; 1273 } 1274 } 1275 report_multi_result($Locale, $locales_test_number, \@f); 1276 1277 ++$locales_test_number; 1278 undef @f; 1279 $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical'; 1280 for (map { chr } 0..255) { 1281 if ($is_utf8_locale) { 1282 use locale ':not_characters'; 1283 push @f, $_ unless /[[:space:]]/ == /\s/; 1284 } 1285 else { 1286 push @f, $_ unless /[[:space:]]/ == /\s/; 1287 } 1288 } 1289 report_multi_result($Locale, $locales_test_number, \@f); 1290 1291 ++$locales_test_number; 1292 undef @f; 1293 $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive'; 1294 for (map { chr } 0..255) { 1295 if ($is_utf8_locale) { 1296 use locale ':not_characters'; 1297 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || 1298 (/[[:alnum:]]/ xor /[[:^alnum:]]/) || 1299 (/[[:ascii:]]/ xor /[[:^ascii:]]/) || 1300 (/[[:blank:]]/ xor /[[:^blank:]]/) || 1301 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || 1302 (/[[:digit:]]/ xor /[[:^digit:]]/) || 1303 (/[[:graph:]]/ xor /[[:^graph:]]/) || 1304 (/[[:lower:]]/ xor /[[:^lower:]]/) || 1305 (/[[:print:]]/ xor /[[:^print:]]/) || 1306 (/[[:space:]]/ xor /[[:^space:]]/) || 1307 (/[[:upper:]]/ xor /[[:^upper:]]/) || 1308 (/[[:word:]]/ xor /[[:^word:]]/) || 1309 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || 1310 1311 # effectively is what [:cased:] would be if it existed. 1312 (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i); 1313 } 1314 else { 1315 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || 1316 (/[[:alnum:]]/ xor /[[:^alnum:]]/) || 1317 (/[[:ascii:]]/ xor /[[:^ascii:]]/) || 1318 (/[[:blank:]]/ xor /[[:^blank:]]/) || 1319 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || 1320 (/[[:digit:]]/ xor /[[:^digit:]]/) || 1321 (/[[:graph:]]/ xor /[[:^graph:]]/) || 1322 (/[[:lower:]]/ xor /[[:^lower:]]/) || 1323 (/[[:print:]]/ xor /[[:^print:]]/) || 1324 (/[[:space:]]/ xor /[[:^space:]]/) || 1325 (/[[:upper:]]/ xor /[[:^upper:]]/) || 1326 (/[[:word:]]/ xor /[[:^word:]]/) || 1327 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || 1328 (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i); 1329 } 1330 } 1331 report_multi_result($Locale, $locales_test_number, \@f); 1332 1333 # The rules for the relationships are given in: 1334 # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html 1335 1336 1337 ++$locales_test_number; 1338 undef @f; 1339 $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z'; 1340 for ('a' .. 'z') { 1341 if ($is_utf8_locale) { 1342 use locale ':not_characters'; 1343 push @f, $_ unless /[[:lower:]]/; 1344 } 1345 else { 1346 push @f, $_ unless /[[:lower:]]/; 1347 } 1348 } 1349 report_multi_result($Locale, $locales_test_number, \@f); 1350 1351 ++$locales_test_number; 1352 undef @f; 1353 $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]'; 1354 for (map { chr } 0..255) { 1355 if ($is_utf8_locale) { 1356 use locale ':not_characters'; 1357 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; 1358 } 1359 else { 1360 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; 1361 } 1362 } 1363 report_multi_result($Locale, $locales_test_number, \@f); 1364 1365 ++$locales_test_number; 1366 undef @f; 1367 $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z'; 1368 for ('A' .. 'Z') { 1369 if ($is_utf8_locale) { 1370 use locale ':not_characters'; 1371 push @f, $_ unless /[[:upper:]]/; 1372 } 1373 else { 1374 push @f, $_ unless /[[:upper:]]/; 1375 } 1376 } 1377 report_multi_result($Locale, $locales_test_number, \@f); 1378 1379 ++$locales_test_number; 1380 undef @f; 1381 $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]'; 1382 for (map { chr } 0..255) { 1383 if ($is_utf8_locale) { 1384 use locale ':not_characters'; 1385 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; 1386 } 1387 else { 1388 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; 1389 } 1390 } 1391 report_multi_result($Locale, $locales_test_number, \@f); 1392 1393 ++$locales_test_number; 1394 undef @f; 1395 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]'; 1396 for (map { chr } 0..255) { 1397 if ($is_utf8_locale) { 1398 use locale ':not_characters'; 1399 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; 1400 } 1401 else { 1402 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; 1403 } 1404 } 1405 report_multi_result($Locale, $locales_test_number, \@f); 1406 1407 ++$locales_test_number; 1408 undef @f; 1409 $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]'; 1410 for (map { chr } 0..255) { 1411 if ($is_utf8_locale) { 1412 use locale ':not_characters'; 1413 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; 1414 } 1415 else { 1416 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; 1417 } 1418 } 1419 report_multi_result($Locale, $locales_test_number, \@f); 1420 1421 ++$locales_test_number; 1422 undef @f; 1423 $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9'; 1424 for ('0' .. '9') { 1425 if ($is_utf8_locale) { 1426 use locale ':not_characters'; 1427 push @f, $_ unless /[[:digit:]]/; 1428 } 1429 else { 1430 push @f, $_ unless /[[:digit:]]/; 1431 } 1432 } 1433 report_multi_result($Locale, $locales_test_number, \@f); 1434 1435 ++$locales_test_number; 1436 undef @f; 1437 $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]'; 1438 for (map { chr } 0..255) { 1439 if ($is_utf8_locale) { 1440 use locale ':not_characters'; 1441 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; 1442 } 1443 else { 1444 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; 1445 } 1446 } 1447 report_multi_result($Locale, $locales_test_number, \@f); 1448 1449 ++$locales_test_number; 1450 undef @f; 1451 $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points'; 1452 report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20); 1453 1454 ++$locales_test_number; 1455 undef @f; 1456 $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive'; 1457 if (@{$posixes{'digit'}} == 20) { 1458 my $previous_ord; 1459 for (map { chr } 0..255) { 1460 next unless /[[:digit:]]/; 1461 next if /[0-9]/; 1462 if (defined $previous_ord) { 1463 if ($is_utf8_locale) { 1464 use locale ':not_characters'; 1465 push @f, $_ if ord $_ != $previous_ord + 1; 1466 } 1467 else { 1468 push @f, $_ if ord $_ != $previous_ord + 1; 1469 } 1470 } 1471 $previous_ord = ord $_; 1472 } 1473 } 1474 report_multi_result($Locale, $locales_test_number, \@f); 1475 1476 ++$locales_test_number; 1477 undef @f; 1478 my @xdigit_digits; # :digit: & :xdigit: 1479 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars'; 1480 for (map { chr } 0..255) { 1481 if ($is_utf8_locale) { 1482 use locale ':not_characters'; 1483 # For utf8 locales, we actually use a stricter test: that :digit: 1484 # is a subset of :xdigit:, as we know that only 0-9 should match 1485 push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/; 1486 } 1487 else { 1488 push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/; 1489 } 1490 } 1491 if (! $is_utf8_locale) { 1492 1493 # For non-utf8 locales, @xdigit_digits is a list of the characters 1494 # that are both :xdigit: and :digit:. Because :digit: is stored in 1495 # increasing code point order (unless the tests above failed), 1496 # @xdigit_digits is as well. There should be exactly 10 or 1497 # 20 of these. 1498 if (@xdigit_digits != 10 && @xdigit_digits != 20) { 1499 @f = @xdigit_digits; 1500 } 1501 else { 1502 1503 # Look for contiguity in the series, adding any wrong ones to @f 1504 my @temp = @xdigit_digits; 1505 while (@temp > 1) { 1506 push @f, $temp[1] if ($temp[0] != $temp[1] - 1) 1507 1508 # Skip this test for the 0th character of 1509 # the second block of 10, as it won't be 1510 # contiguous with the previous block 1511 && (! defined $xdigit_digits[10] 1512 || $temp[1] != $xdigit_digits[10]); 1513 shift @temp; 1514 } 1515 } 1516 } 1517 1518 report_multi_result($Locale, $locales_test_number, \@f); 1519 1520 ++$locales_test_number; 1521 undef @f; 1522 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f'; 1523 for ('A' .. 'F', 'a' .. 'f') { 1524 if ($is_utf8_locale) { 1525 use locale ':not_characters'; 1526 push @f, $_ unless /[[:xdigit:]]/; 1527 } 1528 else { 1529 push @f, $_ unless /[[:xdigit:]]/; 1530 } 1531 } 1532 report_multi_result($Locale, $locales_test_number, \@f); 1533 1534 ++$locales_test_number; 1535 undef @f; 1536 $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points'; 1537 my $previous_ord; 1538 my $count = 0; 1539 for my $chr (map { chr } 0..255) { 1540 next unless $chr =~ /[[:xdigit:]]/; 1541 if ($is_utf8_locale) { 1542 next if $chr =~ /[[:digit:]]/; 1543 } 1544 else { 1545 next if grep { $chr eq $_ } @xdigit_digits; 1546 } 1547 next if $chr =~ /[A-Fa-f]/; 1548 if (defined $previous_ord) { 1549 if ($is_utf8_locale) { 1550 use locale ':not_characters'; 1551 push @f, $chr if ord $chr != $previous_ord + 1; 1552 } 1553 else { 1554 push @f, $chr if ord $chr != $previous_ord + 1; 1555 } 1556 } 1557 $count++; 1558 if ($count == 6) { 1559 undef $previous_ord; 1560 } 1561 else { 1562 $previous_ord = ord $chr; 1563 } 1564 } 1565 report_multi_result($Locale, $locales_test_number, \@f); 1566 1567 ++$locales_test_number; 1568 undef @f; 1569 $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]'; 1570 for (map { chr } 0..255) { 1571 if ($is_utf8_locale) { 1572 use locale ':not_characters'; 1573 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; 1574 } 1575 else { 1576 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; 1577 } 1578 } 1579 report_multi_result($Locale, $locales_test_number, \@f); 1580 1581 # Note that xdigit doesn't have to be a subset of alnum 1582 1583 ++$locales_test_number; 1584 undef @f; 1585 $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]'; 1586 for (map { chr } 0..255) { 1587 if ($is_utf8_locale) { 1588 use locale ':not_characters'; 1589 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; 1590 } 1591 else { 1592 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; 1593 } 1594 } 1595 report_multi_result($Locale, $locales_test_number, \@f); 1596 1597 ++$locales_test_number; 1598 undef @f; 1599 $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]'; 1600 if ($is_utf8_locale) { 1601 use locale ':not_characters'; 1602 push @f, " " if " " =~ /[[:graph:]]/; 1603 } 1604 else { 1605 push @f, " " if " " =~ /[[:graph:]]/; 1606 } 1607 report_multi_result($Locale, $locales_test_number, \@f); 1608 1609 ++$locales_test_number; 1610 undef @f; 1611 $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]'; 1612 for (' ', "\f", "\n", "\r", "\t", "\cK") { 1613 if ($is_utf8_locale) { 1614 use locale ':not_characters'; 1615 push @f, $_ unless /[[:space:]]/; 1616 } 1617 else { 1618 push @f, $_ unless /[[:space:]]/; 1619 } 1620 } 1621 report_multi_result($Locale, $locales_test_number, \@f); 1622 1623 ++$locales_test_number; 1624 undef @f; 1625 $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]'; 1626 for (' ', "\t") { 1627 if ($is_utf8_locale) { 1628 use locale ':not_characters'; 1629 push @f, $_ unless /[[:blank:]]/; 1630 } 1631 else { 1632 push @f, $_ unless /[[:blank:]]/; 1633 } 1634 } 1635 report_multi_result($Locale, $locales_test_number, \@f); 1636 1637 ++$locales_test_number; 1638 undef @f; 1639 $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]'; 1640 for (map { chr } 0..255) { 1641 if ($is_utf8_locale) { 1642 use locale ':not_characters'; 1643 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; 1644 } 1645 else { 1646 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; 1647 } 1648 } 1649 report_multi_result($Locale, $locales_test_number, \@f); 1650 1651 ++$locales_test_number; 1652 undef @f; 1653 $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]'; 1654 for (map { chr } 0..255) { 1655 if ($is_utf8_locale) { 1656 use locale ':not_characters'; 1657 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; 1658 } 1659 else { 1660 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; 1661 } 1662 } 1663 report_multi_result($Locale, $locales_test_number, \@f); 1664 1665 ++$locales_test_number; 1666 undef @f; 1667 $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]'; 1668 if ($is_utf8_locale) { 1669 use locale ':not_characters'; 1670 push @f, " " if " " !~ /[[:print:]]/; 1671 } 1672 else { 1673 push @f, " " if " " !~ /[[:print:]]/; 1674 } 1675 report_multi_result($Locale, $locales_test_number, \@f); 1676 1677 ++$locales_test_number; 1678 undef @f; 1679 $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]'; 1680 for (map { chr } 0..255) { 1681 if ($is_utf8_locale) { 1682 use locale ':not_characters'; 1683 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); 1684 } 1685 else { 1686 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); 1687 } 1688 } 1689 report_multi_result($Locale, $locales_test_number, \@f); 1690 1691 ++$locales_test_number; 1692 undef @f; 1693 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]'; 1694 for (map { chr } 0..255) { 1695 if ($is_utf8_locale) { 1696 use locale ':not_characters'; 1697 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; 1698 } 1699 else { 1700 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; 1701 } 1702 } 1703 report_multi_result($Locale, $locales_test_number, \@f); 1704 1705 ++$locales_test_number; 1706 undef @f; 1707 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]'; 1708 for (map { chr } 0..255) { 1709 if ($is_utf8_locale) { 1710 use locale ':not_characters'; 1711 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; 1712 } 1713 else { 1714 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; 1715 } 1716 } 1717 report_multi_result($Locale, $locales_test_number, \@f); 1718 1719 ++$locales_test_number; 1720 undef @f; 1721 $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]'; 1722 for (map { chr } 0..255) { 1723 if ($is_utf8_locale) { 1724 use locale ':not_characters'; 1725 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); 1726 } 1727 else { 1728 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); 1729 } 1730 } 1731 report_multi_result($Locale, $locales_test_number, \@f); 1732 1733 ++$locales_test_number; 1734 undef @f; 1735 $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]'; 1736 for (map { chr } 0..255) { 1737 if ($is_utf8_locale) { 1738 use locale ':not_characters'; 1739 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); 1740 } 1741 else { 1742 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); 1743 } 1744 } 1745 report_multi_result($Locale, $locales_test_number, \@f); 1746 1747 foreach ($first_casing_test_number..$locales_test_number) { 1748 $problematical_tests{$_} = 1; 1749 } 1750 1751 1752 # Test for read-only scalars' locale vs non-locale comparisons. 1753 1754 { 1755 no locale; 1756 my $ok; 1757 $a = "qwerty"; 1758 if ($is_utf8_locale) { 1759 use locale ':not_characters'; 1760 $ok = ($a cmp "qwerty") == 0; 1761 } 1762 else { 1763 use locale; 1764 $ok = ($a cmp "qwerty") == 0; 1765 } 1766 report_result($Locale, ++$locales_test_number, $ok); 1767 $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale'; 1768 } 1769 1770 { 1771 my ($from, $to, $lesser, $greater, 1772 @test, %test, $test, $yes, $no, $sign); 1773 1774 ++$locales_test_number; 1775 $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work'; 1776 $not_necessarily_a_problem_test_number = $locales_test_number; 1777 for (0..9) { 1778 # Select a slice. 1779 $from = int(($_*@{$posixes{'word'}})/10); 1780 $to = $from + int(@{$posixes{'word'}}/10); 1781 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); 1782 $lesser = join('', @{$posixes{'word'}}[$from..$to]); 1783 # Select a slice one character on. 1784 $from++; $to++; 1785 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); 1786 $greater = join('', @{$posixes{'word'}}[$from..$to]); 1787 if ($is_utf8_locale) { 1788 use locale ':not_characters'; 1789 ($yes, $no, $sign) = ($lesser lt $greater 1790 ? (" ", "not ", 1) 1791 : ("not ", " ", -1)); 1792 } 1793 else { 1794 use locale; 1795 ($yes, $no, $sign) = ($lesser lt $greater 1796 ? (" ", "not ", 1) 1797 : ("not ", " ", -1)); 1798 } 1799 # all these tests should FAIL (return 0). Exact lt or gt cannot 1800 # be tested because in some locales, say, eacute and E may test 1801 # equal. 1802 @test = 1803 ( 1804 $no.' ($lesser le $greater)', # 1 1805 'not ($lesser ne $greater)', # 2 1806 ' ($lesser eq $greater)', # 3 1807 $yes.' ($lesser ge $greater)', # 4 1808 $yes.' ($lesser ge $greater)', # 5 1809 $yes.' ($greater le $lesser )', # 7 1810 'not ($greater ne $lesser )', # 8 1811 ' ($greater eq $lesser )', # 9 1812 $no.' ($greater ge $lesser )', # 10 1813 'not (($lesser cmp $greater) == -($sign))' # 11 1814 ); 1815 @test{@test} = 0 x @test; 1816 $test = 0; 1817 for my $ti (@test) { 1818 if ($is_utf8_locale) { 1819 use locale ':not_characters'; 1820 $test{$ti} = eval $ti; 1821 } 1822 else { 1823 # Already in 'use locale'; 1824 $test{$ti} = eval $ti; 1825 } 1826 $test ||= $test{$ti} 1827 } 1828 report_result($Locale, $locales_test_number, $test == 0); 1829 if ($test) { 1830 debug "lesser = '$lesser'\n"; 1831 debug "greater = '$greater'\n"; 1832 debug "lesser cmp greater = ", 1833 $lesser cmp $greater, "\n"; 1834 debug "greater cmp lesser = ", 1835 $greater cmp $lesser, "\n"; 1836 debug "(greater) from = $from, to = $to\n"; 1837 for my $ti (@test) { 1838 debugf("# %-40s %-4s", $ti, 1839 $test{$ti} ? 'FAIL' : 'ok'); 1840 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { 1841 debugf("(%s == %4d)", $1, eval $1); 1842 } 1843 debugf("\n#"); 1844 } 1845 1846 last; 1847 } 1848 } 1849 1850 use locale; 1851 1852 my @sorted_controls; 1853 1854 ++$locales_test_number; 1855 $test_names{$locales_test_number} 1856 = 'Skip in locales where there are no controls;' 1857 . ' otherwise verify that \0 sorts before any (other) control'; 1858 if (! $posixes{'cntrl'}) { 1859 report_result($Locale, $locales_test_number, 1); 1860 1861 # We use all code points for the tests below since there aren't 1862 # any controls 1863 push @sorted_controls, chr $_ for 1..255; 1864 @sorted_controls = sort @sorted_controls; 1865 } 1866 else { 1867 @sorted_controls = @{$posixes{'cntrl'}}; 1868 push @sorted_controls, "\0", 1869 unless grep { $_ eq "\0" } @sorted_controls; 1870 @sorted_controls = sort @sorted_controls; 1871 my $output = ""; 1872 for my $control (@sorted_controls) { 1873 $output .= " " . disp_chars($control); 1874 } 1875 debug "sorted :cntrl: (plus NUL) = $output\n"; 1876 my $ok = $sorted_controls[0] eq "\0"; 1877 report_result($Locale, $locales_test_number, $ok); 1878 1879 shift @sorted_controls if $ok; 1880 } 1881 1882 my $lowest_control = $sorted_controls[0]; 1883 1884 ++$locales_test_number; 1885 $test_names{$locales_test_number} 1886 = 'Skip in locales where all controls have primary sorting weight; ' 1887 . 'otherwise verify that \0 doesn\'t have primary sorting weight'; 1888 if ("a${lowest_control}c" lt "ab") { 1889 report_result($Locale, $locales_test_number, 1); 1890 } 1891 else { 1892 my $ok = "ab" lt "a\0c"; 1893 report_result($Locale, $locales_test_number, $ok); 1894 } 1895 1896 ++$locales_test_number; 1897 $test_names{$locales_test_number} 1898 = 'Verify that strings with embedded NUL collate'; 1899 my $ok = "a\0a\0a" lt "a${lowest_control}a${lowest_control}a"; 1900 report_result($Locale, $locales_test_number, $ok); 1901 1902 ++$locales_test_number; 1903 $test_names{$locales_test_number} 1904 = 'Verify that strings with embedded NUL and ' 1905 . 'extra trailing NUL collate'; 1906 $ok = "a\0a\0" lt "a${lowest_control}a${lowest_control}"; 1907 report_result($Locale, $locales_test_number, $ok); 1908 1909 ++$locales_test_number; 1910 $test_names{$locales_test_number} 1911 = 'Verify that empty strings collate'; 1912 $ok = "" le ""; 1913 report_result($Locale, $locales_test_number, $ok); 1914 1915 ++$locales_test_number; 1916 $test_names{$locales_test_number} 1917 = "Skip in non-UTF-8 locales; otherwise verify that UTF8ness " 1918 . "doesn't matter with collation"; 1919 if (! $is_utf8_locale) { 1920 report_result($Locale, $locales_test_number, 1); 1921 } 1922 else { 1923 1924 # khw can't think of anything better. Start with a string that is 1925 # higher than its UTF-8 representation in both EBCDIC and ASCII 1926 my $string = chr utf8::unicode_to_native(0xff); 1927 my $utf8_string = $string; 1928 utf8::upgrade($utf8_string); 1929 1930 # 8 should be lt 9 in all locales (except ones that aren't 1931 # ASCII-based, which might fail this) 1932 $ok = ("a${string}8") lt ("a${utf8_string}9"); 1933 report_result($Locale, $locales_test_number, $ok); 1934 } 1935 1936 ++$locales_test_number; 1937 $test_names{$locales_test_number} 1938 = "Skip in UTF-8 locales; otherwise verify that single byte " 1939 . "collates before 0x100 and above"; 1940 if ($is_utf8_locale) { 1941 report_result($Locale, $locales_test_number, 1); 1942 } 1943 else { 1944 my $max_collating = chr 0; # Find byte that collates highest 1945 for my $i (0 .. 255) { 1946 my $char = chr $i; 1947 $max_collating = $char if $char gt $max_collating; 1948 } 1949 $ok = $max_collating lt chr 0x100; 1950 report_result($Locale, $locales_test_number, $ok); 1951 } 1952 1953 ++$locales_test_number; 1954 $test_names{$locales_test_number} 1955 = "Skip in UTF-8 locales; otherwise verify that 0x100 and " 1956 . "above collate in code point order"; 1957 if ($is_utf8_locale) { 1958 report_result($Locale, $locales_test_number, 1); 1959 } 1960 else { 1961 $ok = chr 0x100 lt chr 0x101; 1962 report_result($Locale, $locales_test_number, $ok); 1963 } 1964 } 1965 1966 my $ok1; 1967 my $ok2; 1968 my $ok3; 1969 my $ok4; 1970 my $ok5; 1971 my $ok6; 1972 my $ok7; 1973 my $ok8; 1974 my $ok9; 1975 my $ok10; 1976 my $ok11; 1977 my $ok12; 1978 my $ok13; 1979 my $ok14; 1980 my $ok14_5; 1981 my $ok15; 1982 my $ok16; 1983 my $ok17; 1984 my $ok18; 1985 my $ok19; 1986 my $ok20; 1987 my $ok21; 1988 1989 my $c; 1990 my $d; 1991 my $e; 1992 my $f; 1993 my $g; 1994 my $h; 1995 my $i; 1996 my $j; 1997 1998 if (! $is_utf8_locale) { 1999 use locale; 2000 2001 my ($x, $y) = (1.23, 1.23); 2002 2003 $a = "$x"; 2004 printf ''; # printf used to reset locale to "C" 2005 $b = "$y"; 2006 $ok1 = $a eq $b; 2007 2008 $c = "$x"; 2009 my $z = sprintf ''; # sprintf used to reset locale to "C" 2010 $d = "$y"; 2011 $ok2 = $c eq $d; 2012 { 2013 2014 use warnings; 2015 my $w = 0; 2016 local $SIG{__WARN__} = 2017 sub { 2018 print "# @_\n"; 2019 $w++; 2020 }; 2021 2022 # The == (among other ops) used to warn for locales 2023 # that had something else than "." as the radix character. 2024 2025 $ok3 = $c == 1.23; 2026 $ok4 = $c == $x; 2027 $ok5 = $c == $d; 2028 { 2029 no locale; 2030 2031 $e = "$x"; 2032 2033 $ok6 = $e == 1.23; 2034 $ok7 = $e == $x; 2035 $ok8 = $e == $c; 2036 } 2037 2038 $f = "1.23"; 2039 $g = 2.34; 2040 $h = 1.5; 2041 $i = 1.25; 2042 $j = "$h:$i"; 2043 2044 $ok9 = $f == 1.23; 2045 $ok10 = $f == $x; 2046 $ok11 = $f == $c; 2047 $ok12 = abs(($f + $g) - 3.57) < 0.01; 2048 $ok13 = $w == 0; 2049 $ok14 = $ok14_5 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales 2050 } 2051 { 2052 no locale; 2053 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); 2054 } 2055 $ok18 = $j eq sprintf("%g:%g", $h, $i); 2056 } 2057 else { 2058 use locale ':not_characters'; 2059 2060 my ($x, $y) = (1.23, 1.23); 2061 $a = "$x"; 2062 printf ''; # printf used to reset locale to "C" 2063 $b = "$y"; 2064 $ok1 = $a eq $b; 2065 2066 $c = "$x"; 2067 my $z = sprintf ''; # sprintf used to reset locale to "C" 2068 $d = "$y"; 2069 $ok2 = $c eq $d; 2070 { 2071 use warnings; 2072 my $w = 0; 2073 local $SIG{__WARN__} = 2074 sub { 2075 print "# @_\n"; 2076 $w++; 2077 }; 2078 $ok3 = $c == 1.23; 2079 $ok4 = $c == $x; 2080 $ok5 = $c == $d; 2081 { 2082 no locale; 2083 $e = "$x"; 2084 2085 $ok6 = $e == 1.23; 2086 $ok7 = $e == $x; 2087 $ok8 = $e == $c; 2088 } 2089 2090 $f = "1.23"; 2091 $g = 2.34; 2092 $h = 1.5; 2093 $i = 1.25; 2094 $j = "$h:$i"; 2095 2096 $ok9 = $f == 1.23; 2097 $ok10 = $f == $x; 2098 $ok11 = $f == $c; 2099 $ok12 = abs(($f + $g) - 3.57) < 0.01; 2100 $ok13 = $w == 0; 2101 2102 # Look for non-ASCII error messages, and verify that the first 2103 # such is in UTF-8 (the others almost certainly will be like the 2104 # first). This is only done if the current locale has LC_MESSAGES 2105 $ok14 = 1; 2106 $ok14_5 = 1; 2107 if ( locales_enabled('LC_MESSAGES') 2108 && setlocale(&POSIX::LC_MESSAGES, $Locale)) 2109 { 2110 foreach my $err (keys %!) { 2111 use Errno; 2112 $! = eval "&Errno::$err"; # Convert to strerror() output 2113 my $errnum = 0+$!; 2114 my $strerror = "$!"; 2115 if ("$strerror" =~ /\P{ASCII}/) { 2116 $ok14 = utf8::is_utf8($strerror); 2117 no locale; 2118 $ok14_5 = "$!" !~ /\P{ASCII}/; 2119 debug( disp_str( 2120 "non-ASCII \$! for error $errnum='$strerror'")) 2121 if ! $ok14_5; 2122 last; 2123 } 2124 } 2125 } 2126 2127 # Similarly, we verify that a non-ASCII radix is in UTF-8. This 2128 # also catches if there is a disparity between sprintf and 2129 # stringification. 2130 2131 my $string_g = "$g"; 2132 my $sprintf_g = sprintf("%g", $g); 2133 2134 $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g); 2135 $ok16 = $sprintf_g eq $string_g; 2136 } 2137 { 2138 no locale; 2139 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); 2140 } 2141 $ok18 = $j eq sprintf("%g:%g", $h, $i); 2142 } 2143 2144 $ok19 = $ok20 = 1; 2145 if (locales_enabled('LC_TIME')) { 2146 if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't 2147 # affected by 2148 # :not_characters 2149 my @times = CORE::localtime(); 2150 2151 use locale; 2152 $ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425] 2153 my $date = POSIX::strftime("'%A' '%B' '%Z' '%p'", @times); 2154 debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date)); 2155 2156 # If there is any non-ascii, it better be UTF-8 in a UTF-8 locale, 2157 # and not UTF-8 if the locale isn't UTF-8. 2158 $ok20 = $date =~ / ^ \p{ASCII}+ $ /x 2159 || $is_utf8_locale == utf8::is_utf8($date); 2160 } 2161 } 2162 2163 $ok21 = 1; 2164 if (locales_enabled('LC_MESSAGES')) { 2165 foreach my $err (keys %!) { 2166 no locale; 2167 use Errno; 2168 $! = eval "&Errno::$err"; # Convert to strerror() output 2169 my $strerror = "$!"; 2170 if ($strerror =~ /\P{ASCII}/) { 2171 $ok21 = 0; 2172 debug(disp_str("non-ASCII strerror=$strerror")); 2173 last; 2174 } 2175 } 2176 } 2177 2178 report_result($Locale, ++$locales_test_number, $ok1); 2179 $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results'; 2180 my $first_a_test = $locales_test_number; 2181 2182 debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n"; 2183 2184 report_result($Locale, ++$locales_test_number, $ok2); 2185 $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results'; 2186 2187 my $first_c_test = $locales_test_number; 2188 2189 $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant'; 2190 report_result($Locale, $locales_test_number, $ok3); 2191 $problematical_tests{$locales_test_number} = 1; 2192 2193 $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar'; 2194 report_result($Locale, $locales_test_number, $ok4); 2195 $problematical_tests{$locales_test_number} = 1; 2196 2197 report_result($Locale, ++$locales_test_number, $ok5); 2198 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf'; 2199 $problematical_tests{$locales_test_number} = 1; 2200 2201 debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n"; 2202 2203 report_result($Locale, ++$locales_test_number, $ok6); 2204 $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block'; 2205 my $first_e_test = $locales_test_number; 2206 2207 report_result($Locale, ++$locales_test_number, $ok7); 2208 $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale'; 2209 2210 $test_names{++$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale'; 2211 report_result($Locale, $locales_test_number, $ok8); 2212 $problematical_tests{$locales_test_number} = 1; 2213 2214 debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n"; 2215 2216 report_result($Locale, ++$locales_test_number, $ok9); 2217 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant'; 2218 $problematical_tests{$locales_test_number} = 1; 2219 my $first_f_test = $locales_test_number; 2220 2221 report_result($Locale, ++$locales_test_number, $ok10); 2222 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar'; 2223 $problematical_tests{$locales_test_number} = 1; 2224 2225 $test_names{++$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf'; 2226 report_result($Locale, $locales_test_number, $ok11); 2227 $problematical_tests{$locales_test_number} = 1; 2228 2229 report_result($Locale, ++$locales_test_number, $ok12); 2230 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric'; 2231 $problematical_tests{$locales_test_number} = 1; 2232 2233 report_result($Locale, ++$locales_test_number, $ok13); 2234 $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot'; 2235 $problematical_tests{$locales_test_number} = 1; 2236 2237 report_result($Locale, ++$locales_test_number, $ok14); 2238 $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8'; 2239 2240 report_result($Locale, ++$locales_test_number, $ok14_5); 2241 $test_names{$locales_test_number} = '... and are ASCII outside "use locale"'; 2242 2243 report_result($Locale, ++$locales_test_number, $ok15); 2244 $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification'; 2245 $problematical_tests{$locales_test_number} = 1; 2246 2247 report_result($Locale, ++$locales_test_number, $ok16); 2248 $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8'; 2249 $problematical_tests{$locales_test_number} = 1; 2250 2251 report_result($Locale, ++$locales_test_number, $ok17); 2252 $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix'; 2253 2254 report_result($Locale, ++$locales_test_number, $ok18); 2255 $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix'; 2256 $problematical_tests{$locales_test_number} = 1; 2257 2258 report_result($Locale, ++$locales_test_number, $ok19); 2259 $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty'; 2260 2261 report_result($Locale, ++$locales_test_number, $ok20); 2262 $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set'; 2263 $problematical_tests{$locales_test_number} = 1; # This is broken in 2264 # OS X 10.9.3 2265 2266 report_result($Locale, ++$locales_test_number, $ok21); 2267 $test_names{$locales_test_number} = '"$!" is ASCII-only outside of locale scope'; 2268 2269 debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n"; 2270 2271 # Does taking lc separately differ from taking 2272 # the lc "in-line"? (This was the bug 19990704.002 (#965), change #3568.) 2273 # The bug was in the caching of the 'o'-magic. 2274 if (! $is_utf8_locale) { 2275 use locale; 2276 2277 sub lcA { 2278 my $lc0 = lc $_[0]; 2279 my $lc1 = lc $_[1]; 2280 return $lc0 cmp $lc1; 2281 } 2282 2283 sub lcB { 2284 return lc($_[0]) cmp lc($_[1]); 2285 } 2286 2287 my $x = "ab"; 2288 my $y = "aa"; 2289 my $z = "AB"; 2290 2291 report_result($Locale, ++$locales_test_number, 2292 lcA($x, $y) == 1 && lcB($x, $y) == 1 || 2293 lcA($x, $z) == 0 && lcB($x, $z) == 0); 2294 } 2295 else { 2296 use locale ':not_characters'; 2297 2298 sub lcC { 2299 my $lc0 = lc $_[0]; 2300 my $lc1 = lc $_[1]; 2301 return $lc0 cmp $lc1; 2302 } 2303 2304 sub lcD { 2305 return lc($_[0]) cmp lc($_[1]); 2306 } 2307 2308 my $x = "ab"; 2309 my $y = "aa"; 2310 my $z = "AB"; 2311 2312 report_result($Locale, ++$locales_test_number, 2313 lcC($x, $y) == 1 && lcD($x, $y) == 1 || 2314 lcC($x, $z) == 0 && lcD($x, $z) == 0); 2315 } 2316 $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp'; 2317 2318 # Does lc of an UPPER (if different from the UPPER) match 2319 # case-insensitively the UPPER, and does the UPPER match 2320 # case-insensitively the lc of the UPPER. And vice versa. 2321 { 2322 use locale; 2323 no utf8; 2324 my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/; 2325 2326 my @f = (); 2327 ++$locales_test_number; 2328 $test_names{$locales_test_number} = 'Verify case insensitive matching works'; 2329 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) { 2330 if (! $is_utf8_locale) { 2331 my $y = lc $x; 2332 next unless uc $y eq $x; 2333 debug_more( "UPPER=", disp_chars(($x)), 2334 "; lc=", disp_chars(($y)), "; ", 2335 "; fc=", disp_chars((fc $x)), "; ", 2336 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 2337 $x =~ /\Q$y/i ? 1 : 0, 2338 "; ", 2339 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 2340 $y =~ /\Q$x/i ? 1 : 0, 2341 "\n"); 2342 # 2343 # If $x and $y contain regular expression characters 2344 # AND THEY lowercase (/i) to regular expression characters, 2345 # regcomp() will be mightily confused. No, the \Q doesn't 2346 # help here (maybe regex engine internal lowercasing 2347 # is done after the \Q?) An example of this happening is 2348 # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS): 2349 # the chr(173) (the "[") is the lowercase of the chr(235). 2350 # 2351 # Similarly losing EBCDIC locales include cs_cz, cs_CZ, 2352 # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!), 2353 # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT, 2354 # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037, 2355 # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU, 2356 # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR. 2357 # 2358 # Similar things can happen even under (bastardised) 2359 # non-EBCDIC locales: in many European countries before the 2360 # advent of ISO 8859-x nationally customised versions of 2361 # ISO 646 were devised, reusing certain punctuation 2362 # characters for modified characters needed by the 2363 # country/language. For example, the "|" might have 2364 # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS. 2365 # 2366 if ($x =~ $re || $y =~ $re) { 2367 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; 2368 next; 2369 } 2370 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; 2371 2372 # fc is not a locale concept, so Perl uses lc for it. 2373 push @f, $x unless lc $x eq fc $x; 2374 } 2375 else { 2376 use locale ':not_characters'; 2377 my $y = lc $x; 2378 next unless uc $y eq $x; 2379 debug_more( "UPPER=", disp_chars(($x)), 2380 "; lc=", disp_chars(($y)), "; ", 2381 "; fc=", disp_chars((fc $x)), "; ", 2382 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 2383 $x =~ /\Q$y/i ? 1 : 0, 2384 "; ", 2385 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 2386 $y =~ /\Q$x/i ? 1 : 0, 2387 "\n"); 2388 2389 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; 2390 2391 # The places where Unicode's lc is different from fc are 2392 # skipped here by virtue of the 'next unless uc...' line above 2393 push @f, $x unless lc $x eq fc $x; 2394 } 2395 } 2396 2397 foreach my $x (sort { ord $a <=> ord $b } keys %lower) { 2398 if (! $is_utf8_locale) { 2399 my $y = uc $x; 2400 next unless lc $y eq $x; 2401 debug_more( "lower=", disp_chars(($x)), 2402 "; uc=", disp_chars(($y)), "; ", 2403 "; fc=", disp_chars((fc $x)), "; ", 2404 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 2405 $x =~ /\Q$y/i ? 1 : 0, 2406 "; ", 2407 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 2408 $y =~ /\Q$x/i ? 1 : 0, 2409 "\n"); 2410 if ($x =~ $re || $y =~ $re) { # See above. 2411 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; 2412 next; 2413 } 2414 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; 2415 2416 push @f, $x unless lc $x eq fc $x; 2417 } 2418 else { 2419 use locale ':not_characters'; 2420 my $y = uc $x; 2421 next unless lc $y eq $x; 2422 debug_more( "lower=", disp_chars(($x)), 2423 "; uc=", disp_chars(($y)), "; ", 2424 "; fc=", disp_chars((fc $x)), "; ", 2425 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 2426 $x =~ /\Q$y/i ? 1 : 0, 2427 "; ", 2428 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 2429 $y =~ /\Q$x/i ? 1 : 0, 2430 "\n"); 2431 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; 2432 2433 push @f, $x unless lc $x eq fc $x; 2434 } 2435 } 2436 report_multi_result($Locale, $locales_test_number, \@f); 2437 $problematical_tests{$locales_test_number} = 1; 2438 } 2439 2440 # [perl #109318] 2441 { 2442 my @f = (); 2443 ++$locales_test_number; 2444 $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent'; 2445 $problematical_tests{$locales_test_number} = 1; 2446 2447 my $radix = langinfo(RADIXCHAR); 2448 my @nums = ( 2449 "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9", 2450 "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9", 2451 ); 2452 2453 if (! $is_utf8_locale) { 2454 use locale; 2455 for my $num (@nums) { 2456 push @f, $num 2457 unless sprintf("%g", $num) =~ /3.+14/; 2458 } 2459 } 2460 else { 2461 use locale ':not_characters'; 2462 for my $num (@nums) { 2463 push @f, $num 2464 unless sprintf("%g", $num) =~ /3.+14/; 2465 } 2466 } 2467 2468 report_result($Locale, $locales_test_number, @f == 0); 2469 if (@f) { 2470 print "# failed $locales_test_number locale '$Locale' numbers @f\n" 2471 } 2472 } 2473} 2474 2475my $final_locales_test_number = $locales_test_number; 2476 2477# Recount the errors. 2478 2479TEST_NUM: 2480foreach $test_num ($first_locales_test_number..$final_locales_test_number) { 2481 my $has_non_global_failure = $Problem{$test_num} 2482 || ! defined $Okay{$test_num} 2483 || ! @{$Okay{$test_num}}; 2484 print "not " if %setlocale_failed || $has_non_global_failure; 2485 print "ok $test_num"; 2486 $test_names{$test_num} = "" unless defined $test_names{$test_num}; 2487 2488 # If TODO is in the test name, make it thus 2489 my $todo = $test_names{$test_num} =~ s/\s*TODO\s*//; 2490 print " $test_names{$test_num}"; 2491 if ($todo) { 2492 print " # TODO\n"; 2493 } 2494 elsif (%setlocale_failed || ! $has_non_global_failure) { 2495 print "\n"; 2496 } 2497 elsif ($has_non_global_failure) { 2498 2499 # If there are any locales that pass this test, or are known-bad, it 2500 # may be that there are enough passes that we TODO the failure, but 2501 # only for tests that we have decided can be problematical. 2502 if ( ($Okay{$test_num} || $Known_bad_locale{$test_num}) 2503 && grep { $_ == $test_num } keys %problematical_tests) 2504 { 2505 # Don't count the known-bad failures when calculating the 2506 # percentage that fail. 2507 my $known_failures = (exists $Known_bad_locale{$test_num}) 2508 ? scalar(keys $Known_bad_locale{$test_num}->%*) 2509 : 0; 2510 my $adjusted_failures = scalar(keys $Problem{$test_num}->%*) 2511 - $known_failures; 2512 2513 # Specially handle failures where only known-bad locales fail. 2514 # This makes the diagnositics clearer. 2515 if ($adjusted_failures <= 0) { 2516 print " # TODO fails only on known bad locales: ", 2517 join " ", keys $Known_bad_locale{$test_num}->%*, "\n"; 2518 next TEST_NUM; 2519 } 2520 2521 # Round to nearest .1% 2522 my $percent_fail = (int(.5 + (1000 * $adjusted_failures 2523 / scalar(@Locale)))) 2524 / 10; 2525 $todo = $percent_fail < $acceptable_failure_percentage; 2526 print " # TODO" if $todo; 2527 print "\n"; 2528 2529 if ($debug) { 2530 print "# $percent_fail% of locales (", 2531 scalar(keys $Problem{$test_num}->%*), 2532 " of ", 2533 scalar(@Locale), 2534 ") fail the above test (TODO cut-off is ", 2535 $acceptable_failure_percentage, 2536 "%)\n"; 2537 } 2538 elsif ($todo) { 2539 print "# ", 100 - $percent_fail, "% of locales not known to be problematic on this platform\n"; 2540 print "# pass the above test, so it is likely that the failures\n"; 2541 print "# are errors in the locale definitions. The test is marked TODO, as the\n"; 2542 print "# problem is not likely to be Perl's\n"; 2543 } 2544 } 2545 2546 if ($debug) { 2547 print "# The code points that had this failure are given above. Look for lines\n"; 2548 print "# that match 'failed $test_num'\n"; 2549 } 2550 else { 2551 print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; 2552 print "# Then look at that output for lines that match 'failed $test_num'\n"; 2553 } 2554 if (defined $not_necessarily_a_problem_test_number 2555 && $test_num == $not_necessarily_a_problem_test_number) 2556 { 2557 print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n"; 2558 print "# It usually indicates a problem in the environment,\n"; 2559 print "# not in Perl itself.\n"; 2560 } 2561 } 2562} 2563 2564$test_num = $final_locales_test_number; 2565 2566if ( ! defined $Config{d_setlocale_accepts_any_locale_name}) { 2567 # perl #115808 2568 use warnings; 2569 my $warned = 0; 2570 local $SIG{__WARN__} = sub { 2571 $warned = $_[0] =~ /uninitialized/; 2572 }; 2573 my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy"); 2574 ok($warned, "variable set to setlocale(\"invalid locale name\") is considered uninitialized"); 2575} 2576 2577# Test that tainting and case changing works on utf8 strings. These tests are 2578# placed last to avoid disturbing the hard-coded test numbers that existed at 2579# the time these were added above this in this file. 2580# This also tests that locale overrides unicode_strings in the same scope for 2581# non-utf8 strings. 2582setlocale(&POSIX::LC_ALL, "C"); 2583{ 2584 use locale; 2585 use feature 'unicode_strings'; 2586 2587 foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") { 2588 my @list; # List of code points to test for $function 2589 2590 # Used to calculate the changed case for ASCII characters by using the 2591 # ord, instead of using one of the functions under test. 2592 my $ascii_case_change_delta; 2593 my $above_latin1_case_change_delta; # Same for the specific ords > 255 2594 # that we use 2595 2596 # We test an ASCII character, which should change case; 2597 # a Latin1 character, which shouldn't change case under this C locale, 2598 # an above-Latin1 character that when the case is changed would cross 2599 # the 255/256 boundary, so doesn't change case 2600 # (the \x{149} is one of these, but changes into 2 characters, the 2601 # first one of which doesn't cross the boundary. 2602 # the final one in each list is an above-Latin1 character whose case 2603 # does change. The code below uses its position in its list as a 2604 # marker to indicate that it, unlike the other code points above 2605 # ASCII, has a successful case change 2606 # 2607 # All casing operations under locale (but not :not_characters) should 2608 # taint 2609 if ($function =~ /^u/) { 2610 @list = ("", "a", 2611 chr(utf8::unicode_to_native(0xe0)), 2612 chr(utf8::unicode_to_native(0xff)), 2613 "\x{fb00}", "\x{149}", "\x{101}"); 2614 $ascii_case_change_delta = ($is_ebcdic) ? +64 : -32; 2615 $above_latin1_case_change_delta = -1; 2616 } 2617 else { 2618 @list = ("", "A", 2619 chr(utf8::unicode_to_native(0xC0)), 2620 "\x{17F}", "\x{100}"); 2621 $ascii_case_change_delta = ($is_ebcdic) ? -64 : +32; 2622 $above_latin1_case_change_delta = +1; 2623 } 2624 foreach my $is_utf8_locale (0 .. 1) { 2625 foreach my $j (0 .. $#list) { 2626 my $char = $list[$j]; 2627 2628 for my $encoded_in_utf8 (0 .. 1) { 2629 my $should_be; 2630 my $changed; 2631 if (! $is_utf8_locale) { 2632 no warnings 'locale'; 2633 $should_be = ($j == $#list) 2634 ? chr(ord($char) + $above_latin1_case_change_delta) 2635 : (length $char == 0 || utf8::native_to_unicode(ord($char)) > 127) 2636 ? $char 2637 : chr(ord($char) + $ascii_case_change_delta); 2638 2639 # This monstrosity is in order to avoid using an eval, 2640 # which might perturb the results 2641 $changed = ($function eq "uc") 2642 ? uc($char) 2643 : ($function eq "ucfirst") 2644 ? ucfirst($char) 2645 : ($function eq "lc") 2646 ? lc($char) 2647 : ($function eq "lcfirst") 2648 ? lcfirst($char) 2649 : ($function eq "fc") 2650 ? fc($char) 2651 : die("Unexpected function \"$function\""); 2652 } 2653 else { 2654 { 2655 no locale; 2656 2657 # For utf8-locales the case changing functions 2658 # should work just like they do outside of locale. 2659 # Can use eval here because not testing it when 2660 # not in locale. 2661 $should_be = eval "$function('$char')"; 2662 die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@; 2663 2664 } 2665 use locale ':not_characters'; 2666 $changed = ($function eq "uc") 2667 ? uc($char) 2668 : ($function eq "ucfirst") 2669 ? ucfirst($char) 2670 : ($function eq "lc") 2671 ? lc($char) 2672 : ($function eq "lcfirst") 2673 ? lcfirst($char) 2674 : ($function eq "fc") 2675 ? fc($char) 2676 : die("Unexpected function \"$function\""); 2677 } 2678 ok($changed eq $should_be, 2679 "$function(\"$char\") in C locale " 2680 . (($is_utf8_locale) 2681 ? "(use locale ':not_characters'" 2682 : "(use locale") 2683 . (($encoded_in_utf8) 2684 ? "; encoded in utf8)" 2685 : "; not encoded in utf8)") 2686 . " should be \"$should_be\", got \"$changed\""); 2687 2688 # Tainting shouldn't happen for use locale :not_character 2689 # (a utf8 locale) 2690 (! $is_utf8_locale) 2691 ? check_taint($changed) 2692 : check_taint_not($changed); 2693 2694 # Use UTF-8 next time through the loop 2695 utf8::upgrade($char); 2696 } 2697 } 2698 } 2699 } 2700} 2701 2702# Give final advice. 2703 2704my $didwarn = 0; 2705 2706foreach ($first_locales_test_number..$final_locales_test_number) { 2707 if ($Problem{$_}) { 2708 my @f = sort keys %{ $Problem{$_} }; 2709 2710 # Don't list the failures caused by known-bad locales. 2711 if (exists $known_bad_locales{$os}) { 2712 @f = grep { $_ !~ $known_bad_locales{$os} } @f; 2713 next unless @f; 2714 } 2715 my $f = join(" ", @f); 2716 $f =~ s/(.{50,60}) /$1\n#\t/g; 2717 print 2718 "#\n", 2719 "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n", 2720 "#\t", $f, "\n#\n", 2721 "# on your system may have errors because the locale test $_\n", 2722 "# \"$test_names{$_}\"\n", 2723 "# failed in ", (@f == 1 ? "that locale" : "those locales"), 2724 ".\n"; 2725 print <<EOW; 2726# 2727# If your users are not using these locales you are safe for the moment, 2728# but please report this failure first to perlbug\@perl.org using the 2729# perlbug script (as described in the INSTALL file) so that the exact 2730# details of the failures can be sorted out first and then your operating 2731# system supplier can be alerted about these anomalies. 2732# 2733EOW 2734 $didwarn = 1; 2735 } 2736} 2737 2738# Tell which locales were okay and which were not. 2739 2740if ($didwarn) { 2741 my (@s, @F); 2742 2743 foreach my $l (@Locale) { 2744 my $p = 0; 2745 if ($setlocale_failed{$l}) { 2746 $p++; 2747 } 2748 else { 2749 foreach my $t 2750 ($first_locales_test_number..$final_locales_test_number) 2751 { 2752 $p++ if $Problem{$t}{$l}; 2753 } 2754 } 2755 push @s, $l if $p == 0; 2756 push @F, $l unless $p == 0; 2757 } 2758 2759 if (@s) { 2760 my $s = join(" ", @s); 2761 $s =~ s/(.{50,60}) /$1\n#\t/g; 2762 2763 print 2764 "# The following locales\n#\n", 2765 "#\t", $s, "\n#\n", 2766 "# tested okay.\n#\n", 2767 } else { 2768 print "# None of your locales were fully okay.\n"; 2769 } 2770 2771 if (@F) { 2772 my $F = join(" ", @F); 2773 $F =~ s/(.{50,60}) /$1\n#\t/g; 2774 2775 my $details = ""; 2776 unless ($debug) { 2777 $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; 2778 } 2779 elsif ($debug == 1) { 2780 $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n"; 2781 } 2782 2783 print 2784 "# The following locales\n#\n", 2785 "#\t", $F, "\n#\n", 2786 "# had problems.\n#\n", 2787 $details; 2788 } else { 2789 print "# None of your locales were broken.\n"; 2790 } 2791} 2792 2793if (exists $known_bad_locales{$os} && ! %Known_bad_locale) { 2794 $test_num++; 2795 print "ok $test_num $^O no longer has known bad locales # TODO\n"; 2796} 2797 2798print "1..$test_num\n"; 2799 2800# eof 2801