1# Common tools for test files to find the locales which exist on the 2# system. Caller should have verified that this isn't miniperl before calling 3# the functions. 4 5# Note that it's okay that some languages have their native names 6# capitalized here even though that's not "right". They are lowercased 7# anyway later during the scanning process (and besides, some clueless 8# vendor might have them capitalized erroneously anyway). 9 10# Functions whose names begin with underscore are internal helper functions 11# for this file, and are not to be used by outside callers. 12 13use Config; 14use strict; 15use warnings; 16use feature 'state'; 17 18my %known_bad_locales = ( # XXX eventually will need version info if and 19 # when these get fixed. 20 solaris => [ 'vi_VN.UTF-8', ], # Use of U+A8 segfaults: GH #20578 21); 22 23eval { require POSIX; import POSIX 'locale_h'; }; 24my $has_locale_h = ! $@; 25 26my @known_categories = ( qw(LC_ALL LC_COLLATE LC_CTYPE LC_MESSAGES LC_MONETARY 27 LC_NUMERIC LC_TIME LC_ADDRESS LC_IDENTIFICATION 28 LC_MEASUREMENT LC_PAPER LC_TELEPHONE LC_SYNTAX 29 LC_TOD LC_NAME)); 30my @platform_categories; 31 32sub category_excluded($) { 33 my $cat_name = shift =~ s/^LC_//r; 34 35 # Recognize Configure option to exclude a category 36 return $Config{ccflags} =~ /\bD?NO_LOCALE_$cat_name\b/; 37} 38 39# LC_ALL can be -1 on some platforms. And, in fact the implementors could 40# legally use any integer to represent any category. But it makes the most 41# sense for them to have used small integers. Below, we create new locale 42# numbers for ones missing from this machine. We make them very negative, 43# hopefully more negative than anything likely to be a valid category on the 44# platform, but also below is a check to be sure that our guess is valid. 45my $max_bad_category_number = -1000000; 46 47# Initialize this hash so that it looks like e.g., 48# 6 => 'CTYPE', 49# where 6 is the value of &POSIX::LC_CTYPE 50my %category_name; 51my %category_number; 52if ($has_locale_h) { 53 my $number_for_missing_category = $max_bad_category_number; 54 foreach my $name (@known_categories) { 55 my $number = eval "&POSIX::$name"; 56 if ($@) { 57 # Use a negative number (smaller than any legitimate category 58 # number) if the platform doesn't support this category, so we 59 # have an entry for all the ones that might be specified in calls 60 # to us. 61 $number = $number_for_missing_category--; 62 } 63 elsif ( $number !~ / ^ -? \d+ $ /x 64 || $number <= $max_bad_category_number) 65 { 66 # We think this should be an int. And it has to be larger than 67 # any of our synthetic numbers. 68 die "Unexpected locale category number '$number' for $name" 69 } 70 else { 71 push @platform_categories, $name; 72 } 73 74 $name =~ s/LC_//; 75 $category_name{$number} = "$name"; 76 $category_number{$name} = $number; 77 } 78} 79 80sub _my_diag($) { 81 my $message = shift; 82 if (defined &main::diag) { 83 diag($message); 84 } 85 else { 86 local($\, $", $,) = (undef, ' ', ''); 87 print STDERR $message, "\n"; 88 } 89} 90 91# Larger than any real test 92my $my_count = 1_000_000; 93 94sub _my_fail($) { 95 my $message = shift; 96 if (defined &main::fail) { 97 fail($message); 98 } 99 else { 100 local($\, $", $,) = (undef, ' ', ''); 101 print "not ok " . $my_count++ . $message . "\n"; 102 } 103} 104 105sub valid_locale_categories() { 106 # Returns a list of the locale categories (expressed as strings, like 107 # "LC_ALL) known to this program that are available on this platform. 108 109 return grep { ! category_excluded($_) } @platform_categories; 110} 111 112sub is_category_valid($) { 113 my $name = shift; 114 $name = 'LC_' . $name =~ s/^LC_//r; 115 return grep { $name eq $_ } valid_locale_categories(); 116} 117 118# It turns out that strings generated under the control of a given locale 119# category are often affected as well by LC_CTYPE. If the two categories 120# don't match, one can get mojibake or even core dumps. (khw thinks it more 121# likely that it's the code set, not the locale that's critical here; but 122# didn't run experiments to verify this.) Hence, in the code below, CTYPE and 123# the tested categories are all set to the same locale. If CTYPE isn't 124# available on the platform, LC_ALL is instead used. One might think to just 125# use LC_ALL all the time, but on Windows 126# setlocale(LC_ALL, "some_borked_locale") 127# can return success, whereas setting LC_CTYPE to it fails. 128my $master_category; 129$master_category = $category_number{'CTYPE'} 130 if is_category_valid('LC_CTYPE') && defined $category_number{'CTYPE'}; 131$master_category = $category_number{'ALL'} 132 if ! defined $master_category 133 && is_category_valid('LC_ALL') && defined $category_number{'ALL'}; 134 135sub _trylocale ($$$$) { # For use only by other functions in this file! 136 137 # Adds the locale given by the first parameter to the list given by the 138 # 3rd iff the platform supports the locale in each of the category numbers 139 # given by the 2nd parameter, which is either a single category or a 140 # reference to a list of categories. 141 # 142 # The 4th parameter is true if to accept locales that aren't apparently 143 # fully compatible with Perl. 144 145 my $locale = shift; 146 my $categories = shift; 147 my $list = shift; 148 my $allow_incompatible = shift; 149 150 my $normalized_locale = lc ($locale =~ s/\W//gr); 151 return if ! $locale || grep { $normalized_locale eq lc ($_ =~ s/\W//gr) } @$list; 152 153 # This is a toy (pig latin) locale that is not fully implemented on some 154 # systems 155 return if $locale =~ / ^ pig $ /ix; 156 157 # Certain platforms have a crippled locale system in which setlocale 158 # returns success for just about any possible locale name, but if anything 159 # actually happens as a result of the call, it is that the underlying 160 # locale is set to a system default, likely C or C.UTF-8. We can't test 161 # such systems fully, but we shouldn't disable the user from using 162 # locales, as it may work out for them (or not). 163 return if defined $Config{d_setlocale_accepts_any_locale_name} 164 && $locale !~ / ^ (?: C | POSIX | C\.UTF-?8 ) $/ix; 165 166 if (exists $known_bad_locales{$^O}) { 167 my @bad_locales = $known_bad_locales{$^O}->@*; 168 return if grep { $locale eq $_ } @bad_locales; 169 } 170 171 $categories = [ $categories ] unless ref $categories; 172 173 my $badutf8 = 0; 174 my $plays_well = 1; 175 my $unsupported = 0; 176 177 use warnings 'locale'; 178 179 local $SIG{__WARN__} = sub { 180 $badutf8 = 1 if grep { /Malformed UTF-8/ } @_; 181 $unsupported = 1 if grep { /Locale .* is unsupported/i } @_; 182 $plays_well = 0 if grep { 183 /The following characters .* may not have the same meaning as the Perl program expects(?# 184 )|The Perl program will use the expected meanings/i 185 } @_; 186 }; 187 188 my $first_time = 1; 189 foreach my $category ($master_category, $categories->@*) { 190 next if ! defined $category || (! $first_time && $category == $master_category); 191 $first_time = 0; 192 193 my $save_locale = setlocale($category); 194 if (! $save_locale) { 195 _my_fail("Verify could save previous locale"); 196 return; 197 } 198 199 # Incompatible locales aren't warned about unless using locales. 200 use locale; 201 202 my $result = setlocale($category, $locale); 203 return unless defined $result; 204 205 no locale; 206 207 # We definitely don't want the locale set to something that is 208 # unsupported 209 if (! setlocale($category, $save_locale)) { 210 my $error_text = "\$!=$!"; 211 $error_text .= "; \$^E=$^E" if $^E != $!; 212 die "Couldn't restore locale '$save_locale', category $category;" 213 . $error_text; 214 } 215 if ($badutf8) { 216 _my_fail("Verify locale name doesn't contain malformed utf8"); 217 return; 218 } 219 220 return if $unsupported; 221 222 # Commas in locale names are bad in Windows, and there is a bug in 223 # some versions where setlocale() turns a legal input locale name into 224 # an illegal return value, which it can't later parse. 225 return if $result =~ /,/; 226 227 return unless $plays_well || $allow_incompatible; 228 } 229 230 push @$list, $locale; 231} 232 233sub _decode_encodings { # For use only by other functions in this file! 234 my @enc; 235 236 foreach (split(/ /, shift)) { 237 if (/^(\d+)$/) { 238 push @enc, "ISO8859-$1"; 239 push @enc, "iso8859$1"; # HP 240 if ($1 eq '1') { 241 push @enc, "roman8"; # HP 242 } 243 push @enc, $_; 244 push @enc, "$_.UTF-8"; 245 push @enc, "$_.65001"; # Windows UTF-8 246 push @enc, "$_.ACP"; # Windows ANSI code page 247 push @enc, "$_.OCP"; # Windows OEM code page 248 push @enc, "$_.1252"; # Windows 249 } 250 } 251 if ($^O eq 'os390') { 252 push @enc, qw(IBM-037 IBM-819 IBM-1047); 253 } 254 push @enc, "UTF-8"; 255 push @enc, "65001"; # Windows UTF-8 256 257 return @enc; 258} 259 260sub locales_enabled(;$) { 261 # If no parameter is specified, the function returns 1 if there is any 262 # "safe" locale handling available to the caller; otherwise 0. Safeness 263 # is defined here as the caller operating in the main thread of a program, 264 # or if threaded locales are safe on the platform and Configured to be 265 # used. This sub is used for testing purposes, and for those, this 266 # definition of safety is sufficient, and necessary to get some tests to 267 # run on certain configurations on certain platforms. But beware that the 268 # main thread can change the locale of any subthreads unless 269 # ${^SAFE_LOCALES} is non-zero. 270 # 271 # Use the optional parameter to discover if a particular category or 272 # categories are available on the system. 1 is returned if the global 273 # criteria described in the previous paragraph are true, AND if all the 274 # specified categories are available on the platform and Configured to be 275 # used. Otherwise 0 is returned. The parameter is either a single POSIX 276 # locale category or a reference to a list of them. Each category must be 277 # its name as a string, like 'LC_TIME' (the initial 'LC_' is optional), or 278 # the number this platform uses to signify the category (e.g., 279 # 'locales_enabled(&POSIX::LC_CTYPE)' 280 # 281 # When the function returns 1 and a parameter was specified as a list 282 # reference, the reference will be altered on return to point to an 283 # equivalent list such that the categories are numeric instead of strings 284 # and sorted to meet the input expectations of _trylocale(). 285 # 286 # It is a fatal error to call this with something that isn't a known 287 # category to this file. If this happens, look first for a typo, and 288 # second if you are using a category unknown to Perl. In the latter case 289 # a bug report should be submitted. 290 291 # khw cargo-culted the '?' in the pattern on the next line. 292 return 0 if $Config{ccflags} =~ /\bD?NO_LOCALE\b/; 293 294 # If we can't load the POSIX XS module, we can't have locales even if they 295 # normally would be available 296 return 0 if ! defined &DynaLoader::boot_DynaLoader; 297 298 # Don't test locales where they aren't safe. On systems with unsafe 299 # threads, for the purposes of testing, we consider the main thread safe, 300 # and all other threads unsafe. 301 if (! ${^SAFE_LOCALES}) { 302 return 0 if $^O eq 'os390'; # Threaded locales don't work well here 303 require threads; 304 return 0 if threads->tid() != 0; 305 } 306 307 # If no setlocale, we need the POSIX 2008 alternatives 308 if (! $Config{d_setlocale}) { 309 return 0 if $Config{ccflags} =~ /\bD?NO_POSIX_2008_LOCALE\b/; 310 return 0 unless $Config{d_newlocale}; 311 return 0 unless $Config{d_uselocale}; 312 return 0 unless $Config{d_duplocale}; 313 return 0 unless $Config{d_freelocale}; 314 } 315 316 # Done with the global possibilities. Now check if any passed in category 317 # is disabled. 318 319 my $categories_ref = $_[0]; 320 my $return_categories_numbers = 0; 321 my @categories_numbers; 322 my $has_LC_ALL = 0; 323 my $has_LC_COLLATE = 0; 324 325 if (defined $categories_ref) { 326 my @local_categories_copy; 327 328 my $reftype = ref $categories_ref; 329 if ($reftype eq 'ARRAY') { 330 @local_categories_copy = @$categories_ref; 331 $return_categories_numbers = 1; 332 } 333 elsif ($reftype ne "") { 334 die "Parameter to locales_enabled() must be an ARRAY;" 335 . " instead you used a $reftype"; 336 } 337 else { # Single category passed in 338 @local_categories_copy = $categories_ref; 339 } 340 341 for my $category_name_or_number (@local_categories_copy) { 342 my $name; 343 my $number; 344 if ($category_name_or_number =~ / ^ -? \d+ $ /x) { 345 $number = $category_name_or_number; 346 die "Invalid locale category number '$number'" 347 unless grep { $number == $_ } keys %category_name; 348 $name = $category_name{$number}; 349 } 350 else { 351 $name = $category_name_or_number; 352 $name =~ s/ ^ LC_ //x; 353 foreach my $trial (keys %category_name) { 354 if ($category_name{$trial} eq $name) { 355 $number = $trial; 356 last; 357 } 358 } 359 die "Invalid locale category name '$name'" 360 unless defined $number; 361 } 362 363 return 0 if $number <= $max_bad_category_number 364 || category_excluded($name); 365 366 367 eval "defined &POSIX::LC_$name"; 368 return 0 if $@; 369 370 if ($return_categories_numbers) { 371 if ($name eq 'CTYPE') { 372 unshift @categories_numbers, $number; # Always first 373 } 374 elsif ($name eq 'ALL') { 375 $has_LC_ALL = 1; 376 } 377 elsif ($name eq 'COLLATE') { 378 $has_LC_COLLATE = 1; 379 } 380 else { 381 push @categories_numbers, $number; 382 } 383 } 384 } 385 } 386 387 if ($return_categories_numbers) { 388 389 # COLLATE comes after all other locales except ALL, which comes last 390 if ($has_LC_COLLATE) { 391 push @categories_numbers, $category_number{'COLLATE'}; 392 } 393 if ($has_LC_ALL) { 394 push @categories_numbers, $category_number{'ALL'}; 395 } 396 397 @$categories_ref = @categories_numbers; 398 } 399 400 return 1; 401} 402 403 404sub find_locales ($;$) { 405 406 # Returns an array of all the locales we found on the system. If the 407 # optional 2nd parameter is non-zero, the list includes all found locales; 408 # otherwise it is restricted to those locales that play well with Perl, as 409 # far as we can easily determine. 410 # 411 # The first parameter is either a single locale category or a reference to 412 # a list of categories to find valid locales for it (or in the case of 413 # multiple) for all of them. Each category can be a name (like 'LC_ALL' 414 # or simply 'ALL') or the C enum value for the category. 415 416 my $input_categories = shift; 417 my $allow_incompatible = shift // 0; 418 419 my @categories = (ref $input_categories) 420 ? $input_categories->@* 421 : $input_categories; 422 return unless locales_enabled(\@categories); 423 424 # Note, the subroutine call above converts the $categories into a form 425 # suitable for _trylocale(). 426 427 # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1" 428 # and mingw32 uses said silly CRT 429 # This doesn't seem to be an issue any more, at least on Windows XP, 430 # so re-enable the tests for Windows XP onwards. 431 my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion && 432 join('.', (Win32::GetOSVersion())[1..2]) >= 5.1); 433 return if (($^O eq 'MSWin32' && !$winxp) 434 && $Config{cc} =~ /^(cl|gcc|g\+\+|ici)/i); 435 436 my @Locale; 437 _trylocale("C", \@categories, \@Locale, $allow_incompatible); 438 _trylocale("POSIX", \@categories, \@Locale, $allow_incompatible); 439 440 if ($Config{d_has_C_UTF8} && $Config{d_has_C_UTF8} eq 'true') { 441 _trylocale("C.UTF-8", \@categories, \@Locale, $allow_incompatible); 442 } 443 444 # There's no point in looking at anything more if we know that setlocale 445 # will return success on any garbage or non-garbage name. 446 return sort @Locale if defined $Config{d_setlocale_accepts_any_locale_name}; 447 448 foreach (1..16) { 449 _trylocale("ISO8859-$_", \@categories, \@Locale, $allow_incompatible); 450 _trylocale("iso8859$_", \@categories, \@Locale, $allow_incompatible); 451 _trylocale("iso8859-$_", \@categories, \@Locale, $allow_incompatible); 452 _trylocale("iso_8859_$_", \@categories, \@Locale, $allow_incompatible); 453 _trylocale("isolatin$_", \@categories, \@Locale, $allow_incompatible); 454 _trylocale("isolatin-$_", \@categories, \@Locale, $allow_incompatible); 455 _trylocale("iso_latin_$_", \@categories, \@Locale, $allow_incompatible); 456 } 457 458 # Sanitize the environment so that we can run the external 'locale' 459 # program without the taint mode getting grumpy. 460 461 # $ENV{PATH} is special in VMS. 462 delete local $ENV{PATH} if $^O ne 'VMS' or $Config{d_setenv}; 463 464 # Other subversive stuff. 465 delete local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; 466 467 if (-x "/usr/bin/locale" 468 && open(LOCALES, '-|', "/usr/bin/locale -a 2>/dev/null")) 469 { 470 while (<LOCALES>) { 471 # It seems that /usr/bin/locale steadfastly outputs 8 bit data, which 472 # ain't great when we're running this testPERL_UNICODE= so that utf8 473 # locales will cause all IO hadles to default to (assume) utf8 474 next unless utf8::valid($_); 475 chomp; 476 _trylocale($_, \@categories, \@Locale, $allow_incompatible); 477 } 478 close(LOCALES); 479 } elsif ($^O eq 'VMS' 480 && defined($ENV{'SYS$I18N_LOCALE'}) 481 && -d 'SYS$I18N_LOCALE') 482 { 483 # The SYS$I18N_LOCALE logical name search list was not present on 484 # VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later versions. 485 opendir(LOCALES, "SYS\$I18N_LOCALE:"); 486 while ($_ = readdir(LOCALES)) { 487 chomp; 488 _trylocale($_, \@categories, \@Locale, $allow_incompatible); 489 } 490 close(LOCALES); 491 } elsif (($^O eq 'openbsd' || $^O eq 'bitrig' ) && -e '/usr/share/locale') { 492 493 # OpenBSD doesn't have a locale executable, so reading 494 # /usr/share/locale is much easier and faster than the last resort 495 # method. 496 497 opendir(LOCALES, '/usr/share/locale'); 498 while ($_ = readdir(LOCALES)) { 499 chomp; 500 _trylocale($_, \@categories, \@Locale, $allow_incompatible); 501 } 502 close(LOCALES); 503 } else { # Final fallback. Try our list of locales hard-coded here 504 505 # This is going to be slow. 506 my @Data; 507 508 # Locales whose name differs if the utf8 bit is on are stored in these 509 # two files with appropriate encodings. 510 my $data_file = ($^H & 0x08 || (${^OPEN} || "") =~ /:utf8/) 511 ? _source_location() . "/lib/locale/utf8" 512 : _source_location() . "/lib/locale/latin1"; 513 if (-e $data_file) { 514 @Data = do $data_file; 515 } 516 else { 517 _my_diag(__FILE__ . ":" . __LINE__ . ": '$data_file' doesn't exist"); 518 } 519 520 # The rest of the locales are in this file. 521 state @my_data = <DATA>; close DATA if fileno DATA; 522 push @Data, @my_data; 523 524 foreach my $line (@Data) { 525 chomp $line; 526 my ($locale_name, $language_codes, $country_codes, $encodings) = 527 split /:/, $line; 528 _my_diag(__FILE__ . ":" . __LINE__ . ": Unexpected syntax in '$line'") 529 unless defined $locale_name; 530 my @enc = _decode_encodings($encodings); 531 foreach my $loc (split(/ /, $locale_name)) { 532 _trylocale($loc, \@categories, \@Locale, $allow_incompatible); 533 foreach my $enc (@enc) { 534 _trylocale("$loc.$enc", \@categories, \@Locale, 535 $allow_incompatible); 536 } 537 $loc = lc $loc; 538 foreach my $enc (@enc) { 539 _trylocale("$loc.$enc", \@categories, \@Locale, 540 $allow_incompatible); 541 } 542 } 543 foreach my $lang (split(/ /, $language_codes)) { 544 _trylocale($lang, \@categories, \@Locale, $allow_incompatible); 545 foreach my $country (split(/ /, $country_codes)) { 546 my $lc = "${lang}_${country}"; 547 _trylocale($lc, \@categories, \@Locale, $allow_incompatible); 548 foreach my $enc (@enc) { 549 _trylocale("$lc.$enc", \@categories, \@Locale, 550 $allow_incompatible); 551 } 552 my $lC = "${lang}_\U${country}"; 553 _trylocale($lC, \@categories, \@Locale, $allow_incompatible); 554 foreach my $enc (@enc) { 555 _trylocale("$lC.$enc", \@categories, \@Locale, 556 $allow_incompatible); 557 } 558 } 559 } 560 } 561 } 562 563 @Locale = sort @Locale; 564 565 return @Locale; 566} 567 568sub is_locale_utf8 ($) { # Return a boolean as to if core Perl thinks the input 569 # is a UTF-8 locale 570 571 # On z/OS, even locales marked as UTF-8 aren't. 572 return 0 if ord "A" != 65; 573 574 return 0 unless locales_enabled('LC_CTYPE'); 575 576 my $locale = shift; 577 578 no warnings 'locale'; # We may be trying out a weird locale 579 use locale; 580 581 my $save_locale = setlocale(&POSIX::LC_CTYPE()); 582 if (! $save_locale) { 583 _my_fail("Verify could save previous locale"); 584 return 0; 585 } 586 587 if (! setlocale(&POSIX::LC_CTYPE(), $locale)) { 588 _my_fail("Verify could setlocale to $locale"); 589 return 0; 590 } 591 592 my $ret = 0; 593 594 # Use an op that gives different results for UTF-8 than any other locale. 595 # If a platform has UTF-8 locales, there should be at least one locale on 596 # most platforms with UTF-8 in its name, so if there is a bug in the op 597 # giving a false negative, we should get a failure for those locales as we 598 # go through testing all the locales on the platform. 599 if (CORE::fc(chr utf8::unicode_to_native(0xdf)) ne "ss") { 600 if ($locale =~ /UTF-?8/i) { 601 _my_fail("Verify $locale with UTF-8 in name is a UTF-8 locale"); 602 } 603 } 604 else { 605 $ret = 1; 606 } 607 608 die "Couldn't restore locale '$save_locale'" 609 unless setlocale(&POSIX::LC_CTYPE(), $save_locale); 610 611 return $ret; 612} 613 614sub classify_locales_wrt_utf8ness($) { 615 616 # Takes the input list of locales, and returns two lists split apart from 617 # it: the UTF-8 ones, and the non-UTF-8 ones. 618 619 my $locales_ref = shift; 620 my (@utf8, @non_utf8); 621 622 if (! locales_enabled('LC_CTYPE')) { # No CTYPE implies all are non-UTF-8 623 @non_utf8 = $locales_ref->@*; 624 return ( \@utf8, \@non_utf8 ); 625 } 626 627 foreach my $locale (@$locales_ref) { 628 my $which = (is_locale_utf8($locale)) ? \@utf8 : \@non_utf8; 629 push $which->@*, $locale; 630 } 631 632 return ( \@utf8, \@non_utf8 ); 633} 634 635sub find_utf8_ctype_locales (;$) { 636 637 # Return the names of the locales that core Perl thinks are UTF-8 LC_CTYPE 638 # locales. Optional parameter is a reference to a list of locales to try; 639 # if omitted, this tries all locales it can find on the platform 640 641 return unless locales_enabled('LC_CTYPE'); 642 643 my $locales_ref = shift; 644 if (! defined $locales_ref) { 645 646 my @locales = find_locales(&POSIX::LC_CTYPE()); 647 $locales_ref = \@locales; 648 } 649 650 my ($utf8_ref, undef) = classify_locales_wrt_utf8ness($locales_ref); 651 return unless $utf8_ref; 652 return $utf8_ref->@*; 653} 654 655sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl 656 # thinks is a UTF-8 LC_CTYPE non-turkic 657 # locale. 658 # Optional parameter is a reference to a 659 # list of locales to try; if omitted, this 660 # tries all locales it can find on the 661 # platform 662 my $try_locales_ref = shift; 663 664 my @utf8_locales = find_utf8_ctype_locales($try_locales_ref); 665 my @turkic_locales = find_utf8_turkic_locales($try_locales_ref); 666 667 my %seen_turkic; 668 669 # Create undef elements in the hash for turkic locales 670 @seen_turkic{@turkic_locales} = (); 671 672 foreach my $locale (@utf8_locales) { 673 return $locale unless exists $seen_turkic{$locale}; 674 } 675 676 return; 677} 678 679sub find_utf8_turkic_locales (;$) { 680 681 # Return the name of all the locales that core Perl thinks are UTF-8 682 # Turkic LC_CTYPE. Optional parameter is a reference to a list of locales 683 # to try; if omitted, this tries all locales it can find on the platform 684 685 my @return; 686 687 return unless locales_enabled('LC_CTYPE'); 688 689 my $save_locale = setlocale(&POSIX::LC_CTYPE()); 690 foreach my $locale (find_utf8_ctype_locales(shift)) { 691 use locale; 692 setlocale(&POSIX::LC_CTYPE(), $locale); 693 push @return, $locale if uc('i') eq "\x{130}"; 694 } 695 696 die "Couldn't restore locale '$save_locale'" 697 unless setlocale(&POSIX::LC_CTYPE(), $save_locale); 698 699 return @return; 700} 701 702sub find_utf8_turkic_locale (;$) { 703 my @turkics = find_utf8_turkic_locales(shift); 704 705 return unless @turkics; 706 return $turkics[0] 707} 708 709 710# returns full path to the directory containing the current source 711# file, inspired by mauke's Dir::Self 712sub _source_location { 713 require File::Spec; 714 715 my $caller_filename = (caller)[1]; 716 717 my $loc = File::Spec->rel2abs( 718 File::Spec->catpath( 719 (File::Spec->splitpath($caller_filename))[0, 1], '' 720 ) 721 ); 722 723 return ($loc =~ /^(.*)$/)[0]; # untaint 724} 725 7261 727 728# Format of data is: locale_name, language_codes, country_codes, encodings 729__DATA__ 730Afrikaans:af:za:1 15 731Arabic:ar:dz eg sa:6 arabic8 732Brezhoneg Breton:br:fr:1 15 733Bulgarski Bulgarian:bg:bg:5 734Chinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW Big5 GB2312 tw.EUC 735Hrvatski Croatian:hr:hr:2 736Cymraeg Welsh:cy:cy:1 14 15 737Czech:cs:cz:2 738Dansk Danish:da:dk:1 15 739Nederlands Dutch:nl:be nl:1 15 740English American British:en:au ca gb ie nz us uk zw:1 15 cp850 741Esperanto:eo:eo:3 742Eesti Estonian:et:ee:4 6 13 743Suomi Finnish:fi:fi:1 15 744Flamish::fl:1 15 745Deutsch German:de:at be ch de lu:1 15 746Euskaraz Basque:eu:es fr:1 15 747Galego Galician:gl:es:1 15 748Ellada Greek:el:gr:7 g8 749Frysk:fy:nl:1 15 750Greenlandic:kl:gl:4 6 751Hebrew:iw:il:8 hebrew8 752Hungarian:hu:hu:2 753Indonesian:id:id:1 15 754Gaeilge Irish:ga:IE:1 14 15 755Italiano Italian:it:ch it:1 15 756Nihongo Japanese:ja:jp:euc eucJP jp.EUC sjis 757Korean:ko:kr: 758Latine Latin:la:va:1 15 759Latvian:lv:lv:4 6 13 760Lithuanian:lt:lt:4 6 13 761Macedonian:mk:mk:1 15 762Maltese:mt:mt:3 763Moldovan:mo:mo:2 764Norsk Norwegian:no no\@nynorsk nb nn:no:1 15 765Occitan:oc:es:1 15 766Polski Polish:pl:pl:2 767Rumanian:ro:ro:2 768Russki Russian:ru:ru su ua:5 koi8 koi8r KOI8-R koi8u cp1251 cp866 769Serbski Serbian:sr:yu:5 770Slovak:sk:sk:2 771Slovene Slovenian:sl:si:2 772Sqhip Albanian:sq:sq:1 15 773Svenska Swedish:sv:fi se:1 15 774Thai:th:th:11 tis620 775Turkish:tr:tr:9 turkish8 776Yiddish:yi::1 15 777