1/* 2****************************************************************************** 3* 4* Copyright (C) 1997-2014, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7****************************************************************************** 8* 9* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) 10* 11* Date Name Description 12* 04/14/97 aliu Creation. 13* 04/24/97 aliu Added getDefaultDataDirectory() and 14* getDefaultLocaleID(). 15* 04/28/97 aliu Rewritten to assume Unix and apply general methods 16* for assumed case. Non-UNIX platforms must be 17* special-cased. Rewrote numeric methods dealing 18* with NaN and Infinity to be platform independent 19* over all IEEE 754 platforms. 20* 05/13/97 aliu Restored sign of timezone 21* (semantics are hours West of GMT) 22* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, 23* nextDouble.. 24* 07/22/98 stephen Added remainder, max, min, trunc 25* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity 26* 08/24/98 stephen Added longBitsFromDouble 27* 09/08/98 stephen Minor changes for Mac Port 28* 03/02/99 stephen Removed openFile(). Added AS400 support. 29* Fixed EBCDIC tables 30* 04/15/99 stephen Converted to C. 31* 06/28/99 stephen Removed mutex locking in u_isBigEndian(). 32* 08/04/99 jeffrey R. Added OS/2 changes 33* 11/15/99 helena Integrated S/390 IEEE support. 34* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID 35* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage 36* 01/03/08 Steven L. Fake Time Support 37****************************************************************************** 38*/ 39 40// Defines _XOPEN_SOURCE for access to POSIX functions. 41// Must be before any other #includes. 42#include "uposixdefs.h" 43 44/* include ICU headers */ 45#include "unicode/utypes.h" 46#include "unicode/putil.h" 47#include "unicode/ustring.h" 48#include "putilimp.h" 49#include "uassert.h" 50#include "umutex.h" 51#include "cmemory.h" 52#include "cstring.h" 53#include "locmap.h" 54#include "ucln_cmn.h" 55 56/* Include standard headers. */ 57#include <stdio.h> 58#include <stdlib.h> 59#include <string.h> 60#include <math.h> 61#include <locale.h> 62#include <float.h> 63 64#ifndef U_COMMON_IMPLEMENTATION 65#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu 66#endif 67 68 69/* include system headers */ 70#if U_PLATFORM_USES_ONLY_WIN32_API 71 /* 72 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW. 73 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API) 74 * to use native APIs as much as possible? 75 */ 76# define WIN32_LEAN_AND_MEAN 77# define VC_EXTRALEAN 78# define NOUSER 79# define NOSERVICE 80# define NOIME 81# define NOMCX 82# include <windows.h> 83# include "wintz.h" 84#elif U_PLATFORM == U_PF_OS400 85# include <float.h> 86# include <qusec.h> /* error code structure */ 87# include <qusrjobi.h> 88# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ 89# include <mih/testptr.h> /* For uprv_maximumPtr */ 90#elif U_PLATFORM == U_PF_CLASSIC_MACOS 91# include <Files.h> 92# include <IntlResources.h> 93# include <Script.h> 94# include <Folders.h> 95# include <MacTypes.h> 96# include <TextUtils.h> 97# define ICU_NO_USER_DATA_OVERRIDE 1 98#elif U_PLATFORM == U_PF_OS390 99# include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ 100#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS 101# include <limits.h> 102# include <unistd.h> 103# if U_PLATFORM == U_PF_SOLARIS 104# ifndef _XPG4_2 105# define _XPG4_2 106# endif 107# endif 108#elif U_PLATFORM == U_PF_QNX 109# include <sys/neutrino.h> 110#endif 111 112#if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__) 113/* tzset isn't defined in strict ANSI on Cygwin and MinGW. */ 114#undef __STRICT_ANSI__ 115#endif 116 117/* 118 * Cygwin with GCC requires inclusion of time.h after the above disabling strict asci mode statement. 119 */ 120#include <time.h> 121 122#if !U_PLATFORM_USES_ONLY_WIN32_API 123#include <sys/time.h> 124#endif 125 126/* 127 * Only include langinfo.h if we have a way to get the codeset. If we later 128 * depend on more feature, we can test on U_HAVE_NL_LANGINFO. 129 * 130 */ 131 132#if U_HAVE_NL_LANGINFO_CODESET 133#include <langinfo.h> 134#endif 135 136/** 137 * Simple things (presence of functions, etc) should just go in configure.in and be added to 138 * icucfg.h via autoheader. 139 */ 140#if U_PLATFORM_IMPLEMENTS_POSIX 141# if U_PLATFORM == U_PF_OS400 142# define HAVE_DLFCN_H 0 143# define HAVE_DLOPEN 0 144# else 145# ifndef HAVE_DLFCN_H 146# define HAVE_DLFCN_H 1 147# endif 148# ifndef HAVE_DLOPEN 149# define HAVE_DLOPEN 1 150# endif 151# endif 152# ifndef HAVE_GETTIMEOFDAY 153# define HAVE_GETTIMEOFDAY 1 154# endif 155#else 156# define HAVE_DLFCN_H 0 157# define HAVE_DLOPEN 0 158# define HAVE_GETTIMEOFDAY 0 159#endif 160 161#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 162 163/* Define the extension for data files, again... */ 164#define DATA_TYPE "dat" 165 166/* Leave this copyright notice here! */ 167static const char copyright[] = U_COPYRIGHT_STRING; 168 169/* floating point implementations ------------------------------------------- */ 170 171/* We return QNAN rather than SNAN*/ 172#define SIGN 0x80000000U 173 174/* Make it easy to define certain types of constants */ 175typedef union { 176 int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */ 177 double d64; 178} BitPatternConversion; 179static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) }; 180static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) }; 181 182/*--------------------------------------------------------------------------- 183 Platform utilities 184 Our general strategy is to assume we're on a POSIX platform. Platforms which 185 are non-POSIX must declare themselves so. The default POSIX implementation 186 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related 187 functions). 188 ---------------------------------------------------------------------------*/ 189 190#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_CLASSIC_MACOS || U_PLATFORM == U_PF_OS400 191# undef U_POSIX_LOCALE 192#else 193# define U_POSIX_LOCALE 1 194#endif 195 196/* 197 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble 198 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2). 199*/ 200#if !IEEE_754 201static char* 202u_topNBytesOfDouble(double* d, int n) 203{ 204#if U_IS_BIG_ENDIAN 205 return (char*)d; 206#else 207 return (char*)(d + 1) - n; 208#endif 209} 210 211static char* 212u_bottomNBytesOfDouble(double* d, int n) 213{ 214#if U_IS_BIG_ENDIAN 215 return (char*)(d + 1) - n; 216#else 217 return (char*)d; 218#endif 219} 220#endif /* !IEEE_754 */ 221 222#if IEEE_754 223static UBool 224u_signBit(double d) { 225 uint8_t hiByte; 226#if U_IS_BIG_ENDIAN 227 hiByte = *(uint8_t *)&d; 228#else 229 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1); 230#endif 231 return (hiByte & 0x80) != 0; 232} 233#endif 234 235 236 237#if defined (U_DEBUG_FAKETIME) 238/* Override the clock to test things without having to move the system clock. 239 * Assumes POSIX gettimeofday() will function 240 */ 241UDate fakeClock_t0 = 0; /** Time to start the clock from **/ 242UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/ 243UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/ 244static UMutex fakeClockMutex = U_MUTEX_INTIALIZER; 245 246static UDate getUTCtime_real() { 247 struct timeval posixTime; 248 gettimeofday(&posixTime, NULL); 249 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 250} 251 252static UDate getUTCtime_fake() { 253 umtx_lock(&fakeClockMutex); 254 if(!fakeClock_set) { 255 UDate real = getUTCtime_real(); 256 const char *fake_start = getenv("U_FAKETIME_START"); 257 if((fake_start!=NULL) && (fake_start[0]!=0)) { 258 sscanf(fake_start,"%lf",&fakeClock_t0); 259 fakeClock_dt = fakeClock_t0 - real; 260 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n" 261 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n", 262 fakeClock_t0, fake_start, fakeClock_dt, real); 263 } else { 264 fakeClock_dt = 0; 265 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n" 266 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n"); 267 } 268 fakeClock_set = TRUE; 269 } 270 umtx_unlock(&fakeClockMutex); 271 272 return getUTCtime_real() + fakeClock_dt; 273} 274#endif 275 276#if U_PLATFORM_USES_ONLY_WIN32_API 277typedef union { 278 int64_t int64; 279 FILETIME fileTime; 280} FileTimeConversion; /* This is like a ULARGE_INTEGER */ 281 282/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */ 283#define EPOCH_BIAS INT64_C(116444736000000000) 284#define HECTONANOSECOND_PER_MILLISECOND 10000 285 286#endif 287 288/*--------------------------------------------------------------------------- 289 Universal Implementations 290 These are designed to work on all platforms. Try these, and if they 291 don't work on your platform, then special case your platform with new 292 implementations. 293---------------------------------------------------------------------------*/ 294 295U_CAPI UDate U_EXPORT2 296uprv_getUTCtime() 297{ 298#if defined(U_DEBUG_FAKETIME) 299 return getUTCtime_fake(); /* Hook for overriding the clock */ 300#else 301 return uprv_getRawUTCtime(); 302#endif 303} 304 305/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ 306U_CAPI UDate U_EXPORT2 307uprv_getRawUTCtime() 308{ 309#if U_PLATFORM == U_PF_CLASSIC_MACOS 310 time_t t, t1, t2; 311 struct tm tmrec; 312 313 uprv_memset( &tmrec, 0, sizeof(tmrec) ); 314 tmrec.tm_year = 70; 315 tmrec.tm_mon = 0; 316 tmrec.tm_mday = 1; 317 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/ 318 319 time(&t); 320 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 321 t2 = mktime(&tmrec); /* seconds of current GMT*/ 322 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/ 323#elif U_PLATFORM_USES_ONLY_WIN32_API 324 325 FileTimeConversion winTime; 326 GetSystemTimeAsFileTime(&winTime.fileTime); 327 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); 328#else 329 330#if HAVE_GETTIMEOFDAY 331 struct timeval posixTime; 332 gettimeofday(&posixTime, NULL); 333 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 334#else 335 time_t epochtime; 336 time(&epochtime); 337 return (UDate)epochtime * U_MILLIS_PER_SECOND; 338#endif 339 340#endif 341} 342 343/*----------------------------------------------------------------------------- 344 IEEE 754 345 These methods detect and return NaN and infinity values for doubles 346 conforming to IEEE 754. Platforms which support this standard include X86, 347 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. 348 If this doesn't work on your platform, you have non-IEEE floating-point, and 349 will need to code your own versions. A naive implementation is to return 0.0 350 for getNaN and getInfinity, and false for isNaN and isInfinite. 351 ---------------------------------------------------------------------------*/ 352 353U_CAPI UBool U_EXPORT2 354uprv_isNaN(double number) 355{ 356#if IEEE_754 357 BitPatternConversion convertedNumber; 358 convertedNumber.d64 = number; 359 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */ 360 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); 361 362#elif U_PLATFORM == U_PF_OS390 363 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 364 sizeof(uint32_t)); 365 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 366 sizeof(uint32_t)); 367 368 return ((highBits & 0x7F080000L) == 0x7F080000L) && 369 (lowBits == 0x00000000L); 370 371#else 372 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 373 /* you'll need to replace this default implementation with what's correct*/ 374 /* for your platform.*/ 375 return number != number; 376#endif 377} 378 379U_CAPI UBool U_EXPORT2 380uprv_isInfinite(double number) 381{ 382#if IEEE_754 383 BitPatternConversion convertedNumber; 384 convertedNumber.d64 = number; 385 /* Infinity is exactly 0x7FF0000000000000U. */ 386 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); 387#elif U_PLATFORM == U_PF_OS390 388 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 389 sizeof(uint32_t)); 390 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 391 sizeof(uint32_t)); 392 393 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); 394 395#else 396 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 397 /* value, you'll need to replace this default implementation with what's*/ 398 /* correct for your platform.*/ 399 return number == (2.0 * number); 400#endif 401} 402 403U_CAPI UBool U_EXPORT2 404uprv_isPositiveInfinity(double number) 405{ 406#if IEEE_754 || U_PLATFORM == U_PF_OS390 407 return (UBool)(number > 0 && uprv_isInfinite(number)); 408#else 409 return uprv_isInfinite(number); 410#endif 411} 412 413U_CAPI UBool U_EXPORT2 414uprv_isNegativeInfinity(double number) 415{ 416#if IEEE_754 || U_PLATFORM == U_PF_OS390 417 return (UBool)(number < 0 && uprv_isInfinite(number)); 418 419#else 420 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 421 sizeof(uint32_t)); 422 return((highBits & SIGN) && uprv_isInfinite(number)); 423 424#endif 425} 426 427U_CAPI double U_EXPORT2 428uprv_getNaN() 429{ 430#if IEEE_754 || U_PLATFORM == U_PF_OS390 431 return gNan.d64; 432#else 433 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 434 /* you'll need to replace this default implementation with what's correct*/ 435 /* for your platform.*/ 436 return 0.0; 437#endif 438} 439 440U_CAPI double U_EXPORT2 441uprv_getInfinity() 442{ 443#if IEEE_754 || U_PLATFORM == U_PF_OS390 444 return gInf.d64; 445#else 446 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 447 /* value, you'll need to replace this default implementation with what's*/ 448 /* correct for your platform.*/ 449 return 0.0; 450#endif 451} 452 453U_CAPI double U_EXPORT2 454uprv_floor(double x) 455{ 456 return floor(x); 457} 458 459U_CAPI double U_EXPORT2 460uprv_ceil(double x) 461{ 462 return ceil(x); 463} 464 465U_CAPI double U_EXPORT2 466uprv_round(double x) 467{ 468 return uprv_floor(x + 0.5); 469} 470 471U_CAPI double U_EXPORT2 472uprv_fabs(double x) 473{ 474 return fabs(x); 475} 476 477U_CAPI double U_EXPORT2 478uprv_modf(double x, double* y) 479{ 480 return modf(x, y); 481} 482 483U_CAPI double U_EXPORT2 484uprv_fmod(double x, double y) 485{ 486 return fmod(x, y); 487} 488 489U_CAPI double U_EXPORT2 490uprv_pow(double x, double y) 491{ 492 /* This is declared as "double pow(double x, double y)" */ 493 return pow(x, y); 494} 495 496U_CAPI double U_EXPORT2 497uprv_pow10(int32_t x) 498{ 499 return pow(10.0, (double)x); 500} 501 502U_CAPI double U_EXPORT2 503uprv_fmax(double x, double y) 504{ 505#if IEEE_754 506 /* first handle NaN*/ 507 if(uprv_isNaN(x) || uprv_isNaN(y)) 508 return uprv_getNaN(); 509 510 /* check for -0 and 0*/ 511 if(x == 0.0 && y == 0.0 && u_signBit(x)) 512 return y; 513 514#endif 515 516 /* this should work for all flt point w/o NaN and Inf special cases */ 517 return (x > y ? x : y); 518} 519 520U_CAPI double U_EXPORT2 521uprv_fmin(double x, double y) 522{ 523#if IEEE_754 524 /* first handle NaN*/ 525 if(uprv_isNaN(x) || uprv_isNaN(y)) 526 return uprv_getNaN(); 527 528 /* check for -0 and 0*/ 529 if(x == 0.0 && y == 0.0 && u_signBit(y)) 530 return y; 531 532#endif 533 534 /* this should work for all flt point w/o NaN and Inf special cases */ 535 return (x > y ? y : x); 536} 537 538/** 539 * Truncates the given double. 540 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0 541 * This is different than calling floor() or ceil(): 542 * floor(3.3) = 3, floor(-3.3) = -4 543 * ceil(3.3) = 4, ceil(-3.3) = -3 544 */ 545U_CAPI double U_EXPORT2 546uprv_trunc(double d) 547{ 548#if IEEE_754 549 /* handle error cases*/ 550 if(uprv_isNaN(d)) 551 return uprv_getNaN(); 552 if(uprv_isInfinite(d)) 553 return uprv_getInfinity(); 554 555 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */ 556 return ceil(d); 557 else 558 return floor(d); 559 560#else 561 return d >= 0 ? floor(d) : ceil(d); 562 563#endif 564} 565 566/** 567 * Return the largest positive number that can be represented by an integer 568 * type of arbitrary bit length. 569 */ 570U_CAPI double U_EXPORT2 571uprv_maxMantissa(void) 572{ 573 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; 574} 575 576U_CAPI double U_EXPORT2 577uprv_log(double d) 578{ 579 return log(d); 580} 581 582U_CAPI void * U_EXPORT2 583uprv_maximumPtr(void * base) 584{ 585#if U_PLATFORM == U_PF_OS400 586 /* 587 * With the provided function we should never be out of range of a given segment 588 * (a traditional/typical segment that is). Our segments have 5 bytes for the 589 * id and 3 bytes for the offset. The key is that the casting takes care of 590 * only retrieving the offset portion minus x1000. Hence, the smallest offset 591 * seen in a program is x001000 and when casted to an int would be 0. 592 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment. 593 * 594 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is 595 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information). 596 * This function determines the activation based on the pointer that is passed in and 597 * calculates the appropriate maximum available size for 598 * each pointer type (TERASPACE and non-TERASPACE) 599 * 600 * Unlike other operating systems, the pointer model isn't determined at 601 * compile time on i5/OS. 602 */ 603 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) { 604 /* if it is a TERASPACE pointer the max is 2GB - 4k */ 605 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); 606 } 607 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */ 608 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff))); 609 610#else 611 return U_MAX_PTR(base); 612#endif 613} 614 615/*--------------------------------------------------------------------------- 616 Platform-specific Implementations 617 Try these, and if they don't work on your platform, then special case your 618 platform with new implementations. 619 ---------------------------------------------------------------------------*/ 620 621/* Generic time zone layer -------------------------------------------------- */ 622 623/* Time zone utilities */ 624U_CAPI void U_EXPORT2 625uprv_tzset() 626{ 627#if defined(U_TZSET) 628 U_TZSET(); 629#else 630 /* no initialization*/ 631#endif 632} 633 634U_CAPI int32_t U_EXPORT2 635uprv_timezone() 636{ 637#ifdef U_TIMEZONE 638 return U_TIMEZONE; 639#else 640 time_t t, t1, t2; 641 struct tm tmrec; 642 int32_t tdiff = 0; 643 644 time(&t); 645 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); 646#if U_PLATFORM != U_PF_IPHONE 647 UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ 648#endif 649 t1 = mktime(&tmrec); /* local time in seconds*/ 650 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 651 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ 652 tdiff = t2 - t1; 653 654#if U_PLATFORM != U_PF_IPHONE 655 /* imitate NT behaviour, which returns same timezone offset to GMT for 656 winter and summer. 657 This does not work on all platforms. For instance, on glibc on Linux 658 and on Mac OS 10.5, tdiff calculated above remains the same 659 regardless of whether DST is in effect or not. iOS is another 660 platform where this does not work. Linux + glibc and Mac OS 10.5 661 have U_TIMEZONE defined so that this code is not reached. 662 */ 663 if (dst_checked) 664 tdiff += 3600; 665#endif 666 return tdiff; 667#endif 668} 669 670/* Note that U_TZNAME does *not* have to be tzname, but if it is, 671 some platforms need to have it declared here. */ 672 673#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API)) 674/* RS6000 and others reject char **tzname. */ 675extern U_IMPORT char *U_TZNAME[]; 676#endif 677 678#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS) 679/* These platforms are likely to use Olson timezone IDs. */ 680#define CHECK_LOCALTIME_LINK 1 681#if U_PLATFORM_IS_DARWIN_BASED 682#include <tzfile.h> 683#define TZZONEINFO (TZDIR "/") 684#elif U_PLATFORM == U_PF_SOLARIS 685#define TZDEFAULT "/etc/localtime" 686#define TZZONEINFO "/usr/share/lib/zoneinfo/" 687#define TZZONEINFO2 "../usr/share/lib/zoneinfo/" 688#define TZ_ENV_CHECK "localtime" 689#else 690#define TZDEFAULT "/etc/localtime" 691#define TZZONEINFO "/usr/share/zoneinfo/" 692#endif 693#if U_HAVE_DIRENT_H 694#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */ 695/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo 696 symlinked to /etc/localtime, which makes searchForTZFile return 697 'localtime' when it's the first match. */ 698#define TZFILE_SKIP2 "localtime" 699#define SEARCH_TZFILE 700#include <dirent.h> /* Needed to search through system timezone files */ 701#endif 702static char gTimeZoneBuffer[PATH_MAX]; 703static char *gTimeZoneBufferPtr = NULL; 704#endif 705 706#if !U_PLATFORM_USES_ONLY_WIN32_API 707#define isNonDigit(ch) (ch < '0' || '9' < ch) 708static UBool isValidOlsonID(const char *id) { 709 int32_t idx = 0; 710 711 /* Determine if this is something like Iceland (Olson ID) 712 or AST4ADT (non-Olson ID) */ 713 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { 714 idx++; 715 } 716 717 /* If we went through the whole string, then it might be okay. 718 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", 719 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. 720 The rest of the time it could be an Olson ID. George */ 721 return (UBool)(id[idx] == 0 722 || uprv_strcmp(id, "PST8PDT") == 0 723 || uprv_strcmp(id, "MST7MDT") == 0 724 || uprv_strcmp(id, "CST6CDT") == 0 725 || uprv_strcmp(id, "EST5EDT") == 0); 726} 727 728/* On some Unix-like OS, 'posix' subdirectory in 729 /usr/share/zoneinfo replicates the top-level contents. 'right' 730 subdirectory has the same set of files, but individual files 731 are different from those in the top-level directory or 'posix' 732 because 'right' has files for TAI (Int'l Atomic Time) while 'posix' 733 has files for UTC. 734 When the first match for /etc/localtime is in either of them 735 (usually in posix because 'right' has different file contents), 736 or TZ environment variable points to one of them, createTimeZone 737 fails because, say, 'posix/America/New_York' is not an Olson 738 timezone id ('America/New_York' is). So, we have to skip 739 'posix/' and 'right/' at the beginning. */ 740static void skipZoneIDPrefix(const char** id) { 741 if (uprv_strncmp(*id, "posix/", 6) == 0 742 || uprv_strncmp(*id, "right/", 6) == 0) 743 { 744 *id += 6; 745 } 746} 747#endif 748 749#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API 750 751#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) 752typedef struct OffsetZoneMapping { 753 int32_t offsetSeconds; 754 int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/ 755 const char *stdID; 756 const char *dstID; 757 const char *olsonID; 758} OffsetZoneMapping; 759 760enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 }; 761 762/* 763This list tries to disambiguate a set of abbreviated timezone IDs and offsets 764and maps it to an Olson ID. 765Before adding anything to this list, take a look at 766icu/source/tools/tzcode/tz.alias 767Sometimes no daylight savings (0) is important to define due to aliases. 768This list can be tested with icu/source/test/compat/tzone.pl 769More values could be added to daylightType to increase precision. 770*/ 771static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = { 772 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"}, 773 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"}, 774 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"}, 775 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"}, 776 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"}, 777 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"}, 778 {-36000, 2, "EST", "EST", "Australia/Sydney"}, 779 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"}, 780 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"}, 781 {-34200, 2, "CST", "CST", "Australia/South"}, 782 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"}, 783 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"}, 784 {-31500, 2, "CWST", "CWST", "Australia/Eucla"}, 785 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"}, 786 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"}, 787 {-28800, 2, "WST", "WST", "Australia/West"}, 788 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"}, 789 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"}, 790 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"}, 791 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"}, 792 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"}, 793 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"}, 794 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"}, 795 {-14400, 1, "AZT", "AZST", "Asia/Baku"}, 796 {-10800, 1, "AST", "ADT", "Asia/Baghdad"}, 797 {-10800, 1, "MSK", "MSD", "Europe/Moscow"}, 798 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"}, 799 {-7200, 0, "EET", "CEST", "Africa/Tripoli"}, 800 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */ 801 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"}, 802 {-3600, 0, "CET", "WEST", "Africa/Algiers"}, 803 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"}, 804 {0, 1, "GMT", "IST", "Europe/Dublin"}, 805 {0, 1, "GMT", "BST", "Europe/London"}, 806 {0, 0, "WET", "WEST", "Africa/Casablanca"}, 807 {0, 0, "WET", "WET", "Africa/El_Aaiun"}, 808 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"}, 809 {3600, 1, "EGT", "EGST", "America/Scoresbysund"}, 810 {10800, 1, "PMST", "PMDT", "America/Miquelon"}, 811 {10800, 2, "UYT", "UYST", "America/Montevideo"}, 812 {10800, 1, "WGT", "WGST", "America/Godthab"}, 813 {10800, 2, "BRT", "BRST", "Brazil/East"}, 814 {12600, 1, "NST", "NDT", "America/St_Johns"}, 815 {14400, 1, "AST", "ADT", "Canada/Atlantic"}, 816 {14400, 2, "AMT", "AMST", "America/Cuiaba"}, 817 {14400, 2, "CLT", "CLST", "Chile/Continental"}, 818 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"}, 819 {14400, 2, "PYT", "PYST", "America/Asuncion"}, 820 {18000, 1, "CST", "CDT", "America/Havana"}, 821 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */ 822 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"}, 823 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"}, 824 {21600, 0, "CST", "CDT", "America/Guatemala"}, 825 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */ 826 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */ 827 {28800, 0, "PST", "PST", "Pacific/Pitcairn"}, 828 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */ 829 {32400, 1, "AKST", "AKDT", "US/Alaska"}, 830 {36000, 1, "HAST", "HADT", "US/Aleutian"} 831}; 832 833/*#define DEBUG_TZNAME*/ 834 835static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset) 836{ 837 int32_t idx; 838#ifdef DEBUG_TZNAME 839 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset); 840#endif 841 for (idx = 0; idx < LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++) 842 { 843 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds 844 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType 845 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0 846 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0) 847 { 848 return OFFSET_ZONE_MAPPINGS[idx].olsonID; 849 } 850 } 851 return NULL; 852} 853#endif 854 855#ifdef SEARCH_TZFILE 856#define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */ 857#define MAX_READ_SIZE 512 858 859typedef struct DefaultTZInfo { 860 char* defaultTZBuffer; 861 int64_t defaultTZFileSize; 862 FILE* defaultTZFilePtr; 863 UBool defaultTZstatus; 864 int32_t defaultTZPosition; 865} DefaultTZInfo; 866 867/* 868 * This method compares the two files given to see if they are a match. 869 * It is currently use to compare two TZ files. 870 */ 871static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) { 872 FILE* file; 873 int64_t sizeFile; 874 int64_t sizeFileLeft; 875 int32_t sizeFileRead; 876 int32_t sizeFileToRead; 877 char bufferFile[MAX_READ_SIZE]; 878 UBool result = TRUE; 879 880 if (tzInfo->defaultTZFilePtr == NULL) { 881 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r"); 882 } 883 file = fopen(TZFileName, "r"); 884 885 tzInfo->defaultTZPosition = 0; /* reset position to begin search */ 886 887 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) { 888 /* First check that the file size are equal. */ 889 if (tzInfo->defaultTZFileSize == 0) { 890 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END); 891 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr); 892 } 893 fseek(file, 0, SEEK_END); 894 sizeFile = ftell(file); 895 sizeFileLeft = sizeFile; 896 897 if (sizeFile != tzInfo->defaultTZFileSize) { 898 result = FALSE; 899 } else { 900 /* Store the data from the files in seperate buffers and 901 * compare each byte to determine equality. 902 */ 903 if (tzInfo->defaultTZBuffer == NULL) { 904 rewind(tzInfo->defaultTZFilePtr); 905 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize); 906 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr); 907 } 908 rewind(file); 909 while(sizeFileLeft > 0) { 910 uprv_memset(bufferFile, 0, MAX_READ_SIZE); 911 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE; 912 913 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file); 914 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) { 915 result = FALSE; 916 break; 917 } 918 sizeFileLeft -= sizeFileRead; 919 tzInfo->defaultTZPosition += sizeFileRead; 920 } 921 } 922 } else { 923 result = FALSE; 924 } 925 926 if (file != NULL) { 927 fclose(file); 928 } 929 930 return result; 931} 932/* 933 * This method recursively traverses the directory given for a matching TZ file and returns the first match. 934 */ 935/* dirent also lists two entries: "." and ".." that we can safely ignore. */ 936#define SKIP1 "." 937#define SKIP2 ".." 938static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = ""; 939static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { 940 char curpath[MAX_PATH_SIZE]; 941 DIR* dirp = opendir(path); 942 DIR* subDirp = NULL; 943 struct dirent* dirEntry = NULL; 944 945 char* result = NULL; 946 if (dirp == NULL) { 947 return result; 948 } 949 950 /* Save the current path */ 951 uprv_memset(curpath, 0, MAX_PATH_SIZE); 952 uprv_strcpy(curpath, path); 953 954 /* Check each entry in the directory. */ 955 while((dirEntry = readdir(dirp)) != NULL) { 956 const char* dirName = dirEntry->d_name; 957 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) { 958 /* Create a newpath with the new entry to test each entry in the directory. */ 959 char newpath[MAX_PATH_SIZE]; 960 uprv_strcpy(newpath, curpath); 961 uprv_strcat(newpath, dirName); 962 963 if ((subDirp = opendir(newpath)) != NULL) { 964 /* If this new path is a directory, make a recursive call with the newpath. */ 965 closedir(subDirp); 966 uprv_strcat(newpath, "/"); 967 result = searchForTZFile(newpath, tzInfo); 968 /* 969 Have to get out here. Otherwise, we'd keep looking 970 and return the first match in the top-level directory 971 if there's a match in the top-level. If not, this function 972 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault(). 973 It worked without this in most cases because we have a fallback of calling 974 localtime_r to figure out the default timezone. 975 */ 976 if (result != NULL) 977 break; 978 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) { 979 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) { 980 const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1; 981 skipZoneIDPrefix(&zoneid); 982 uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid); 983 result = SEARCH_TZFILE_RESULT; 984 /* Get out after the first one found. */ 985 break; 986 } 987 } 988 } 989 } 990 closedir(dirp); 991 return result; 992} 993#endif 994U_CAPI const char* U_EXPORT2 995uprv_tzname(int n) 996{ 997 const char *tzid = NULL; 998#if U_PLATFORM_USES_ONLY_WIN32_API 999 tzid = uprv_detectWindowsTimeZone(); 1000 1001 if (tzid != NULL) { 1002 return tzid; 1003 } 1004#else 1005 1006/*#if U_PLATFORM_IS_DARWIN_BASED 1007 int ret; 1008 1009 tzid = getenv("TZFILE"); 1010 if (tzid != NULL) { 1011 return tzid; 1012 } 1013#endif*/ 1014 1015/* This code can be temporarily disabled to test tzname resolution later on. */ 1016#ifndef DEBUG_TZNAME 1017 tzid = getenv("TZ"); 1018 if (tzid != NULL && isValidOlsonID(tzid) 1019#if U_PLATFORM == U_PF_SOLARIS 1020 /* When TZ equals localtime on Solaris, check the /etc/localtime file. */ 1021 && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0 1022#endif 1023 ) { 1024 /* This might be a good Olson ID. */ 1025 skipZoneIDPrefix(&tzid); 1026 return tzid; 1027 } 1028 /* else U_TZNAME will give a better result. */ 1029#endif 1030 1031#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) 1032 /* Caller must handle threading issues */ 1033 if (gTimeZoneBufferPtr == NULL) { 1034 /* 1035 This is a trick to look at the name of the link to get the Olson ID 1036 because the tzfile contents is underspecified. 1037 This isn't guaranteed to work because it may not be a symlink. 1038 */ 1039 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)); 1040 if (0 < ret) { 1041 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO); 1042 gTimeZoneBuffer[ret] = 0; 1043 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0 1044 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen)) 1045 { 1046 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen); 1047 } 1048#if U_PLATFORM == U_PF_SOLARIS 1049 else 1050 { 1051 tzZoneInfoLen = uprv_strlen(TZZONEINFO2); 1052 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO2, tzZoneInfoLen) == 0 1053 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen)) 1054 { 1055 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen); 1056 } 1057 } 1058#endif 1059 } else { 1060#if defined(SEARCH_TZFILE) 1061 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo)); 1062 if (tzInfo != NULL) { 1063 tzInfo->defaultTZBuffer = NULL; 1064 tzInfo->defaultTZFileSize = 0; 1065 tzInfo->defaultTZFilePtr = NULL; 1066 tzInfo->defaultTZstatus = FALSE; 1067 tzInfo->defaultTZPosition = 0; 1068 1069 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo); 1070 1071 /* Free previously allocated memory */ 1072 if (tzInfo->defaultTZBuffer != NULL) { 1073 uprv_free(tzInfo->defaultTZBuffer); 1074 } 1075 if (tzInfo->defaultTZFilePtr != NULL) { 1076 fclose(tzInfo->defaultTZFilePtr); 1077 } 1078 uprv_free(tzInfo); 1079 } 1080 1081 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) { 1082 return gTimeZoneBufferPtr; 1083 } 1084#endif 1085 } 1086 } 1087 else { 1088 return gTimeZoneBufferPtr; 1089 } 1090#endif 1091#endif 1092 1093#ifdef U_TZNAME 1094#if U_PLATFORM_USES_ONLY_WIN32_API 1095 /* The return value is free'd in timezone.cpp on Windows because 1096 * the other code path returns a pointer to a heap location. */ 1097 return uprv_strdup(U_TZNAME[n]); 1098#else 1099 /* 1100 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable. 1101 So we remap the abbreviation to an olson ID. 1102 1103 Since Windows exposes a little more timezone information, 1104 we normally don't use this code on Windows because 1105 uprv_detectWindowsTimeZone should have already given the correct answer. 1106 */ 1107 { 1108 struct tm juneSol, decemberSol; 1109 int daylightType; 1110 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/ 1111 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/ 1112 1113 /* This probing will tell us when daylight savings occurs. */ 1114 localtime_r(&juneSolstice, &juneSol); 1115 localtime_r(&decemberSolstice, &decemberSol); 1116 if(decemberSol.tm_isdst > 0) { 1117 daylightType = U_DAYLIGHT_DECEMBER; 1118 } else if(juneSol.tm_isdst > 0) { 1119 daylightType = U_DAYLIGHT_JUNE; 1120 } else { 1121 daylightType = U_DAYLIGHT_NONE; 1122 } 1123 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone()); 1124 if (tzid != NULL) { 1125 return tzid; 1126 } 1127 } 1128 return U_TZNAME[n]; 1129#endif 1130#else 1131 return ""; 1132#endif 1133} 1134 1135/* Get and set the ICU data directory --------------------------------------- */ 1136 1137static char *gDataDirectory = NULL; 1138#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API 1139 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */ 1140#endif 1141 1142static UBool U_CALLCONV putil_cleanup(void) 1143{ 1144 if (gDataDirectory && *gDataDirectory) { 1145 uprv_free(gDataDirectory); 1146 } 1147 gDataDirectory = NULL; 1148#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API 1149 if (gCorrectedPOSIXLocale) { 1150 uprv_free(gCorrectedPOSIXLocale); 1151 gCorrectedPOSIXLocale = NULL; 1152 } 1153#endif 1154 return TRUE; 1155} 1156 1157/* 1158 * Set the data directory. 1159 * Make a copy of the passed string, and set the global data dir to point to it. 1160 */ 1161U_CAPI void U_EXPORT2 1162u_setDataDirectory(const char *directory) { 1163 char *newDataDir; 1164 int32_t length; 1165 1166 if(directory==NULL || *directory==0) { 1167 /* A small optimization to prevent the malloc and copy when the 1168 shared library is used, and this is a way to make sure that NULL 1169 is never returned. 1170 */ 1171 newDataDir = (char *)""; 1172 } 1173 else { 1174 length=(int32_t)uprv_strlen(directory); 1175 newDataDir = (char *)uprv_malloc(length + 2); 1176 /* Exit out if newDataDir could not be created. */ 1177 if (newDataDir == NULL) { 1178 return; 1179 } 1180 uprv_strcpy(newDataDir, directory); 1181 1182#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1183 { 1184 char *p; 1185 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) { 1186 *p = U_FILE_SEP_CHAR; 1187 } 1188 } 1189#endif 1190 } 1191 1192 if (gDataDirectory && *gDataDirectory) { 1193 uprv_free(gDataDirectory); 1194 } 1195 gDataDirectory = newDataDir; 1196 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1197} 1198 1199U_CAPI UBool U_EXPORT2 1200uprv_pathIsAbsolute(const char *path) 1201{ 1202 if(!path || !*path) { 1203 return FALSE; 1204 } 1205 1206 if(*path == U_FILE_SEP_CHAR) { 1207 return TRUE; 1208 } 1209 1210#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1211 if(*path == U_FILE_ALT_SEP_CHAR) { 1212 return TRUE; 1213 } 1214#endif 1215 1216#if U_PLATFORM_USES_ONLY_WIN32_API 1217 if( (((path[0] >= 'A') && (path[0] <= 'Z')) || 1218 ((path[0] >= 'a') && (path[0] <= 'z'))) && 1219 path[1] == ':' ) { 1220 return TRUE; 1221 } 1222#endif 1223 1224 return FALSE; 1225} 1226 1227/* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR 1228 until some client wrapper makefiles are updated */ 1229#if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR 1230# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1231# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT" 1232# endif 1233#endif 1234 1235U_CAPI const char * U_EXPORT2 1236u_getDataDirectory(void) { 1237 const char *path = NULL; 1238#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1239 char datadir_path_buffer[PATH_MAX]; 1240#endif 1241 1242 /* if we have the directory, then return it immediately */ 1243 if(gDataDirectory) { 1244 return gDataDirectory; 1245 } 1246 1247 /* 1248 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to 1249 override ICU's data with the ICU_DATA environment variable. This prevents 1250 problems where multiple custom copies of ICU's specific version of data 1251 are installed on a system. Either the application must define the data 1252 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling 1253 ICU, set the data with udata_setCommonData or trust that all of the 1254 required data is contained in ICU's data library that contains 1255 the entry point defined by U_ICUDATA_ENTRY_POINT. 1256 1257 There may also be some platforms where environment variables 1258 are not allowed. 1259 */ 1260# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO 1261 /* First try to get the environment variable */ 1262 path=getenv("ICU_DATA"); 1263# endif 1264 1265 /* ICU_DATA_DIR may be set as a compile option. 1266 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time 1267 * and is used only when data is built in archive mode eliminating the need 1268 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation 1269 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to 1270 * set their own path. 1271 */ 1272#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR) 1273 if(path==NULL || *path==0) { 1274# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1275 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR); 1276# endif 1277# ifdef ICU_DATA_DIR 1278 path=ICU_DATA_DIR; 1279# else 1280 path=U_ICU_DATA_DEFAULT_DIR; 1281# endif 1282# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1283 if (prefix != NULL) { 1284 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path); 1285 path=datadir_path_buffer; 1286 } 1287# endif 1288 } 1289#endif 1290 1291 if(path==NULL) { 1292 /* It looks really bad, set it to something. */ 1293 path = ""; 1294 } 1295 1296 u_setDataDirectory(path); 1297 return gDataDirectory; 1298} 1299 1300 1301 1302 1303 1304/* Macintosh-specific locale information ------------------------------------ */ 1305#if U_PLATFORM == U_PF_CLASSIC_MACOS 1306 1307typedef struct { 1308 int32_t script; 1309 int32_t region; 1310 int32_t lang; 1311 int32_t date_region; 1312 const char* posixID; 1313} mac_lc_rec; 1314 1315/* Todo: This will be updated with a newer version from www.unicode.org web 1316 page when it's available.*/ 1317#define MAC_LC_MAGIC_NUMBER -5 1318#define MAC_LC_INIT_NUMBER -9 1319 1320static const mac_lc_rec mac_lc_recs[] = { 1321 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US", 1322 /* United States*/ 1323 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR", 1324 /* France*/ 1325 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB", 1326 /* Great Britain*/ 1327 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE", 1328 /* Germany*/ 1329 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT", 1330 /* Italy*/ 1331 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL", 1332 /* Metherlands*/ 1333 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE", 1334 /* French for Belgium or Lxembourg*/ 1335 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE", 1336 /* Sweden*/ 1337 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK", 1338 /* Denmark*/ 1339 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT", 1340 /* Portugal*/ 1341 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA", 1342 /* French Canada*/ 1343 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS", 1344 /* Israel*/ 1345 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP", 1346 /* Japan*/ 1347 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU", 1348 /* Australia*/ 1349 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE", 1350 /* the Arabic world (?)*/ 1351 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI", 1352 /* Finland*/ 1353 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH", 1354 /* French for Switzerland*/ 1355 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH", 1356 /* German for Switzerland*/ 1357 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR", 1358 /* Greece*/ 1359 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS", 1360 /* Iceland ===*/ 1361 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/ 1362 /* Malta ===*/ 1363 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/ 1364 /* Cyprus ===*/ 1365 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR", 1366 /* Turkey ===*/ 1367 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU", 1368 /* Croatian system for Yugoslavia*/ 1369 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/ 1370 /* Hindi system for India*/ 1371 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/ 1372 /* Pakistan*/ 1373 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT", 1374 /* Lithuania*/ 1375 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL", 1376 /* Poland*/ 1377 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU", 1378 /* Hungary*/ 1379 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE", 1380 /* Estonia*/ 1381 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV", 1382 /* Latvia*/ 1383 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/ 1384 /* Lapland [Ask Rich for the data. HS]*/ 1385 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/ 1386 /* Faeroe Islands*/ 1387 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR", 1388 /* Iran*/ 1389 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU", 1390 /* Russia*/ 1391 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE", 1392 /* Ireland*/ 1393 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR", 1394 /* Korea*/ 1395 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN", 1396 /* People's Republic of China*/ 1397 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW", 1398 /* Taiwan*/ 1399 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH", 1400 /* Thailand*/ 1401 1402 /* fallback is en_US*/ 1403 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1404 MAC_LC_MAGIC_NUMBER, "en_US" 1405}; 1406 1407#endif 1408 1409#if U_POSIX_LOCALE 1410/* A helper function used by uprv_getPOSIXIDForDefaultLocale and 1411 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for 1412 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories. 1413 */ 1414static const char *uprv_getPOSIXIDForCategory(int category) 1415{ 1416 const char* posixID = NULL; 1417 if (category == LC_MESSAGES || category == LC_CTYPE) { 1418 /* 1419 * On Solaris two different calls to setlocale can result in 1420 * different values. Only get this value once. 1421 * 1422 * We must check this first because an application can set this. 1423 * 1424 * LC_ALL can't be used because it's platform dependent. The LANG 1425 * environment variable seems to affect LC_CTYPE variable by default. 1426 * Here is what setlocale(LC_ALL, NULL) can return. 1427 * HPUX can return 'C C C C C C C' 1428 * Solaris can return /en_US/C/C/C/C/C on the second try. 1429 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;... 1430 * 1431 * The default codepage detection also needs to use LC_CTYPE. 1432 * 1433 * Do not call setlocale(LC_*, "")! Using an empty string instead 1434 * of NULL, will modify the libc behavior. 1435 */ 1436 posixID = setlocale(category, NULL); 1437 if ((posixID == 0) 1438 || (uprv_strcmp("C", posixID) == 0) 1439 || (uprv_strcmp("POSIX", posixID) == 0)) 1440 { 1441 /* Maybe we got some garbage. Try something more reasonable */ 1442 posixID = getenv("LC_ALL"); 1443 if (posixID == 0) { 1444 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); 1445 if (posixID == 0) { 1446 posixID = getenv("LANG"); 1447 } 1448 } 1449 } 1450 } 1451 if ((posixID==0) 1452 || (uprv_strcmp("C", posixID) == 0) 1453 || (uprv_strcmp("POSIX", posixID) == 0)) 1454 { 1455 /* Nothing worked. Give it a nice POSIX default value. */ 1456 posixID = "en_US_POSIX"; 1457 } 1458 return posixID; 1459} 1460 1461/* Return just the POSIX id for the default locale, whatever happens to be in 1462 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG. 1463 */ 1464static const char *uprv_getPOSIXIDForDefaultLocale(void) 1465{ 1466 static const char* posixID = NULL; 1467 if (posixID == 0) { 1468 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES); 1469 } 1470 return posixID; 1471} 1472 1473#if !U_CHARSET_IS_UTF8 1474/* Return just the POSIX id for the default codepage, whatever happens to be in 1475 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG. 1476 */ 1477static const char *uprv_getPOSIXIDForDefaultCodepage(void) 1478{ 1479 static const char* posixID = NULL; 1480 if (posixID == 0) { 1481 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE); 1482 } 1483 return posixID; 1484} 1485#endif 1486#endif 1487 1488/* NOTE: The caller should handle thread safety */ 1489U_CAPI const char* U_EXPORT2 1490uprv_getDefaultLocaleID() 1491{ 1492#if U_POSIX_LOCALE 1493/* 1494 Note that: (a '!' means the ID is improper somehow) 1495 LC_ALL ----> default_loc codepage 1496-------------------------------------------------------- 1497 ab.CD ab CD 1498 ab@CD ab__CD - 1499 ab@CD.EF ab__CD EF 1500 1501 ab_CD.EF@GH ab_CD_GH EF 1502 1503Some 'improper' ways to do the same as above: 1504 ! ab_CD@GH.EF ab_CD_GH EF 1505 ! ab_CD.EF@GH.IJ ab_CD_GH EF 1506 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF 1507 1508 _CD@GH _CD_GH - 1509 _CD.EF@GH _CD_GH EF 1510 1511The variant cannot have dots in it. 1512The 'rightmost' variant (@xxx) wins. 1513The leftmost codepage (.xxx) wins. 1514*/ 1515 char *correctedPOSIXLocale = 0; 1516 const char* posixID = uprv_getPOSIXIDForDefaultLocale(); 1517 const char *p; 1518 const char *q; 1519 int32_t len; 1520 1521 /* Format: (no spaces) 1522 ll [ _CC ] [ . MM ] [ @ VV] 1523 1524 l = lang, C = ctry, M = charmap, V = variant 1525 */ 1526 1527 if (gCorrectedPOSIXLocale != NULL) { 1528 return gCorrectedPOSIXLocale; 1529 } 1530 1531 if ((p = uprv_strchr(posixID, '.')) != NULL) { 1532 /* assume new locale can't be larger than old one? */ 1533 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1)); 1534 /* Exit on memory allocation error. */ 1535 if (correctedPOSIXLocale == NULL) { 1536 return NULL; 1537 } 1538 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1539 correctedPOSIXLocale[p-posixID] = 0; 1540 1541 /* do not copy after the @ */ 1542 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) { 1543 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0; 1544 } 1545 } 1546 1547 /* Note that we scan the *uncorrected* ID. */ 1548 if ((p = uprv_strrchr(posixID, '@')) != NULL) { 1549 if (correctedPOSIXLocale == NULL) { 1550 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1)); 1551 /* Exit on memory allocation error. */ 1552 if (correctedPOSIXLocale == NULL) { 1553 return NULL; 1554 } 1555 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1556 correctedPOSIXLocale[p-posixID] = 0; 1557 } 1558 p++; 1559 1560 /* Take care of any special cases here.. */ 1561 if (!uprv_strcmp(p, "nynorsk")) { 1562 p = "NY"; 1563 /* Don't worry about no__NY. In practice, it won't appear. */ 1564 } 1565 1566 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) { 1567 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */ 1568 } 1569 else { 1570 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */ 1571 } 1572 1573 if ((q = uprv_strchr(p, '.')) != NULL) { 1574 /* How big will the resulting string be? */ 1575 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); 1576 uprv_strncat(correctedPOSIXLocale, p, q-p); 1577 correctedPOSIXLocale[len] = 0; 1578 } 1579 else { 1580 /* Anything following the @ sign */ 1581 uprv_strcat(correctedPOSIXLocale, p); 1582 } 1583 1584 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? 1585 * How about 'russian' -> 'ru'? 1586 * Many of the other locales using ISO codes will be handled by the 1587 * canonicalization functions in uloc_getDefault. 1588 */ 1589 } 1590 1591 /* Was a correction made? */ 1592 if (correctedPOSIXLocale != NULL) { 1593 posixID = correctedPOSIXLocale; 1594 } 1595 else { 1596 /* copy it, just in case the original pointer goes away. See j2395 */ 1597 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1); 1598 /* Exit on memory allocation error. */ 1599 if (correctedPOSIXLocale == NULL) { 1600 return NULL; 1601 } 1602 posixID = uprv_strcpy(correctedPOSIXLocale, posixID); 1603 } 1604 1605 if (gCorrectedPOSIXLocale == NULL) { 1606 gCorrectedPOSIXLocale = correctedPOSIXLocale; 1607 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1608 correctedPOSIXLocale = NULL; 1609 } 1610 1611 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */ 1612 uprv_free(correctedPOSIXLocale); 1613 } 1614 1615 return posixID; 1616 1617#elif U_PLATFORM_USES_ONLY_WIN32_API 1618#define POSIX_LOCALE_CAPACITY 64 1619 UErrorCode status = U_ZERO_ERROR; 1620 char *correctedPOSIXLocale = 0; 1621 1622 if (gCorrectedPOSIXLocale != NULL) { 1623 return gCorrectedPOSIXLocale; 1624 } 1625 1626 LCID id = GetThreadLocale(); 1627 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1)); 1628 if (correctedPOSIXLocale) { 1629 int32_t posixLen = uprv_convertToPosix(id, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status); 1630 if (U_SUCCESS(status)) { 1631 *(correctedPOSIXLocale + posixLen) = 0; 1632 gCorrectedPOSIXLocale = correctedPOSIXLocale; 1633 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1634 } else { 1635 uprv_free(correctedPOSIXLocale); 1636 } 1637 } 1638 1639 if (gCorrectedPOSIXLocale == NULL) { 1640 return "en_US"; 1641 } 1642 return gCorrectedPOSIXLocale; 1643 1644#elif U_PLATFORM == U_PF_CLASSIC_MACOS 1645 int32_t script = MAC_LC_INIT_NUMBER; 1646 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/ 1647 int32_t region = MAC_LC_INIT_NUMBER; 1648 /* = GetScriptManagerVariable(smRegionCode);*/ 1649 int32_t lang = MAC_LC_INIT_NUMBER; 1650 /* = GetScriptManagerVariable(smScriptLang);*/ 1651 int32_t date_region = MAC_LC_INIT_NUMBER; 1652 const char* posixID = 0; 1653 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec); 1654 int32_t i; 1655 Intl1Hndl ih; 1656 1657 ih = (Intl1Hndl) GetIntlResource(1); 1658 if (ih) 1659 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8; 1660 1661 for (i = 0; i < count; i++) { 1662 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER) 1663 || (mac_lc_recs[i].script == script)) 1664 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER) 1665 || (mac_lc_recs[i].region == region)) 1666 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER) 1667 || (mac_lc_recs[i].lang == lang)) 1668 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER) 1669 || (mac_lc_recs[i].date_region == date_region)) 1670 ) 1671 { 1672 posixID = mac_lc_recs[i].posixID; 1673 break; 1674 } 1675 } 1676 1677 return posixID; 1678 1679#elif U_PLATFORM == U_PF_OS400 1680 /* locales are process scoped and are by definition thread safe */ 1681 static char correctedLocale[64]; 1682 const char *localeID = getenv("LC_ALL"); 1683 char *p; 1684 1685 if (localeID == NULL) 1686 localeID = getenv("LANG"); 1687 if (localeID == NULL) 1688 localeID = setlocale(LC_ALL, NULL); 1689 /* Make sure we have something... */ 1690 if (localeID == NULL) 1691 return "en_US_POSIX"; 1692 1693 /* Extract the locale name from the path. */ 1694 if((p = uprv_strrchr(localeID, '/')) != NULL) 1695 { 1696 /* Increment p to start of locale name. */ 1697 p++; 1698 localeID = p; 1699 } 1700 1701 /* Copy to work location. */ 1702 uprv_strcpy(correctedLocale, localeID); 1703 1704 /* Strip off the '.locale' extension. */ 1705 if((p = uprv_strchr(correctedLocale, '.')) != NULL) { 1706 *p = 0; 1707 } 1708 1709 /* Upper case the locale name. */ 1710 T_CString_toUpperCase(correctedLocale); 1711 1712 /* See if we are using the POSIX locale. Any of the 1713 * following are equivalent and use the same QLGPGCMA 1714 * (POSIX) locale. 1715 * QLGPGCMA2 means UCS2 1716 * QLGPGCMA_4 means UTF-32 1717 * QLGPGCMA_8 means UTF-8 1718 */ 1719 if ((uprv_strcmp("C", correctedLocale) == 0) || 1720 (uprv_strcmp("POSIX", correctedLocale) == 0) || 1721 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0)) 1722 { 1723 uprv_strcpy(correctedLocale, "en_US_POSIX"); 1724 } 1725 else 1726 { 1727 int16_t LocaleLen; 1728 1729 /* Lower case the lang portion. */ 1730 for(p = correctedLocale; *p != 0 && *p != '_'; p++) 1731 { 1732 *p = uprv_tolower(*p); 1733 } 1734 1735 /* Adjust for Euro. After '_E' add 'URO'. */ 1736 LocaleLen = uprv_strlen(correctedLocale); 1737 if (correctedLocale[LocaleLen - 2] == '_' && 1738 correctedLocale[LocaleLen - 1] == 'E') 1739 { 1740 uprv_strcat(correctedLocale, "URO"); 1741 } 1742 1743 /* If using Lotus-based locale then convert to 1744 * equivalent non Lotus. 1745 */ 1746 else if (correctedLocale[LocaleLen - 2] == '_' && 1747 correctedLocale[LocaleLen - 1] == 'L') 1748 { 1749 correctedLocale[LocaleLen - 2] = 0; 1750 } 1751 1752 /* There are separate simplified and traditional 1753 * locales called zh_HK_S and zh_HK_T. 1754 */ 1755 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0) 1756 { 1757 uprv_strcpy(correctedLocale, "zh_HK"); 1758 } 1759 1760 /* A special zh_CN_GBK locale... 1761 */ 1762 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0) 1763 { 1764 uprv_strcpy(correctedLocale, "zh_CN"); 1765 } 1766 1767 } 1768 1769 return correctedLocale; 1770#endif 1771 1772} 1773 1774#if !U_CHARSET_IS_UTF8 1775#if U_POSIX_LOCALE 1776/* 1777Due to various platform differences, one platform may specify a charset, 1778when they really mean a different charset. Remap the names so that they are 1779compatible with ICU. Only conflicting/ambiguous aliases should be resolved 1780here. Before adding anything to this function, please consider adding unique 1781names to the ICU alias table in the data directory. 1782*/ 1783static const char* 1784remapPlatformDependentCodepage(const char *locale, const char *name) { 1785 if (locale != NULL && *locale == 0) { 1786 /* Make sure that an empty locale is handled the same way. */ 1787 locale = NULL; 1788 } 1789 if (name == NULL) { 1790 return NULL; 1791 } 1792#if U_PLATFORM == U_PF_AIX 1793 if (uprv_strcmp(name, "IBM-943") == 0) { 1794 /* Use the ASCII compatible ibm-943 */ 1795 name = "Shift-JIS"; 1796 } 1797 else if (uprv_strcmp(name, "IBM-1252") == 0) { 1798 /* Use the windows-1252 that contains the Euro */ 1799 name = "IBM-5348"; 1800 } 1801#elif U_PLATFORM == U_PF_SOLARIS 1802 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) { 1803 /* Solaris underspecifies the "EUC" name. */ 1804 if (uprv_strcmp(locale, "zh_CN") == 0) { 1805 name = "EUC-CN"; 1806 } 1807 else if (uprv_strcmp(locale, "zh_TW") == 0) { 1808 name = "EUC-TW"; 1809 } 1810 else if (uprv_strcmp(locale, "ko_KR") == 0) { 1811 name = "EUC-KR"; 1812 } 1813 } 1814 else if (uprv_strcmp(name, "eucJP") == 0) { 1815 /* 1816 ibm-954 is the best match. 1817 ibm-33722 is the default for eucJP (similar to Windows). 1818 */ 1819 name = "eucjis"; 1820 } 1821 else if (uprv_strcmp(name, "646") == 0) { 1822 /* 1823 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was 1824 * ISO-8859-1 instead of US-ASCII(646). 1825 */ 1826 name = "ISO-8859-1"; 1827 } 1828#elif U_PLATFORM_IS_DARWIN_BASED 1829 if (locale == NULL && *name == 0) { 1830 /* 1831 No locale was specified, and an empty name was passed in. 1832 This usually indicates that nl_langinfo didn't return valid information. 1833 Mac OS X uses UTF-8 by default (especially the locale data and console). 1834 */ 1835 name = "UTF-8"; 1836 } 1837 else if (uprv_strcmp(name, "CP949") == 0) { 1838 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1839 name = "EUC-KR"; 1840 } 1841 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) { 1842 /* 1843 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. 1844 */ 1845 name = "UTF-8"; 1846 } 1847#elif U_PLATFORM == U_PF_BSD 1848 if (uprv_strcmp(name, "CP949") == 0) { 1849 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1850 name = "EUC-KR"; 1851 } 1852#elif U_PLATFORM == U_PF_HPUX 1853 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) { 1854 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */ 1855 /* zh_TW.big5 is not the same charset as zh_HK.big5! */ 1856 name = "hkbig5"; 1857 } 1858 else if (uprv_strcmp(name, "eucJP") == 0) { 1859 /* 1860 ibm-1350 is the best match, but unavailable. 1861 ibm-954 is mostly a superset of ibm-1350. 1862 ibm-33722 is the default for eucJP (similar to Windows). 1863 */ 1864 name = "eucjis"; 1865 } 1866#elif U_PLATFORM == U_PF_LINUX 1867 if (locale != NULL && uprv_strcmp(name, "euc") == 0) { 1868 /* Linux underspecifies the "EUC" name. */ 1869 if (uprv_strcmp(locale, "korean") == 0) { 1870 name = "EUC-KR"; 1871 } 1872 else if (uprv_strcmp(locale, "japanese") == 0) { 1873 /* See comment below about eucJP */ 1874 name = "eucjis"; 1875 } 1876 } 1877 else if (uprv_strcmp(name, "eucjp") == 0) { 1878 /* 1879 ibm-1350 is the best match, but unavailable. 1880 ibm-954 is mostly a superset of ibm-1350. 1881 ibm-33722 is the default for eucJP (similar to Windows). 1882 */ 1883 name = "eucjis"; 1884 } 1885 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && 1886 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) { 1887 /* 1888 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. 1889 */ 1890 name = "UTF-8"; 1891 } 1892 /* 1893 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of 1894 * it by falling back to 'US-ASCII' when NULL is returned from this 1895 * function. So, we don't have to worry about it here. 1896 */ 1897#endif 1898 /* return NULL when "" is passed in */ 1899 if (*name == 0) { 1900 name = NULL; 1901 } 1902 return name; 1903} 1904 1905static const char* 1906getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity) 1907{ 1908 char localeBuf[100]; 1909 const char *name = NULL; 1910 char *variant = NULL; 1911 1912 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) { 1913 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1); 1914 uprv_strncpy(localeBuf, localeName, localeCapacity); 1915 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */ 1916 name = uprv_strncpy(buffer, name+1, buffCapacity); 1917 buffer[buffCapacity-1] = 0; /* ensure NULL termination */ 1918 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) { 1919 *variant = 0; 1920 } 1921 name = remapPlatformDependentCodepage(localeBuf, name); 1922 } 1923 return name; 1924} 1925#endif 1926 1927static const char* 1928int_getDefaultCodepage() 1929{ 1930#if U_PLATFORM == U_PF_OS400 1931 uint32_t ccsid = 37; /* Default to ibm-37 */ 1932 static char codepage[64]; 1933 Qwc_JOBI0400_t jobinfo; 1934 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */ 1935 1936 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400", 1937 "* ", " ", &error); 1938 1939 if (error.Bytes_Available == 0) { 1940 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) { 1941 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; 1942 } 1943 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { 1944 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; 1945 } 1946 /* else use the default */ 1947 } 1948 sprintf(codepage,"ibm-%d", ccsid); 1949 return codepage; 1950 1951#elif U_PLATFORM == U_PF_OS390 1952 static char codepage[64]; 1953 1954 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING)); 1955 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING); 1956 codepage[63] = 0; /* NULL terminate */ 1957 1958 return codepage; 1959 1960#elif U_PLATFORM == U_PF_CLASSIC_MACOS 1961 return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */ 1962 1963#elif U_PLATFORM_USES_ONLY_WIN32_API 1964 static char codepage[64]; 1965 sprintf(codepage, "windows-%d", GetACP()); 1966 return codepage; 1967 1968#elif U_POSIX_LOCALE 1969 static char codesetName[100]; 1970 const char *localeName = NULL; 1971 const char *name = NULL; 1972 1973 localeName = uprv_getPOSIXIDForDefaultCodepage(); 1974 uprv_memset(codesetName, 0, sizeof(codesetName)); 1975#if U_HAVE_NL_LANGINFO_CODESET 1976 /* When available, check nl_langinfo first because it usually gives more 1977 useful names. It depends on LC_CTYPE. 1978 nl_langinfo may use the same buffer as setlocale. */ 1979 { 1980 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); 1981#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED 1982 /* 1983 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8 1984 * instead of ASCII. 1985 */ 1986 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) { 1987 codeset = remapPlatformDependentCodepage(localeName, codeset); 1988 } else 1989#endif 1990 { 1991 codeset = remapPlatformDependentCodepage(NULL, codeset); 1992 } 1993 1994 if (codeset != NULL) { 1995 uprv_strncpy(codesetName, codeset, sizeof(codesetName)); 1996 codesetName[sizeof(codesetName)-1] = 0; 1997 return codesetName; 1998 } 1999 } 2000#endif 2001 2002 /* Use setlocale in a nice way, and then check some environment variables. 2003 Maybe the application used setlocale already. 2004 */ 2005 uprv_memset(codesetName, 0, sizeof(codesetName)); 2006 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); 2007 if (name) { 2008 /* if we can find the codeset name from setlocale, return that. */ 2009 return name; 2010 } 2011 2012 if (*codesetName == 0) 2013 { 2014 /* Everything failed. Return US ASCII (ISO 646). */ 2015 (void)uprv_strcpy(codesetName, "US-ASCII"); 2016 } 2017 return codesetName; 2018#else 2019 return "US-ASCII"; 2020#endif 2021} 2022 2023 2024U_CAPI const char* U_EXPORT2 2025uprv_getDefaultCodepage() 2026{ 2027 static char const *name = NULL; 2028 umtx_lock(NULL); 2029 if (name == NULL) { 2030 name = int_getDefaultCodepage(); 2031 } 2032 umtx_unlock(NULL); 2033 return name; 2034} 2035#endif /* !U_CHARSET_IS_UTF8 */ 2036 2037 2038/* end of platform-specific implementation -------------- */ 2039 2040/* version handling --------------------------------------------------------- */ 2041 2042U_CAPI void U_EXPORT2 2043u_versionFromString(UVersionInfo versionArray, const char *versionString) { 2044 char *end; 2045 uint16_t part=0; 2046 2047 if(versionArray==NULL) { 2048 return; 2049 } 2050 2051 if(versionString!=NULL) { 2052 for(;;) { 2053 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); 2054 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { 2055 break; 2056 } 2057 versionString=end+1; 2058 } 2059 } 2060 2061 while(part<U_MAX_VERSION_LENGTH) { 2062 versionArray[part++]=0; 2063 } 2064} 2065 2066U_CAPI void U_EXPORT2 2067u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) { 2068 if(versionArray!=NULL && versionString!=NULL) { 2069 char versionChars[U_MAX_VERSION_STRING_LENGTH+1]; 2070 int32_t len = u_strlen(versionString); 2071 if(len>U_MAX_VERSION_STRING_LENGTH) { 2072 len = U_MAX_VERSION_STRING_LENGTH; 2073 } 2074 u_UCharsToChars(versionString, versionChars, len); 2075 versionChars[len]=0; 2076 u_versionFromString(versionArray, versionChars); 2077 } 2078} 2079 2080U_CAPI void U_EXPORT2 2081u_versionToString(const UVersionInfo versionArray, char *versionString) { 2082 uint16_t count, part; 2083 uint8_t field; 2084 2085 if(versionString==NULL) { 2086 return; 2087 } 2088 2089 if(versionArray==NULL) { 2090 versionString[0]=0; 2091 return; 2092 } 2093 2094 /* count how many fields need to be written */ 2095 for(count=4; count>0 && versionArray[count-1]==0; --count) { 2096 } 2097 2098 if(count <= 1) { 2099 count = 2; 2100 } 2101 2102 /* write the first part */ 2103 /* write the decimal field value */ 2104 field=versionArray[0]; 2105 if(field>=100) { 2106 *versionString++=(char)('0'+field/100); 2107 field%=100; 2108 } 2109 if(field>=10) { 2110 *versionString++=(char)('0'+field/10); 2111 field%=10; 2112 } 2113 *versionString++=(char)('0'+field); 2114 2115 /* write the following parts */ 2116 for(part=1; part<count; ++part) { 2117 /* write a dot first */ 2118 *versionString++=U_VERSION_DELIMITER; 2119 2120 /* write the decimal field value */ 2121 field=versionArray[part]; 2122 if(field>=100) { 2123 *versionString++=(char)('0'+field/100); 2124 field%=100; 2125 } 2126 if(field>=10) { 2127 *versionString++=(char)('0'+field/10); 2128 field%=10; 2129 } 2130 *versionString++=(char)('0'+field); 2131 } 2132 2133 /* NUL-terminate */ 2134 *versionString=0; 2135} 2136 2137U_CAPI void U_EXPORT2 2138u_getVersion(UVersionInfo versionArray) { 2139 (void)copyright; // Suppress unused variable warning from clang. 2140 u_versionFromString(versionArray, U_ICU_VERSION); 2141} 2142 2143/** 2144 * icucfg.h dependent code 2145 */ 2146 2147#if U_ENABLE_DYLOAD 2148 2149#if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API 2150 2151#if HAVE_DLFCN_H 2152 2153#ifdef __MVS__ 2154#ifndef __SUSV3 2155#define __SUSV3 1 2156#endif 2157#endif 2158#include <dlfcn.h> 2159#endif 2160 2161U_INTERNAL void * U_EXPORT2 2162uprv_dl_open(const char *libName, UErrorCode *status) { 2163 void *ret = NULL; 2164 if(U_FAILURE(*status)) return ret; 2165 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL); 2166 if(ret==NULL) { 2167#ifdef U_TRACE_DYLOAD 2168 printf("dlerror on dlopen(%s): %s\n", libName, dlerror()); 2169#endif 2170 *status = U_MISSING_RESOURCE_ERROR; 2171 } 2172 return ret; 2173} 2174 2175U_INTERNAL void U_EXPORT2 2176uprv_dl_close(void *lib, UErrorCode *status) { 2177 if(U_FAILURE(*status)) return; 2178 dlclose(lib); 2179} 2180 2181U_INTERNAL UVoidFunction* U_EXPORT2 2182uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2183 union { 2184 UVoidFunction *fp; 2185 void *vp; 2186 } uret; 2187 uret.fp = NULL; 2188 if(U_FAILURE(*status)) return uret.fp; 2189 uret.vp = dlsym(lib, sym); 2190 if(uret.vp == NULL) { 2191#ifdef U_TRACE_DYLOAD 2192 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror()); 2193#endif 2194 *status = U_MISSING_RESOURCE_ERROR; 2195 } 2196 return uret.fp; 2197} 2198 2199#else 2200 2201/* null (nonexistent) implementation. */ 2202 2203U_INTERNAL void * U_EXPORT2 2204uprv_dl_open(const char *libName, UErrorCode *status) { 2205 if(U_FAILURE(*status)) return NULL; 2206 *status = U_UNSUPPORTED_ERROR; 2207 return NULL; 2208} 2209 2210U_INTERNAL void U_EXPORT2 2211uprv_dl_close(void *lib, UErrorCode *status) { 2212 if(U_FAILURE(*status)) return; 2213 *status = U_UNSUPPORTED_ERROR; 2214 return; 2215} 2216 2217 2218U_INTERNAL UVoidFunction* U_EXPORT2 2219uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2220 if(U_SUCCESS(*status)) { 2221 *status = U_UNSUPPORTED_ERROR; 2222 } 2223 return (UVoidFunction*)NULL; 2224} 2225 2226 2227 2228#endif 2229 2230#elif U_PLATFORM_USES_ONLY_WIN32_API 2231 2232U_INTERNAL void * U_EXPORT2 2233uprv_dl_open(const char *libName, UErrorCode *status) { 2234 HMODULE lib = NULL; 2235 2236 if(U_FAILURE(*status)) return NULL; 2237 2238 lib = LoadLibraryA(libName); 2239 2240 if(lib==NULL) { 2241 *status = U_MISSING_RESOURCE_ERROR; 2242 } 2243 2244 return (void*)lib; 2245} 2246 2247U_INTERNAL void U_EXPORT2 2248uprv_dl_close(void *lib, UErrorCode *status) { 2249 HMODULE handle = (HMODULE)lib; 2250 if(U_FAILURE(*status)) return; 2251 2252 FreeLibrary(handle); 2253 2254 return; 2255} 2256 2257 2258U_INTERNAL UVoidFunction* U_EXPORT2 2259uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2260 HMODULE handle = (HMODULE)lib; 2261 UVoidFunction* addr = NULL; 2262 2263 if(U_FAILURE(*status) || lib==NULL) return NULL; 2264 2265 addr = (UVoidFunction*)GetProcAddress(handle, sym); 2266 2267 if(addr==NULL) { 2268 DWORD lastError = GetLastError(); 2269 if(lastError == ERROR_PROC_NOT_FOUND) { 2270 *status = U_MISSING_RESOURCE_ERROR; 2271 } else { 2272 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */ 2273 } 2274 } 2275 2276 return addr; 2277} 2278 2279 2280#else 2281 2282/* No dynamic loading set. */ 2283 2284U_INTERNAL void * U_EXPORT2 2285uprv_dl_open(const char *libName, UErrorCode *status) { 2286 if(U_FAILURE(*status)) return NULL; 2287 *status = U_UNSUPPORTED_ERROR; 2288 return NULL; 2289} 2290 2291U_INTERNAL void U_EXPORT2 2292uprv_dl_close(void *lib, UErrorCode *status) { 2293 if(U_FAILURE(*status)) return; 2294 *status = U_UNSUPPORTED_ERROR; 2295 return; 2296} 2297 2298 2299U_INTERNAL UVoidFunction* U_EXPORT2 2300uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2301 if(U_SUCCESS(*status)) { 2302 *status = U_UNSUPPORTED_ERROR; 2303 } 2304 return (UVoidFunction*)NULL; 2305} 2306 2307#endif /* U_ENABLE_DYLOAD */ 2308 2309/* 2310 * Hey, Emacs, please set the following: 2311 * 2312 * Local Variables: 2313 * indent-tabs-mode: nil 2314 * End: 2315 * 2316 */ 2317