1/*
2******************************************************************************
3*
4*   Copyright (C) 1997-2013, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*
9*  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10*
11*   Date        Name        Description
12*   04/14/97    aliu        Creation.
13*   04/24/97    aliu        Added getDefaultDataDirectory() and
14*                            getDefaultLocaleID().
15*   04/28/97    aliu        Rewritten to assume Unix and apply general methods
16*                            for assumed case.  Non-UNIX platforms must be
17*                            special-cased.  Rewrote numeric methods dealing
18*                            with NaN and Infinity to be platform independent
19*                             over all IEEE 754 platforms.
20*   05/13/97    aliu        Restored sign of timezone
21*                            (semantics are hours West of GMT)
22*   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23*                             nextDouble..
24*   07/22/98    stephen     Added remainder, max, min, trunc
25*   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
26*   08/24/98    stephen     Added longBitsFromDouble
27*   09/08/98    stephen     Minor changes for Mac Port
28*   03/02/99    stephen     Removed openFile().  Added AS400 support.
29*                            Fixed EBCDIC tables
30*   04/15/99    stephen     Converted to C.
31*   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
32*   08/04/99    jeffrey R.  Added OS/2 changes
33*   11/15/99    helena      Integrated S/390 IEEE support.
34*   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
35*   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
36*   01/03/08    Steven L.   Fake Time Support
37******************************************************************************
38*/
39
40// Defines _XOPEN_SOURCE for access to POSIX functions.
41// Must be before any other #includes.
42#include "uposixdefs.h"
43
44/* include ICU headers */
45#include "unicode/utypes.h"
46#include "unicode/putil.h"
47#include "unicode/ustring.h"
48#include "putilimp.h"
49#include "uassert.h"
50#include "umutex.h"
51#include "cmemory.h"
52#include "cstring.h"
53#include "locmap.h"
54#include "ucln_cmn.h"
55
56/* Include standard headers. */
57#include <stdio.h>
58#include <stdlib.h>
59#include <string.h>
60#include <math.h>
61#include <locale.h>
62#include <float.h>
63
64#ifndef U_COMMON_IMPLEMENTATION
65#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
66#endif
67
68
69/* include system headers */
70#if U_PLATFORM_USES_ONLY_WIN32_API
71    /*
72     * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
73     * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
74     * to use native APIs as much as possible?
75     */
76#   define WIN32_LEAN_AND_MEAN
77#   define VC_EXTRALEAN
78#   define NOUSER
79#   define NOSERVICE
80#   define NOIME
81#   define NOMCX
82#   include <windows.h>
83#   include "wintz.h"
84#elif U_PLATFORM == U_PF_OS400
85#   include <float.h>
86#   include <qusec.h>       /* error code structure */
87#   include <qusrjobi.h>
88#   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
89#   include <mih/testptr.h> /* For uprv_maximumPtr */
90#elif U_PLATFORM == U_PF_CLASSIC_MACOS
91#   include <Files.h>
92#   include <IntlResources.h>
93#   include <Script.h>
94#   include <Folders.h>
95#   include <MacTypes.h>
96#   include <TextUtils.h>
97#   define ICU_NO_USER_DATA_OVERRIDE 1
98#elif U_PLATFORM == U_PF_OS390
99#   include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
100#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
101#   include <limits.h>
102#   include <unistd.h>
103#   if U_PLATFORM == U_PF_SOLARIS
104#       ifndef _XPG4_2
105#           define _XPG4_2
106#       endif
107#   endif
108#elif U_PLATFORM == U_PF_QNX
109#   include <sys/neutrino.h>
110#endif
111
112#if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__)
113/* tzset isn't defined in strict ANSI on Cygwin and MinGW. */
114#undef __STRICT_ANSI__
115#endif
116
117/*
 * Cygwin with GCC requires time.h to be included after the statement above that disables strict ANSI mode.
119 */
120#include <time.h>
121
122#if !U_PLATFORM_USES_ONLY_WIN32_API
123#include <sys/time.h>
124#endif
125
126/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
129 *
130 */
131
132#if U_HAVE_NL_LANGINFO_CODESET
133#include <langinfo.h>
134#endif
135
136/**
137 * Simple things (presence of functions, etc) should just go in configure.in and be added to
138 * icucfg.h via autoheader.
139 */
140#if U_PLATFORM_IMPLEMENTS_POSIX
141#   if U_PLATFORM == U_PF_OS400
142#    define HAVE_DLFCN_H 0
143#    define HAVE_DLOPEN 0
144#   else
145#   ifndef HAVE_DLFCN_H
146#    define HAVE_DLFCN_H 1
147#   endif
148#   ifndef HAVE_DLOPEN
149#    define HAVE_DLOPEN 1
150#   endif
151#   endif
152#   ifndef HAVE_GETTIMEOFDAY
153#    define HAVE_GETTIMEOFDAY 1
154#   endif
155#else
156#   define HAVE_DLFCN_H 0
157#   define HAVE_DLOPEN 0
158#   define HAVE_GETTIMEOFDAY 0
159#endif
160
/*
 * Number of elements in a true array, as an int32_t.
 * Only valid for arrays; a pointer argument silently gives a wrong count.
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand wherever it is used.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
162
163/* Define the extension for data files, again... */
164#define DATA_TYPE "dat"
165
166/* Leave this copyright notice here! */
167static const char copyright[] = U_COPYRIGHT_STRING;
168
169/* floating point implementations ------------------------------------------- */
170
/* Mask for the sign bit in the most significant 32-bit word of a double. */
#define SIGN 0x80000000U

/*
 * Overlay used to spell a double constant as its raw 64-bit pattern.
 * The integer member has to stay first: a C89 union initializer sets only
 * the first member, and the constants below depend on that.
 */
typedef union {
    int64_t i64;
    double d64;
} BitPatternConversion;

/* NaN constant; a quiet NaN (QNAN) is used rather than a signaling one (SNAN). */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Positive infinity constant. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
181
182/*---------------------------------------------------------------------------
183  Platform utilities
184  Our general strategy is to assume we're on a POSIX platform.  Platforms which
185  are non-POSIX must declare themselves so.  The default POSIX implementation
186  will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
187  functions).
188  ---------------------------------------------------------------------------*/
189
190#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_CLASSIC_MACOS || U_PLATFORM == U_PF_OS400
191#   undef U_POSIX_LOCALE
192#else
193#   define U_POSIX_LOCALE    1
194#endif
195
196/*
197    WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
198    can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
199*/
200#if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of *d.
 * On big-endian builds those are the first bytes of the object; otherwise
 * they are the last n bytes.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
210
/*
 * Returns a pointer to the n least significant bytes of *d.
 * On big-endian builds those are the last n bytes of the object; otherwise
 * they are the first bytes.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
220#endif   /* !IEEE_754 */
221
222#if IEEE_754
223static UBool
224u_signBit(double d) {
225    uint8_t hiByte;
226#if U_IS_BIG_ENDIAN
227    hiByte = *(uint8_t *)&d;
228#else
229    hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
230#endif
231    return (hiByte & 0x80) != 0;
232}
233#endif
234
235
236
237#if defined (U_DEBUG_FAKETIME)
238/* Override the clock to test things without having to move the system clock.
239 * Assumes POSIX gettimeofday() will function
240 */
241UDate fakeClock_t0 = 0; /** Time to start the clock from **/
242UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
243UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
244static UMutex fakeClockMutex = U_MUTEX_INTIALIZER;
245
246static UDate getUTCtime_real() {
247    struct timeval posixTime;
248    gettimeofday(&posixTime, NULL);
249    return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
250}
251
252static UDate getUTCtime_fake() {
253    umtx_lock(&fakeClockMutex);
254    if(!fakeClock_set) {
255        UDate real = getUTCtime_real();
256        const char *fake_start = getenv("U_FAKETIME_START");
257        if((fake_start!=NULL) && (fake_start[0]!=0)) {
258            sscanf(fake_start,"%lf",&fakeClock_t0);
259            fakeClock_dt = fakeClock_t0 - real;
260            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
261                    "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
262                    fakeClock_t0, fake_start, fakeClock_dt, real);
263        } else {
264          fakeClock_dt = 0;
265            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
266                    "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
267        }
268        fakeClock_set = TRUE;
269    }
270    umtx_unlock(&fakeClockMutex);
271
272    return getUTCtime_real() + fakeClock_dt;
273}
274#endif
275
276#if U_PLATFORM_USES_ONLY_WIN32_API
277typedef union {
278    int64_t int64;
279    FILETIME fileTime;
280} FileTimeConversion;   /* This is like a ULARGE_INTEGER */
281
282/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
283#define EPOCH_BIAS  INT64_C(116444736000000000)
284#define HECTONANOSECOND_PER_MILLISECOND   10000
285
286#endif
287
288/*---------------------------------------------------------------------------
289  Universal Implementations
290  These are designed to work on all platforms.  Try these, and if they
291  don't work on your platform, then special case your platform with new
292  implementations.
293---------------------------------------------------------------------------*/
294
295U_CAPI UDate U_EXPORT2
296uprv_getUTCtime()
297{
298#if defined(U_DEBUG_FAKETIME)
299    return getUTCtime_fake(); /* Hook for overriding the clock */
300#else
301    return uprv_getRawUTCtime();
302#endif
303}
304
305/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
306U_CAPI UDate U_EXPORT2
307uprv_getRawUTCtime()
308{
309#if U_PLATFORM == U_PF_CLASSIC_MACOS
310    time_t t, t1, t2;
311    struct tm tmrec;
312
313    uprv_memset( &tmrec, 0, sizeof(tmrec) );
314    tmrec.tm_year = 70;
315    tmrec.tm_mon = 0;
316    tmrec.tm_mday = 1;
317    t1 = mktime(&tmrec);    /* seconds of 1/1/1970*/
318
319    time(&t);
320    uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
321    t2 = mktime(&tmrec);    /* seconds of current GMT*/
322    return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND;         /* GMT (or UTC) in seconds since 1970*/
323#elif U_PLATFORM_USES_ONLY_WIN32_API
324
325    FileTimeConversion winTime;
326    GetSystemTimeAsFileTime(&winTime.fileTime);
327    return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
328#else
329
330#if HAVE_GETTIMEOFDAY
331    struct timeval posixTime;
332    gettimeofday(&posixTime, NULL);
333    return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
334#else
335    time_t epochtime;
336    time(&epochtime);
337    return (UDate)epochtime * U_MILLIS_PER_SECOND;
338#endif
339
340#endif
341}
342
343/*-----------------------------------------------------------------------------
344  IEEE 754
345  These methods detect and return NaN and infinity values for doubles
346  conforming to IEEE 754.  Platforms which support this standard include X86,
347  Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
348  If this doesn't work on your platform, you have non-IEEE floating-point, and
349  will need to code your own versions.  A naive implementation is to return 0.0
350  for getNaN and getInfinity, and false for isNaN and isInfinite.
351  ---------------------------------------------------------------------------*/
352
353U_CAPI UBool U_EXPORT2
354uprv_isNaN(double number)
355{
356#if IEEE_754
357    BitPatternConversion convertedNumber;
358    convertedNumber.d64 = number;
359    /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
360    return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
361
362#elif U_PLATFORM == U_PF_OS390
363    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
364                        sizeof(uint32_t));
365    uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
366                        sizeof(uint32_t));
367
368    return ((highBits & 0x7F080000L) == 0x7F080000L) &&
369      (lowBits == 0x00000000L);
370
371#else
372    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
373    /* you'll need to replace this default implementation with what's correct*/
374    /* for your platform.*/
375    return number != number;
376#endif
377}
378
379U_CAPI UBool U_EXPORT2
380uprv_isInfinite(double number)
381{
382#if IEEE_754
383    BitPatternConversion convertedNumber;
384    convertedNumber.d64 = number;
385    /* Infinity is exactly 0x7FF0000000000000U. */
386    return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
387#elif U_PLATFORM == U_PF_OS390
388    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
389                        sizeof(uint32_t));
390    uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
391                        sizeof(uint32_t));
392
393    return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
394
395#else
396    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
397    /* value, you'll need to replace this default implementation with what's*/
398    /* correct for your platform.*/
399    return number == (2.0 * number);
400#endif
401}
402
403U_CAPI UBool U_EXPORT2
404uprv_isPositiveInfinity(double number)
405{
406#if IEEE_754 || U_PLATFORM == U_PF_OS390
407    return (UBool)(number > 0 && uprv_isInfinite(number));
408#else
409    return uprv_isInfinite(number);
410#endif
411}
412
413U_CAPI UBool U_EXPORT2
414uprv_isNegativeInfinity(double number)
415{
416#if IEEE_754 || U_PLATFORM == U_PF_OS390
417    return (UBool)(number < 0 && uprv_isInfinite(number));
418
419#else
420    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
421                        sizeof(uint32_t));
422    return((highBits & SIGN) && uprv_isInfinite(number));
423
424#endif
425}
426
427U_CAPI double U_EXPORT2
428uprv_getNaN()
429{
430#if IEEE_754 || U_PLATFORM == U_PF_OS390
431    return gNan.d64;
432#else
433    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
434    /* you'll need to replace this default implementation with what's correct*/
435    /* for your platform.*/
436    return 0.0;
437#endif
438}
439
440U_CAPI double U_EXPORT2
441uprv_getInfinity()
442{
443#if IEEE_754 || U_PLATFORM == U_PF_OS390
444    return gInf.d64;
445#else
446    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
447    /* value, you'll need to replace this default implementation with what's*/
448    /* correct for your platform.*/
449    return 0.0;
450#endif
451}
452
453U_CAPI double U_EXPORT2
454uprv_floor(double x)
455{
456    return floor(x);
457}
458
459U_CAPI double U_EXPORT2
460uprv_ceil(double x)
461{
462    return ceil(x);
463}
464
465U_CAPI double U_EXPORT2
466uprv_round(double x)
467{
468    return uprv_floor(x + 0.5);
469}
470
471U_CAPI double U_EXPORT2
472uprv_fabs(double x)
473{
474    return fabs(x);
475}
476
477U_CAPI double U_EXPORT2
478uprv_modf(double x, double* y)
479{
480    return modf(x, y);
481}
482
483U_CAPI double U_EXPORT2
484uprv_fmod(double x, double y)
485{
486    return fmod(x, y);
487}
488
489U_CAPI double U_EXPORT2
490uprv_pow(double x, double y)
491{
492    /* This is declared as "double pow(double x, double y)" */
493    return pow(x, y);
494}
495
496U_CAPI double U_EXPORT2
497uprv_pow10(int32_t x)
498{
499    return pow(10.0, (double)x);
500}
501
502U_CAPI double U_EXPORT2
503uprv_fmax(double x, double y)
504{
505#if IEEE_754
506    /* first handle NaN*/
507    if(uprv_isNaN(x) || uprv_isNaN(y))
508        return uprv_getNaN();
509
510    /* check for -0 and 0*/
511    if(x == 0.0 && y == 0.0 && u_signBit(x))
512        return y;
513
514#endif
515
516    /* this should work for all flt point w/o NaN and Inf special cases */
517    return (x > y ? x : y);
518}
519
520U_CAPI double U_EXPORT2
521uprv_fmin(double x, double y)
522{
523#if IEEE_754
524    /* first handle NaN*/
525    if(uprv_isNaN(x) || uprv_isNaN(y))
526        return uprv_getNaN();
527
528    /* check for -0 and 0*/
529    if(x == 0.0 && y == 0.0 && u_signBit(y))
530        return y;
531
532#endif
533
534    /* this should work for all flt point w/o NaN and Inf special cases */
535    return (x > y ? y : x);
536}
537
538/**
539 * Truncates the given double.
540 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
541 * This is different than calling floor() or ceil():
542 * floor(3.3) = 3, floor(-3.3) = -4
543 * ceil(3.3) = 4, ceil(-3.3) = -3
544 */
545U_CAPI double U_EXPORT2
546uprv_trunc(double d)
547{
548#if IEEE_754
549    /* handle error cases*/
550    if(uprv_isNaN(d))
551        return uprv_getNaN();
552    if(uprv_isInfinite(d))
553        return uprv_getInfinity();
554
555    if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
556        return ceil(d);
557    else
558        return floor(d);
559
560#else
561    return d >= 0 ? floor(d) : ceil(d);
562
563#endif
564}
565
566/**
567 * Return the largest positive number that can be represented by an integer
568 * type of arbitrary bit length.
569 */
570U_CAPI double U_EXPORT2
571uprv_maxMantissa(void)
572{
573    return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
574}
575
576U_CAPI double U_EXPORT2
577uprv_log(double d)
578{
579    return log(d);
580}
581
582U_CAPI void * U_EXPORT2
583uprv_maximumPtr(void * base)
584{
585#if U_PLATFORM == U_PF_OS400
586    /*
587     * With the provided function we should never be out of range of a given segment
588     * (a traditional/typical segment that is).  Our segments have 5 bytes for the
589     * id and 3 bytes for the offset.  The key is that the casting takes care of
590     * only retrieving the offset portion minus x1000.  Hence, the smallest offset
591     * seen in a program is x001000 and when casted to an int would be 0.
592     * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
593     *
594     * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
595     * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
596     * This function determines the activation based on the pointer that is passed in and
597     * calculates the appropriate maximum available size for
598     * each pointer type (TERASPACE and non-TERASPACE)
599     *
600     * Unlike other operating systems, the pointer model isn't determined at
601     * compile time on i5/OS.
602     */
603    if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
604        /* if it is a TERASPACE pointer the max is 2GB - 4k */
605        return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
606    }
607    /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
608    return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
609
610#else
611    return U_MAX_PTR(base);
612#endif
613}
614
615/*---------------------------------------------------------------------------
616  Platform-specific Implementations
617  Try these, and if they don't work on your platform, then special case your
618  platform with new implementations.
619  ---------------------------------------------------------------------------*/
620
621/* Generic time zone layer -------------------------------------------------- */
622
623/* Time zone utilities */
624U_CAPI void U_EXPORT2
625uprv_tzset()
626{
627#if defined(U_TZSET)
628    U_TZSET();
629#else
630    /* no initialization*/
631#endif
632}
633
634U_CAPI int32_t U_EXPORT2
635uprv_timezone()
636{
637#ifdef U_TIMEZONE
638    return U_TIMEZONE;
639#else
640    time_t t, t1, t2;
641    struct tm tmrec;
642    UBool dst_checked;
643    int32_t tdiff = 0;
644
645    time(&t);
646    uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
647    dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
648    t1 = mktime(&tmrec);                 /* local time in seconds*/
649    uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
650    t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
651    tdiff = t2 - t1;
652    /* imitate NT behaviour, which returns same timezone offset to GMT for
653       winter and summer.
654       This does not work on all platforms. For instance, on glibc on Linux
655       and on Mac OS 10.5, tdiff calculated above remains the same
656       regardless of whether DST is in effect or not. However, U_TIMEZONE
657       is defined on those platforms and this code is not reached so that
658       we can leave this alone. If there's a platform behaving
659       like glibc that uses this code, we need to add platform-dependent
660       preprocessor here. */
661    if (dst_checked)
662        tdiff += 3600;
663    return tdiff;
664#endif
665}
666
667/* Note that U_TZNAME does *not* have to be tzname, but if it is,
668   some platforms need to have it declared here. */
669
670#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API))
671/* RS6000 and others reject char **tzname.  */
672extern U_IMPORT char *U_TZNAME[];
673#endif
674
675#if !UCONFIG_NO_FILE_IO && (U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
676/* These platforms are likely to use Olson timezone IDs. */
677#define CHECK_LOCALTIME_LINK 1
678#if U_PLATFORM_IS_DARWIN_BASED
679#include <tzfile.h>
680#define TZZONEINFO      (TZDIR "/")
681#elif U_PLATFORM == U_PF_SOLARIS
682#define TZDEFAULT       "/etc/localtime"
683#define TZZONEINFO      "/usr/share/lib/zoneinfo/"
684#define TZ_ENV_CHECK    "localtime"
685#else
686#define TZDEFAULT       "/etc/localtime"
687#define TZZONEINFO      "/usr/share/zoneinfo/"
688#endif
689#if U_HAVE_DIRENT_H
690#define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
691/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
692   symlinked to /etc/localtime, which makes searchForTZFile return
693   'localtime' when it's the first match. */
694#define TZFILE_SKIP2    "localtime"
695#define SEARCH_TZFILE
696#include <dirent.h>  /* Needed to search through system timezone files */
697#endif
698static char gTimeZoneBuffer[PATH_MAX];
699static char *gTimeZoneBufferPtr = NULL;
700#endif
701
702#if !U_PLATFORM_USES_ONLY_WIN32_API
703#define isNonDigit(ch) (ch < '0' || '9' < ch)
704static UBool isValidOlsonID(const char *id) {
705    int32_t idx = 0;
706
707    /* Determine if this is something like Iceland (Olson ID)
708    or AST4ADT (non-Olson ID) */
709    while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
710        idx++;
711    }
712
713    /* If we went through the whole string, then it might be okay.
714    The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
715    "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
716    The rest of the time it could be an Olson ID. George */
717    return (UBool)(id[idx] == 0
718        || uprv_strcmp(id, "PST8PDT") == 0
719        || uprv_strcmp(id, "MST7MDT") == 0
720        || uprv_strcmp(id, "CST6CDT") == 0
721        || uprv_strcmp(id, "EST5EDT") == 0);
722}
723
/* Some Unix-like systems keep a 'posix' subdirectory in /usr/share/zoneinfo
   that replicates the top-level contents, and a 'right' subdirectory with
   the same file names but different contents ('right' holds TAI, Int'l
   Atomic Time, files while 'posix' holds UTC files).
   If the first match for /etc/localtime lies in one of them (usually
   'posix', since the 'right' files differ), or the TZ environment variable
   points into one of them, createTimeZone fails: 'posix/America/New_York'
   is not an Olson timezone id, 'America/New_York' is. A leading 'posix/' or
   'right/' is therefore stepped over by advancing *id past it. */
static void skipZoneIDPrefix(const char** id) {
    const char *zone = *id;

    if (uprv_strncmp(zone, "posix/", 6) != 0
        && uprv_strncmp(zone, "right/", 6) != 0)
    {
        return;     /* no prefix to strip */
    }
    *id = zone + 6;
}
743#endif
744
745#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
746
/*
 * Converts an offset in hours (whole or fractional) to seconds, truncated
 * to int32_t. The argument is parenthesized so that an expression such as
 * CONVERT_HOURS_TO_SECONDS(a + b) scales the whole sum.
 */
#define CONVERT_HOURS_TO_SECONDS(offset) ((int32_t)((offset)*3600))

/* Values for OffsetZoneMapping.daylightType. */
enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 };

/* One row of the table that maps a zone abbreviation pair to an Olson ID. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;  /* zone offset in seconds */
    int32_t daylightType;   /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
    const char *stdID;      /* standard-time abbreviation */
    const char *dstID;      /* daylight-time abbreviation */
    const char *olsonID;    /* Olson ID the combination maps to */
} OffsetZoneMapping;
757
758/*
759This list tries to disambiguate a set of abbreviated timezone IDs and offsets
760and maps it to an Olson ID.
761Before adding anything to this list, take a look at
762icu/source/tools/tzcode/tz.alias
763Sometimes no daylight savings (0) is important to define due to aliases.
764This list can be tested with icu/source/test/compat/tzone.pl
765More values could be added to daylightType to increase precision.
766*/
767static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
768    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
769    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
770    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
771    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
772    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
773    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
774    {-36000, 2, "EST", "EST", "Australia/Sydney"},
775    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
776    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
777    {-34200, 2, "CST", "CST", "Australia/South"},
778    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
779    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
780    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
781    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
782    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
783    {-28800, 2, "WST", "WST", "Australia/West"},
784    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
785    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
786    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
787    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
788    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
789    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
790    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
791    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
792    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
793    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
794    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
795    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
796    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
797    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
798    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
799    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
800    {0, 1, "GMT", "IST", "Europe/Dublin"},
801    {0, 1, "GMT", "BST", "Europe/London"},
802    {0, 0, "WET", "WEST", "Africa/Casablanca"},
803    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
804    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
805    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
806    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
807    {10800, 2, "UYT", "UYST", "America/Montevideo"},
808    {10800, 1, "WGT", "WGST", "America/Godthab"},
809    {10800, 2, "BRT", "BRST", "Brazil/East"},
810    {12600, 1, "NST", "NDT", "America/St_Johns"},
811    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
812    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
813    {14400, 2, "CLT", "CLST", "Chile/Continental"},
814    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
815    {14400, 2, "PYT", "PYST", "America/Asuncion"},
816    {18000, 1, "CST", "CDT", "America/Havana"},
817    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
818    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
819    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
820    {21600, 0, "CST", "CDT", "America/Guatemala"},
821    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
822    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
823    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
824    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
825    {32400, 1, "AKST", "AKDT", "US/Alaska"},
826    {36000, 1, "HAST", "HADT", "US/Aleutian"}
827};
828
829/*#define DEBUG_TZNAME*/
830
831static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
832{
833    int32_t idx;
834#ifdef DEBUG_TZNAME
835    fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
836#endif
837    for (idx = 0; idx < LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
838    {
839        if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
840            && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
841            && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
842            && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
843        {
844            return OFFSET_ZONE_MAPPINGS[idx].olsonID;
845        }
846    }
847    return NULL;
848}
849#endif
850
851#ifdef SEARCH_TZFILE
852#define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
853#define MAX_READ_SIZE 512
854
855typedef struct DefaultTZInfo {
856    char* defaultTZBuffer;
857    int64_t defaultTZFileSize;
858    FILE* defaultTZFilePtr;
859    UBool defaultTZstatus;
860    int32_t defaultTZPosition;
861} DefaultTZInfo;
862
863/*
864 * This method compares the two files given to see if they are a match.
865 * It is currently use to compare two TZ files.
866 */
867static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
868    FILE* file;
869    int64_t sizeFile;
870    int64_t sizeFileLeft;
871    int32_t sizeFileRead;
872    int32_t sizeFileToRead;
873    char bufferFile[MAX_READ_SIZE];
874    UBool result = TRUE;
875
876    if (tzInfo->defaultTZFilePtr == NULL) {
877        tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
878    }
879    file = fopen(TZFileName, "r");
880
881    tzInfo->defaultTZPosition = 0; /* reset position to begin search */
882
883    if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
884        /* First check that the file size are equal. */
885        if (tzInfo->defaultTZFileSize == 0) {
886            fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
887            tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
888        }
889        fseek(file, 0, SEEK_END);
890        sizeFile = ftell(file);
891        sizeFileLeft = sizeFile;
892
893        if (sizeFile != tzInfo->defaultTZFileSize) {
894            result = FALSE;
895        } else {
896            /* Store the data from the files in seperate buffers and
897             * compare each byte to determine equality.
898             */
899            if (tzInfo->defaultTZBuffer == NULL) {
900                rewind(tzInfo->defaultTZFilePtr);
901                tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
902                sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
903            }
904            rewind(file);
905            while(sizeFileLeft > 0) {
906                uprv_memset(bufferFile, 0, MAX_READ_SIZE);
907                sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
908
909                sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
910                if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
911                    result = FALSE;
912                    break;
913                }
914                sizeFileLeft -= sizeFileRead;
915                tzInfo->defaultTZPosition += sizeFileRead;
916            }
917        }
918    } else {
919        result = FALSE;
920    }
921
922    if (file != NULL) {
923        fclose(file);
924    }
925
926    return result;
927}
928/*
929 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
930 */
931/* dirent also lists two entries: "." and ".." that we can safely ignore. */
932#define SKIP1 "."
933#define SKIP2 ".."
934static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
935static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
936    char curpath[MAX_PATH_SIZE];
937    DIR* dirp = opendir(path);
938    DIR* subDirp = NULL;
939    struct dirent* dirEntry = NULL;
940
941    char* result = NULL;
942    if (dirp == NULL) {
943        return result;
944    }
945
946    /* Save the current path */
947    uprv_memset(curpath, 0, MAX_PATH_SIZE);
948    uprv_strcpy(curpath, path);
949
950    /* Check each entry in the directory. */
951    while((dirEntry = readdir(dirp)) != NULL) {
952        const char* dirName = dirEntry->d_name;
953        if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
954            /* Create a newpath with the new entry to test each entry in the directory. */
955            char newpath[MAX_PATH_SIZE];
956            uprv_strcpy(newpath, curpath);
957            uprv_strcat(newpath, dirName);
958
959            if ((subDirp = opendir(newpath)) != NULL) {
960                /* If this new path is a directory, make a recursive call with the newpath. */
961                closedir(subDirp);
962                uprv_strcat(newpath, "/");
963                result = searchForTZFile(newpath, tzInfo);
964                /*
965                 Have to get out here. Otherwise, we'd keep looking
966                 and return the first match in the top-level directory
967                 if there's a match in the top-level. If not, this function
968                 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
969                 It worked without this in most cases because we have a fallback of calling
970                 localtime_r to figure out the default timezone.
971                */
972                if (result != NULL)
973                    break;
974            } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
975                if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
976                    const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1;
977                    skipZoneIDPrefix(&zoneid);
978                    uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid);
979                    result = SEARCH_TZFILE_RESULT;
980                    /* Get out after the first one found. */
981                    break;
982                }
983            }
984        }
985    }
986    closedir(dirp);
987    return result;
988}
989#endif
990U_CAPI const char* U_EXPORT2
991uprv_tzname(int n)
992{
993    const char *tzid = NULL;
994#if U_PLATFORM_USES_ONLY_WIN32_API
995    tzid = uprv_detectWindowsTimeZone();
996
997    if (tzid != NULL) {
998        return tzid;
999    }
1000#else
1001
1002/*#if U_PLATFORM_IS_DARWIN_BASED
1003    int ret;
1004
1005    tzid = getenv("TZFILE");
1006    if (tzid != NULL) {
1007        return tzid;
1008    }
1009#endif*/
1010
1011/* This code can be temporarily disabled to test tzname resolution later on. */
1012#ifndef DEBUG_TZNAME
1013    tzid = getenv("TZ");
1014    if (tzid != NULL && isValidOlsonID(tzid)
1015#if U_PLATFORM == U_PF_SOLARIS
1016    /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
1017        && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
1018#endif
1019    ) {
1020        /* This might be a good Olson ID. */
1021        skipZoneIDPrefix(&tzid);
1022        return tzid;
1023    }
1024    /* else U_TZNAME will give a better result. */
1025#endif
1026
1027#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1028    /* Caller must handle threading issues */
1029    if (gTimeZoneBufferPtr == NULL) {
1030        /*
1031        This is a trick to look at the name of the link to get the Olson ID
1032        because the tzfile contents is underspecified.
1033        This isn't guaranteed to work because it may not be a symlink.
1034        */
1035        int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
1036        if (0 < ret) {
1037            int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
1038            gTimeZoneBuffer[ret] = 0;
1039            if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
1040                && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1041            {
1042                return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1043            }
1044        } else {
1045#if defined(SEARCH_TZFILE)
1046            DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1047            if (tzInfo != NULL) {
1048                tzInfo->defaultTZBuffer = NULL;
1049                tzInfo->defaultTZFileSize = 0;
1050                tzInfo->defaultTZFilePtr = NULL;
1051                tzInfo->defaultTZstatus = FALSE;
1052                tzInfo->defaultTZPosition = 0;
1053
1054                gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1055
1056                /* Free previously allocated memory */
1057                if (tzInfo->defaultTZBuffer != NULL) {
1058                    uprv_free(tzInfo->defaultTZBuffer);
1059                }
1060                if (tzInfo->defaultTZFilePtr != NULL) {
1061                    fclose(tzInfo->defaultTZFilePtr);
1062                }
1063                uprv_free(tzInfo);
1064            }
1065
1066            if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1067                return gTimeZoneBufferPtr;
1068            }
1069#endif
1070        }
1071    }
1072    else {
1073        return gTimeZoneBufferPtr;
1074    }
1075#endif
1076#endif
1077
1078#ifdef U_TZNAME
1079#if U_PLATFORM_USES_ONLY_WIN32_API
1080    /* The return value is free'd in timezone.cpp on Windows because
1081     * the other code path returns a pointer to a heap location. */
1082    return uprv_strdup(U_TZNAME[n]);
1083#else
1084    /*
1085    U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1086    So we remap the abbreviation to an olson ID.
1087
1088    Since Windows exposes a little more timezone information,
1089    we normally don't use this code on Windows because
1090    uprv_detectWindowsTimeZone should have already given the correct answer.
1091    */
1092    {
1093        struct tm juneSol, decemberSol;
1094        int daylightType;
1095        static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1096        static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1097
1098        /* This probing will tell us when daylight savings occurs.  */
1099        localtime_r(&juneSolstice, &juneSol);
1100        localtime_r(&decemberSolstice, &decemberSol);
1101        if(decemberSol.tm_isdst > 0) {
1102          daylightType = U_DAYLIGHT_DECEMBER;
1103        } else if(juneSol.tm_isdst > 0) {
1104          daylightType = U_DAYLIGHT_JUNE;
1105        } else {
1106          daylightType = U_DAYLIGHT_NONE;
1107        }
1108        tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1109        if (tzid != NULL) {
1110            return tzid;
1111        }
1112    }
1113    return U_TZNAME[n];
1114#endif
1115#else
1116    return "";
1117#endif
1118}
1119
1120/* Get and set the ICU data directory --------------------------------------- */
1121
1122static char *gDataDirectory = NULL;
1123#if U_POSIX_LOCALE
1124 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
1125#endif
1126
1127static UBool U_CALLCONV putil_cleanup(void)
1128{
1129    if (gDataDirectory && *gDataDirectory) {
1130        uprv_free(gDataDirectory);
1131    }
1132    gDataDirectory = NULL;
1133#if U_POSIX_LOCALE
1134    if (gCorrectedPOSIXLocale) {
1135        uprv_free(gCorrectedPOSIXLocale);
1136        gCorrectedPOSIXLocale = NULL;
1137    }
1138#endif
1139    return TRUE;
1140}
1141
1142/*
1143 * Set the data directory.
1144 *    Make a copy of the passed string, and set the global data dir to point to it.
1145 *    TODO:  see bug #2849, regarding thread safety.
1146 */
1147U_CAPI void U_EXPORT2
1148u_setDataDirectory(const char *directory) {
1149    char *newDataDir;
1150    int32_t length;
1151
1152    if(directory==NULL || *directory==0) {
1153        /* A small optimization to prevent the malloc and copy when the
1154        shared library is used, and this is a way to make sure that NULL
1155        is never returned.
1156        */
1157        newDataDir = (char *)"";
1158    }
1159    else {
1160        length=(int32_t)uprv_strlen(directory);
1161        newDataDir = (char *)uprv_malloc(length + 2);
1162        /* Exit out if newDataDir could not be created. */
1163        if (newDataDir == NULL) {
1164            return;
1165        }
1166        uprv_strcpy(newDataDir, directory);
1167
1168#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1169        {
1170            char *p;
1171            while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1172                *p = U_FILE_SEP_CHAR;
1173            }
1174        }
1175#endif
1176    }
1177
1178    umtx_lock(NULL);
1179    if (gDataDirectory && *gDataDirectory) {
1180        uprv_free(gDataDirectory);
1181    }
1182    gDataDirectory = newDataDir;
1183    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1184    umtx_unlock(NULL);
1185}
1186
1187U_CAPI UBool U_EXPORT2
1188uprv_pathIsAbsolute(const char *path)
1189{
1190  if(!path || !*path) {
1191    return FALSE;
1192  }
1193
1194  if(*path == U_FILE_SEP_CHAR) {
1195    return TRUE;
1196  }
1197
1198#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1199  if(*path == U_FILE_ALT_SEP_CHAR) {
1200    return TRUE;
1201  }
1202#endif
1203
1204#if U_PLATFORM_USES_ONLY_WIN32_API
1205  if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1206       ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1207      path[1] == ':' ) {
1208    return TRUE;
1209  }
1210#endif
1211
1212  return FALSE;
1213}
1214
1215/* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1216   until some client wrapper makefiles are updated */
1217#if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR
1218# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1219#  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1220# endif
1221#endif
1222
1223U_CAPI const char * U_EXPORT2
1224u_getDataDirectory(void) {
1225    const char *path = NULL;
1226#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1227    char datadir_path_buffer[PATH_MAX];
1228#endif
1229
1230    /* if we have the directory, then return it immediately */
1231    UMTX_CHECK(NULL, gDataDirectory, path);
1232
1233    if(path) {
1234        return path;
1235    }
1236
1237    /*
1238    When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1239    override ICU's data with the ICU_DATA environment variable. This prevents
1240    problems where multiple custom copies of ICU's specific version of data
1241    are installed on a system. Either the application must define the data
1242    directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1243    ICU, set the data with udata_setCommonData or trust that all of the
1244    required data is contained in ICU's data library that contains
1245    the entry point defined by U_ICUDATA_ENTRY_POINT.
1246
1247    There may also be some platforms where environment variables
1248    are not allowed.
1249    */
1250#   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1251    /* First try to get the environment variable */
1252    path=getenv("ICU_DATA");
1253#   endif
1254
1255    /* ICU_DATA_DIR may be set as a compile option.
1256     * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1257     * and is used only when data is built in archive mode eliminating the need
1258     * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1259     * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1260     * set their own path.
1261     */
1262#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1263    if(path==NULL || *path==0) {
1264# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1265        const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1266# endif
1267# ifdef ICU_DATA_DIR
1268        path=ICU_DATA_DIR;
1269# else
1270        path=U_ICU_DATA_DEFAULT_DIR;
1271# endif
1272# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1273        if (prefix != NULL) {
1274            snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1275            path=datadir_path_buffer;
1276        }
1277# endif
1278    }
1279#endif
1280
1281    if(path==NULL) {
1282        /* It looks really bad, set it to something. */
1283        path = "";
1284    }
1285
1286    u_setDataDirectory(path);
1287    return gDataDirectory;
1288}
1289
1290
1291
1292
1293
1294/* Macintosh-specific locale information ------------------------------------ */
1295#if U_PLATFORM == U_PF_CLASSIC_MACOS
1296
/*
 * One row of the classic Mac OS locale lookup table. A field equal to
 * MAC_LC_MAGIC_NUMBER acts as a wildcard when uprv_getDefaultLocaleID()
 * matches rows against the system values.
 */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;   /* POSIX locale ID reported for a matching row */
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
#define MAC_LC_MAGIC_NUMBER -5
#define MAC_LC_INIT_NUMBER -9

/*
 * Rows are scanned in order and the first match wins, so the all-wildcard
 * fallback must stay last.
 *
 * Date regions with no mapping yet: 22 (Malta), 23 (Cyprus),
 * 33 (Hindi system for India), 34 (Pakistan),
 * 46 (Lapland  [Ask Rich for the data. HS]), 47 (Faeroe Islands).
 */
static const mac_lc_rec mac_lc_recs[] = {
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US" },  /* United States */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR" },  /* France */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB" },  /* Great Britain */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE" },  /* Germany */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT" },  /* Italy */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL" },  /* Netherlands */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE" },  /* French for Belgium or Luxembourg */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE" },  /* Sweden */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK" },  /* Denmark */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT" }, /* Portugal */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA" }, /* French Canada */
    /* Israel: previously mapped to "is_IS", which is Icelandic (see region 21). */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "he_IL" },
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP" }, /* Japan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU" }, /* Australia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE" }, /* the Arabic world (?) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI" }, /* Finland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH" }, /* French for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH" }, /* German for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR" }, /* Greece */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS" }, /* Iceland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR" }, /* Turkey */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU" }, /* Croatian system for Yugoslavia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT" }, /* Lithuania */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL" }, /* Poland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU" }, /* Hungary */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE" }, /* Estonia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV" }, /* Latvia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR" }, /* Iran */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU" }, /* Russia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE" }, /* Ireland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR" }, /* Korea */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN" }, /* People's Republic of China */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW" }, /* Taiwan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH" }, /* Thailand */

    /* fallback is en_US */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, "en_US" }
};
1396
1397#endif
1398
1399#if U_POSIX_LOCALE
1400/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1401 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1402 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1403 */
1404static const char *uprv_getPOSIXIDForCategory(int category)
1405{
1406    const char* posixID = NULL;
1407    if (category == LC_MESSAGES || category == LC_CTYPE) {
1408        /*
1409        * On Solaris two different calls to setlocale can result in
1410        * different values. Only get this value once.
1411        *
1412        * We must check this first because an application can set this.
1413        *
1414        * LC_ALL can't be used because it's platform dependent. The LANG
1415        * environment variable seems to affect LC_CTYPE variable by default.
1416        * Here is what setlocale(LC_ALL, NULL) can return.
1417        * HPUX can return 'C C C C C C C'
1418        * Solaris can return /en_US/C/C/C/C/C on the second try.
1419        * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1420        *
1421        * The default codepage detection also needs to use LC_CTYPE.
1422        *
1423        * Do not call setlocale(LC_*, "")! Using an empty string instead
1424        * of NULL, will modify the libc behavior.
1425        */
1426        posixID = setlocale(category, NULL);
1427        if ((posixID == 0)
1428            || (uprv_strcmp("C", posixID) == 0)
1429            || (uprv_strcmp("POSIX", posixID) == 0))
1430        {
1431            /* Maybe we got some garbage.  Try something more reasonable */
1432            posixID = getenv("LC_ALL");
1433            if (posixID == 0) {
1434                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1435                if (posixID == 0) {
1436                    posixID = getenv("LANG");
1437                }
1438            }
1439        }
1440    }
1441    if ((posixID==0)
1442        || (uprv_strcmp("C", posixID) == 0)
1443        || (uprv_strcmp("POSIX", posixID) == 0))
1444    {
1445        /* Nothing worked.  Give it a nice POSIX default value. */
1446        posixID = "en_US_POSIX";
1447    }
1448    return posixID;
1449}
1450
1451/* Return just the POSIX id for the default locale, whatever happens to be in
1452 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1453 */
1454static const char *uprv_getPOSIXIDForDefaultLocale(void)
1455{
1456    static const char* posixID = NULL;
1457    if (posixID == 0) {
1458        posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1459    }
1460    return posixID;
1461}
1462
1463#if !U_CHARSET_IS_UTF8
1464/* Return just the POSIX id for the default codepage, whatever happens to be in
1465 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1466 */
1467static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1468{
1469    static const char* posixID = NULL;
1470    if (posixID == 0) {
1471        posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1472    }
1473    return posixID;
1474}
1475#endif
1476#endif
1477
1478/* NOTE: The caller should handle thread safety */
1479U_CAPI const char* U_EXPORT2
1480uprv_getDefaultLocaleID()
1481{
1482#if U_POSIX_LOCALE
1483/*
1484  Note that:  (a '!' means the ID is improper somehow)
1485     LC_ALL  ---->     default_loc          codepage
1486--------------------------------------------------------
1487     ab.CD             ab                   CD
1488     ab@CD             ab__CD               -
1489     ab@CD.EF          ab__CD               EF
1490
1491     ab_CD.EF@GH       ab_CD_GH             EF
1492
1493Some 'improper' ways to do the same as above:
1494  !  ab_CD@GH.EF       ab_CD_GH             EF
1495  !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1496  !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1497
1498     _CD@GH            _CD_GH               -
1499     _CD.EF@GH         _CD_GH               EF
1500
1501The variant cannot have dots in it.
1502The 'rightmost' variant (@xxx) wins.
1503The leftmost codepage (.xxx) wins.
1504*/
1505    char *correctedPOSIXLocale = 0;
1506    const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1507    const char *p;
1508    const char *q;
1509    int32_t len;
1510
1511    /* Format: (no spaces)
1512    ll [ _CC ] [ . MM ] [ @ VV]
1513
1514      l = lang, C = ctry, M = charmap, V = variant
1515    */
1516
1517    if (gCorrectedPOSIXLocale != NULL) {
1518        return gCorrectedPOSIXLocale;
1519    }
1520
1521    if ((p = uprv_strchr(posixID, '.')) != NULL) {
1522        /* assume new locale can't be larger than old one? */
1523        correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1524        /* Exit on memory allocation error. */
1525        if (correctedPOSIXLocale == NULL) {
1526            return NULL;
1527        }
1528        uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1529        correctedPOSIXLocale[p-posixID] = 0;
1530
1531        /* do not copy after the @ */
1532        if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1533            correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1534        }
1535    }
1536
1537    /* Note that we scan the *uncorrected* ID. */
1538    if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1539        if (correctedPOSIXLocale == NULL) {
1540            correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1541            /* Exit on memory allocation error. */
1542            if (correctedPOSIXLocale == NULL) {
1543                return NULL;
1544            }
1545            uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1546            correctedPOSIXLocale[p-posixID] = 0;
1547        }
1548        p++;
1549
1550        /* Take care of any special cases here.. */
1551        if (!uprv_strcmp(p, "nynorsk")) {
1552            p = "NY";
1553            /* Don't worry about no__NY. In practice, it won't appear. */
1554        }
1555
1556        if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1557            uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1558        }
1559        else {
1560            uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1561        }
1562
1563        if ((q = uprv_strchr(p, '.')) != NULL) {
1564            /* How big will the resulting string be? */
1565            len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1566            uprv_strncat(correctedPOSIXLocale, p, q-p);
1567            correctedPOSIXLocale[len] = 0;
1568        }
1569        else {
1570            /* Anything following the @ sign */
1571            uprv_strcat(correctedPOSIXLocale, p);
1572        }
1573
1574        /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1575         * How about 'russian' -> 'ru'?
1576         * Many of the other locales using ISO codes will be handled by the
1577         * canonicalization functions in uloc_getDefault.
1578         */
1579    }
1580
1581    /* Was a correction made? */
1582    if (correctedPOSIXLocale != NULL) {
1583        posixID = correctedPOSIXLocale;
1584    }
1585    else {
1586        /* copy it, just in case the original pointer goes away.  See j2395 */
1587        correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1588        /* Exit on memory allocation error. */
1589        if (correctedPOSIXLocale == NULL) {
1590            return NULL;
1591        }
1592        posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1593    }
1594
1595    if (gCorrectedPOSIXLocale == NULL) {
1596        gCorrectedPOSIXLocale = correctedPOSIXLocale;
1597        ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1598        correctedPOSIXLocale = NULL;
1599    }
1600
1601    if (correctedPOSIXLocale != NULL) {  /* Was already set - clean up. */
1602        uprv_free(correctedPOSIXLocale);
1603    }
1604
1605    return posixID;
1606
1607#elif U_PLATFORM_USES_ONLY_WIN32_API
1608    UErrorCode status = U_ZERO_ERROR;
1609    LCID id = GetThreadLocale();
1610    const char* locID = uprv_convertToPosix(id, &status);
1611
1612    if (U_FAILURE(status)) {
1613        locID = "en_US";
1614    }
1615    return locID;
1616
1617#elif U_PLATFORM == U_PF_CLASSIC_MACOS
1618    int32_t script = MAC_LC_INIT_NUMBER;
1619    /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1620    int32_t region = MAC_LC_INIT_NUMBER;
1621    /* = GetScriptManagerVariable(smRegionCode);*/
1622    int32_t lang = MAC_LC_INIT_NUMBER;
1623    /* = GetScriptManagerVariable(smScriptLang);*/
1624    int32_t date_region = MAC_LC_INIT_NUMBER;
1625    const char* posixID = 0;
1626    int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1627    int32_t i;
1628    Intl1Hndl ih;
1629
1630    ih = (Intl1Hndl) GetIntlResource(1);
1631    if (ih)
1632        date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1633
1634    for (i = 0; i < count; i++) {
1635        if (   ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1636             || (mac_lc_recs[i].script == script))
1637            && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1638             || (mac_lc_recs[i].region == region))
1639            && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1640             || (mac_lc_recs[i].lang == lang))
1641            && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1642             || (mac_lc_recs[i].date_region == date_region))
1643            )
1644        {
1645            posixID = mac_lc_recs[i].posixID;
1646            break;
1647        }
1648    }
1649
1650    return posixID;
1651
1652#elif U_PLATFORM == U_PF_OS400
1653    /* locales are process scoped and are by definition thread safe */
1654    static char correctedLocale[64];
1655    const  char *localeID = getenv("LC_ALL");
1656           char *p;
1657
1658    if (localeID == NULL)
1659        localeID = getenv("LANG");
1660    if (localeID == NULL)
1661        localeID = setlocale(LC_ALL, NULL);
1662    /* Make sure we have something... */
1663    if (localeID == NULL)
1664        return "en_US_POSIX";
1665
1666    /* Extract the locale name from the path. */
1667    if((p = uprv_strrchr(localeID, '/')) != NULL)
1668    {
1669        /* Increment p to start of locale name. */
1670        p++;
1671        localeID = p;
1672    }
1673
1674    /* Copy to work location. */
1675    uprv_strcpy(correctedLocale, localeID);
1676
1677    /* Strip off the '.locale' extension. */
1678    if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1679        *p = 0;
1680    }
1681
1682    /* Upper case the locale name. */
1683    T_CString_toUpperCase(correctedLocale);
1684
1685    /* See if we are using the POSIX locale.  Any of the
1686    * following are equivalent and use the same QLGPGCMA
1687    * (POSIX) locale.
1688    * QLGPGCMA2 means UCS2
1689    * QLGPGCMA_4 means UTF-32
1690    * QLGPGCMA_8 means UTF-8
1691    */
1692    if ((uprv_strcmp("C", correctedLocale) == 0) ||
1693        (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1694        (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1695    {
1696        uprv_strcpy(correctedLocale, "en_US_POSIX");
1697    }
1698    else
1699    {
1700        int16_t LocaleLen;
1701
1702        /* Lower case the lang portion. */
1703        for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1704        {
1705            *p = uprv_tolower(*p);
1706        }
1707
1708        /* Adjust for Euro.  After '_E' add 'URO'. */
1709        LocaleLen = uprv_strlen(correctedLocale);
1710        if (correctedLocale[LocaleLen - 2] == '_' &&
1711            correctedLocale[LocaleLen - 1] == 'E')
1712        {
1713            uprv_strcat(correctedLocale, "URO");
1714        }
1715
1716        /* If using Lotus-based locale then convert to
1717         * equivalent non Lotus.
1718         */
1719        else if (correctedLocale[LocaleLen - 2] == '_' &&
1720            correctedLocale[LocaleLen - 1] == 'L')
1721        {
1722            correctedLocale[LocaleLen - 2] = 0;
1723        }
1724
1725        /* There are separate simplified and traditional
1726         * locales called zh_HK_S and zh_HK_T.
1727         */
1728        else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1729        {
1730            uprv_strcpy(correctedLocale, "zh_HK");
1731        }
1732
1733        /* A special zh_CN_GBK locale...
1734        */
1735        else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1736        {
1737            uprv_strcpy(correctedLocale, "zh_CN");
1738        }
1739
1740    }
1741
1742    return correctedLocale;
1743#endif
1744
1745}
1746
1747#if !U_CHARSET_IS_UTF8
1748#if U_POSIX_LOCALE
1749/*
1750Due to various platform differences, one platform may specify a charset,
1751when they really mean a different charset. Remap the names so that they are
1752compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1753here. Before adding anything to this function, please consider adding unique
1754names to the ICU alias table in the data directory.
1755*/
1756static const char*
1757remapPlatformDependentCodepage(const char *locale, const char *name) {
1758    if (locale != NULL && *locale == 0) {
1759        /* Make sure that an empty locale is handled the same way. */
1760        locale = NULL;
1761    }
1762    if (name == NULL) {
1763        return NULL;
1764    }
1765#if U_PLATFORM == U_PF_AIX
1766    if (uprv_strcmp(name, "IBM-943") == 0) {
1767        /* Use the ASCII compatible ibm-943 */
1768        name = "Shift-JIS";
1769    }
1770    else if (uprv_strcmp(name, "IBM-1252") == 0) {
1771        /* Use the windows-1252 that contains the Euro */
1772        name = "IBM-5348";
1773    }
1774#elif U_PLATFORM == U_PF_SOLARIS
1775    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
1776        /* Solaris underspecifies the "EUC" name. */
1777        if (uprv_strcmp(locale, "zh_CN") == 0) {
1778            name = "EUC-CN";
1779        }
1780        else if (uprv_strcmp(locale, "zh_TW") == 0) {
1781            name = "EUC-TW";
1782        }
1783        else if (uprv_strcmp(locale, "ko_KR") == 0) {
1784            name = "EUC-KR";
1785        }
1786    }
1787    else if (uprv_strcmp(name, "eucJP") == 0) {
1788        /*
1789        ibm-954 is the best match.
1790        ibm-33722 is the default for eucJP (similar to Windows).
1791        */
1792        name = "eucjis";
1793    }
1794    else if (uprv_strcmp(name, "646") == 0) {
1795        /*
1796         * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
1797         * ISO-8859-1 instead of US-ASCII(646).
1798         */
1799        name = "ISO-8859-1";
1800    }
1801#elif U_PLATFORM_IS_DARWIN_BASED
1802    if (locale == NULL && *name == 0) {
1803        /*
1804        No locale was specified, and an empty name was passed in.
1805        This usually indicates that nl_langinfo didn't return valid information.
1806        Mac OS X uses UTF-8 by default (especially the locale data and console).
1807        */
1808        name = "UTF-8";
1809    }
1810    else if (uprv_strcmp(name, "CP949") == 0) {
1811        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1812        name = "EUC-KR";
1813    }
1814    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
1815        /*
1816         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1817         */
1818        name = "UTF-8";
1819    }
1820#elif U_PLATFORM == U_PF_BSD
1821    if (uprv_strcmp(name, "CP949") == 0) {
1822        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1823        name = "EUC-KR";
1824    }
1825#elif U_PLATFORM == U_PF_HPUX
1826    if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
1827        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
1828        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
1829        name = "hkbig5";
1830    }
1831    else if (uprv_strcmp(name, "eucJP") == 0) {
1832        /*
1833        ibm-1350 is the best match, but unavailable.
1834        ibm-954 is mostly a superset of ibm-1350.
1835        ibm-33722 is the default for eucJP (similar to Windows).
1836        */
1837        name = "eucjis";
1838    }
1839#elif U_PLATFORM == U_PF_LINUX
1840    if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
1841        /* Linux underspecifies the "EUC" name. */
1842        if (uprv_strcmp(locale, "korean") == 0) {
1843            name = "EUC-KR";
1844        }
1845        else if (uprv_strcmp(locale, "japanese") == 0) {
1846            /* See comment below about eucJP */
1847            name = "eucjis";
1848        }
1849    }
1850    else if (uprv_strcmp(name, "eucjp") == 0) {
1851        /*
1852        ibm-1350 is the best match, but unavailable.
1853        ibm-954 is mostly a superset of ibm-1350.
1854        ibm-33722 is the default for eucJP (similar to Windows).
1855        */
1856        name = "eucjis";
1857    }
1858    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
1859            (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
1860        /*
1861         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1862         */
1863        name = "UTF-8";
1864    }
1865    /*
1866     * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
1867     * it by falling back to 'US-ASCII' when NULL is returned from this
1868     * function. So, we don't have to worry about it here.
1869     */
1870#endif
1871    /* return NULL when "" is passed in */
1872    if (*name == 0) {
1873        name = NULL;
1874    }
1875    return name;
1876}
1877
1878static const char*
1879getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
1880{
1881    char localeBuf[100];
1882    const char *name = NULL;
1883    char *variant = NULL;
1884
1885    if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
1886        size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
1887        uprv_strncpy(localeBuf, localeName, localeCapacity);
1888        localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
1889        name = uprv_strncpy(buffer, name+1, buffCapacity);
1890        buffer[buffCapacity-1] = 0; /* ensure NULL termination */
1891        if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
1892            *variant = 0;
1893        }
1894        name = remapPlatformDependentCodepage(localeBuf, name);
1895    }
1896    return name;
1897}
1898#endif
1899
1900static const char*
1901int_getDefaultCodepage()
1902{
1903#if U_PLATFORM == U_PF_OS400
1904    uint32_t ccsid = 37; /* Default to ibm-37 */
1905    static char codepage[64];
1906    Qwc_JOBI0400_t jobinfo;
1907    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
1908
1909    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
1910        "*                         ", "                ", &error);
1911
1912    if (error.Bytes_Available == 0) {
1913        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
1914            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
1915        }
1916        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
1917            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
1918        }
1919        /* else use the default */
1920    }
1921    sprintf(codepage,"ibm-%d", ccsid);
1922    return codepage;
1923
1924#elif U_PLATFORM == U_PF_OS390
1925    static char codepage[64];
1926
1927    strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
1928    strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
1929    codepage[63] = 0; /* NULL terminate */
1930
1931    return codepage;
1932
1933#elif U_PLATFORM == U_PF_CLASSIC_MACOS
1934    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */
1935
1936#elif U_PLATFORM_USES_ONLY_WIN32_API
1937    static char codepage[64];
1938    sprintf(codepage, "windows-%d", GetACP());
1939    return codepage;
1940
1941#elif U_POSIX_LOCALE
1942    static char codesetName[100];
1943    const char *localeName = NULL;
1944    const char *name = NULL;
1945
1946    localeName = uprv_getPOSIXIDForDefaultCodepage();
1947    uprv_memset(codesetName, 0, sizeof(codesetName));
1948#if U_HAVE_NL_LANGINFO_CODESET
1949    /* When available, check nl_langinfo first because it usually gives more
1950       useful names. It depends on LC_CTYPE.
1951       nl_langinfo may use the same buffer as setlocale. */
1952    {
1953        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
1954#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
1955        /*
1956         * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
1957         * instead of ASCII.
1958         */
1959        if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
1960            codeset = remapPlatformDependentCodepage(localeName, codeset);
1961        } else
1962#endif
1963        {
1964            codeset = remapPlatformDependentCodepage(NULL, codeset);
1965        }
1966
1967        if (codeset != NULL) {
1968            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
1969            codesetName[sizeof(codesetName)-1] = 0;
1970            return codesetName;
1971        }
1972    }
1973#endif
1974
1975    /* Use setlocale in a nice way, and then check some environment variables.
1976       Maybe the application used setlocale already.
1977    */
1978    uprv_memset(codesetName, 0, sizeof(codesetName));
1979    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
1980    if (name) {
1981        /* if we can find the codeset name from setlocale, return that. */
1982        return name;
1983    }
1984
1985    if (*codesetName == 0)
1986    {
1987        /* Everything failed. Return US ASCII (ISO 646). */
1988        (void)uprv_strcpy(codesetName, "US-ASCII");
1989    }
1990    return codesetName;
1991#else
1992    return "US-ASCII";
1993#endif
1994}
1995
1996
1997U_CAPI const char*  U_EXPORT2
1998uprv_getDefaultCodepage()
1999{
2000    static char const  *name = NULL;
2001    umtx_lock(NULL);
2002    if (name == NULL) {
2003        name = int_getDefaultCodepage();
2004    }
2005    umtx_unlock(NULL);
2006    return name;
2007}
2008#endif  /* !U_CHARSET_IS_UTF8 */
2009
2010
2011/* end of platform-specific implementation -------------- */
2012
2013/* version handling --------------------------------------------------------- */
2014
2015U_CAPI void U_EXPORT2
2016u_versionFromString(UVersionInfo versionArray, const char *versionString) {
2017    char *end;
2018    uint16_t part=0;
2019
2020    if(versionArray==NULL) {
2021        return;
2022    }
2023
2024    if(versionString!=NULL) {
2025        for(;;) {
2026            versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2027            if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2028                break;
2029            }
2030            versionString=end+1;
2031        }
2032    }
2033
2034    while(part<U_MAX_VERSION_LENGTH) {
2035        versionArray[part++]=0;
2036    }
2037}
2038
2039U_CAPI void U_EXPORT2
2040u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2041    if(versionArray!=NULL && versionString!=NULL) {
2042        char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2043        int32_t len = u_strlen(versionString);
2044        if(len>U_MAX_VERSION_STRING_LENGTH) {
2045            len = U_MAX_VERSION_STRING_LENGTH;
2046        }
2047        u_UCharsToChars(versionString, versionChars, len);
2048        versionChars[len]=0;
2049        u_versionFromString(versionArray, versionChars);
2050    }
2051}
2052
2053U_CAPI void U_EXPORT2
2054u_versionToString(const UVersionInfo versionArray, char *versionString) {
2055    uint16_t count, part;
2056    uint8_t field;
2057
2058    if(versionString==NULL) {
2059        return;
2060    }
2061
2062    if(versionArray==NULL) {
2063        versionString[0]=0;
2064        return;
2065    }
2066
2067    /* count how many fields need to be written */
2068    for(count=4; count>0 && versionArray[count-1]==0; --count) {
2069    }
2070
2071    if(count <= 1) {
2072        count = 2;
2073    }
2074
2075    /* write the first part */
2076    /* write the decimal field value */
2077    field=versionArray[0];
2078    if(field>=100) {
2079        *versionString++=(char)('0'+field/100);
2080        field%=100;
2081    }
2082    if(field>=10) {
2083        *versionString++=(char)('0'+field/10);
2084        field%=10;
2085    }
2086    *versionString++=(char)('0'+field);
2087
2088    /* write the following parts */
2089    for(part=1; part<count; ++part) {
2090        /* write a dot first */
2091        *versionString++=U_VERSION_DELIMITER;
2092
2093        /* write the decimal field value */
2094        field=versionArray[part];
2095        if(field>=100) {
2096            *versionString++=(char)('0'+field/100);
2097            field%=100;
2098        }
2099        if(field>=10) {
2100            *versionString++=(char)('0'+field/10);
2101            field%=10;
2102        }
2103        *versionString++=(char)('0'+field);
2104    }
2105
2106    /* NUL-terminate */
2107    *versionString=0;
2108}
2109
2110U_CAPI void U_EXPORT2
2111u_getVersion(UVersionInfo versionArray) {
2112    u_versionFromString(versionArray, U_ICU_VERSION);
2113}
2114
2115/**
2116 * icucfg.h dependent code
2117 */
2118
2119#if U_ENABLE_DYLOAD
2120
2121#if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2122
2123#if HAVE_DLFCN_H
2124
2125#ifdef __MVS__
2126#ifndef __SUSV3
2127#define __SUSV3 1
2128#endif
2129#endif
2130#include <dlfcn.h>
2131#endif
2132
2133U_INTERNAL void * U_EXPORT2
2134uprv_dl_open(const char *libName, UErrorCode *status) {
2135  void *ret = NULL;
2136  if(U_FAILURE(*status)) return ret;
2137  ret =  dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2138  if(ret==NULL) {
2139#ifdef U_TRACE_DYLOAD
2140    printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
2141#endif
2142    *status = U_MISSING_RESOURCE_ERROR;
2143  }
2144  return ret;
2145}
2146
2147U_INTERNAL void U_EXPORT2
2148uprv_dl_close(void *lib, UErrorCode *status) {
2149  if(U_FAILURE(*status)) return;
2150  dlclose(lib);
2151}
2152
2153U_INTERNAL UVoidFunction* U_EXPORT2
2154uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2155  union {
2156      UVoidFunction *fp;
2157      void *vp;
2158  } uret;
2159  uret.fp = NULL;
2160  if(U_FAILURE(*status)) return uret.fp;
2161  uret.vp = dlsym(lib, sym);
2162  if(uret.vp == NULL) {
2163#ifdef U_TRACE_DYLOAD
2164    printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2165#endif
2166    *status = U_MISSING_RESOURCE_ERROR;
2167  }
2168  return uret.fp;
2169}
2170
2171#else
2172
2173/* null (nonexistent) implementation. */
2174
2175U_INTERNAL void * U_EXPORT2
2176uprv_dl_open(const char *libName, UErrorCode *status) {
2177  if(U_FAILURE(*status)) return NULL;
2178  *status = U_UNSUPPORTED_ERROR;
2179  return NULL;
2180}
2181
2182U_INTERNAL void U_EXPORT2
2183uprv_dl_close(void *lib, UErrorCode *status) {
2184  if(U_FAILURE(*status)) return;
2185  *status = U_UNSUPPORTED_ERROR;
2186  return;
2187}
2188
2189
2190U_INTERNAL UVoidFunction* U_EXPORT2
2191uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2192  if(U_SUCCESS(*status)) {
2193    *status = U_UNSUPPORTED_ERROR;
2194  }
2195  return (UVoidFunction*)NULL;
2196}
2197
2198
2199
2200#endif
2201
2202#elif U_PLATFORM_USES_ONLY_WIN32_API
2203
2204U_INTERNAL void * U_EXPORT2
2205uprv_dl_open(const char *libName, UErrorCode *status) {
2206  HMODULE lib = NULL;
2207
2208  if(U_FAILURE(*status)) return NULL;
2209
2210  lib = LoadLibraryA(libName);
2211
2212  if(lib==NULL) {
2213    *status = U_MISSING_RESOURCE_ERROR;
2214  }
2215
2216  return (void*)lib;
2217}
2218
2219U_INTERNAL void U_EXPORT2
2220uprv_dl_close(void *lib, UErrorCode *status) {
2221  HMODULE handle = (HMODULE)lib;
2222  if(U_FAILURE(*status)) return;
2223
2224  FreeLibrary(handle);
2225
2226  return;
2227}
2228
2229
2230U_INTERNAL UVoidFunction* U_EXPORT2
2231uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2232  HMODULE handle = (HMODULE)lib;
2233  UVoidFunction* addr = NULL;
2234
2235  if(U_FAILURE(*status) || lib==NULL) return NULL;
2236
2237  addr = (UVoidFunction*)GetProcAddress(handle, sym);
2238
2239  if(addr==NULL) {
2240    DWORD lastError = GetLastError();
2241    if(lastError == ERROR_PROC_NOT_FOUND) {
2242      *status = U_MISSING_RESOURCE_ERROR;
2243    } else {
2244      *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2245    }
2246  }
2247
2248  return addr;
2249}
2250
2251
2252#else
2253
2254/* No dynamic loading set. */
2255
2256U_INTERNAL void * U_EXPORT2
2257uprv_dl_open(const char *libName, UErrorCode *status) {
2258    if(U_FAILURE(*status)) return NULL;
2259    *status = U_UNSUPPORTED_ERROR;
2260    return NULL;
2261}
2262
2263U_INTERNAL void U_EXPORT2
2264uprv_dl_close(void *lib, UErrorCode *status) {
2265    if(U_FAILURE(*status)) return;
2266    *status = U_UNSUPPORTED_ERROR;
2267    return;
2268}
2269
2270
2271U_INTERNAL UVoidFunction* U_EXPORT2
2272uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2273  if(U_SUCCESS(*status)) {
2274    *status = U_UNSUPPORTED_ERROR;
2275  }
2276  return (UVoidFunction*)NULL;
2277}
2278
2279#endif /* U_ENABLE_DYLOAD */
2280
2281/*
2282 * Hey, Emacs, please set the following:
2283 *
2284 * Local Variables:
2285 * indent-tabs-mode: nil
2286 * End:
2287 *
2288 */
2289