1/*
2 * xsltlocale.c: locale handling
3 *
4 * Reference:
5 * RFC 3066: Tags for the Identification of Languages
6 * http://www.ietf.org/rfc/rfc3066.txt
7 * ISO 639-1, ISO 3166-1
8 *
9 * Author: Nick Wellnhofer
10 * winapi port: Roumen Petrov
11 */
12
13#define IN_LIBXSLT
14#include "libxslt.h"
15
16#include <string.h>
17#include <libxml/xmlmemory.h>
18
19#include "xsltlocale.h"
20#include "xsltutils.h"
21
/*
 * Compatibility shim: when building against glibc 2.x with a minor version
 * of 2 or lower, map the extended-locale names used in this file onto their
 * double-underscore counterparts and supply LC_COLLATE_MASK.
 */
#if defined(__GLIBC__) && (__GLIBC__ == 2) && (__GLIBC_MINOR__ <= 2)
#  define newlocale        __newlocale
#  define freelocale       __freelocale
#  define strxfrm_l        __strxfrm_l
#  define LC_COLLATE_MASK  (1 << LC_COLLATE)
#endif
28
/*
 * ASCII-only, locale-independent character helpers.
 *
 * TOUPPER clears and TOLOWER sets bit 0x20, which converts the case of an
 * ASCII letter; for any other value the result is just the bit operation.
 * ISALPHA is true for 'A'-'Z' and 'a'-'z' only.
 *
 * The argument is fully parenthesized so that expressions containing
 * lower-precedence operators expand correctly, and it appears exactly once
 * in each expansion, so arguments with side effects such as *p++ are
 * evaluated a single time.
 */
#define TOUPPER(c) ((c) & ~0x20)
#define TOLOWER(c) ((c) | 0x20)
#define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26)
32
/*
 * Maximum subtag lengths accepted by this file. None of these counts
 * includes the terminating null character.
 */
#define XSLTMAX_ISO639LANGLEN    8   /* language subtag */
#define XSLTMAX_ISO3166CNTRYLEN  8   /* country subtag */

/* Full tag: <lang>, one separator character, <cntry> */
#define XSLTMAX_LANGTAGLEN \
    (XSLTMAX_ISO639LANGLEN + 1 + XSLTMAX_ISO3166CNTRYLEN)
38
39static const xmlChar* xsltDefaultRegion(const xmlChar *localeName);
40
41#ifdef XSLT_LOCALE_WINAPI
42xmlRMutexPtr xsltLocaleMutex = NULL;
43
44struct xsltRFC1766Info_s {
45      /*note typedef unsigned char xmlChar !*/
46    xmlChar    tag[XSLTMAX_LANGTAGLEN+1];
47      /*note typedef LCID xsltLocale !*/
48    xsltLocale lcid;
49};
50typedef struct xsltRFC1766Info_s xsltRFC1766Info;
51
52static int xsltLocaleListSize = 0;
53static xsltRFC1766Info *xsltLocaleList = NULL;
54
55
56static xsltLocale
57xslt_locale_WINAPI(const xmlChar *languageTag) {
58    int k;
59    xsltRFC1766Info *p = xsltLocaleList;
60
61    for (k=0; k<xsltLocaleListSize; k++, p++)
62	if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid;
63    return((xsltLocale)0);
64}
65
66static void xsltEnumSupportedLocales(void);
67#endif
68
/**
 * xsltFreeLocales:
 *
 * Cleanup function for the locale support on shutdown.
 *
 * With the WINAPI backend this releases the locale table while holding
 * xsltLocaleMutex. xsltLocaleListSize is left unchanged. With the other
 * backends the function does nothing.
 */
void
xsltFreeLocales(void) {
#ifdef XSLT_LOCALE_WINAPI
    xsltRFC1766Info *stale;

    xmlRMutexLock(xsltLocaleMutex);
    stale = xsltLocaleList;
    xsltLocaleList = NULL;
    xmlFree(stale);
    xmlRMutexUnlock(xsltLocaleMutex);
#endif
}
83
84/**
85 * xsltNewLocale:
86 * @languageTag: RFC 3066 language tag
87 *
88 * Creates a new locale of an opaque system dependent type based on the
89 * language tag.
90 *
91 * Returns the locale or NULL on error or if no matching locale was found
92 */
93xsltLocale
94xsltNewLocale(const xmlChar *languageTag) {
95#ifdef XSLT_LOCALE_XLOCALE
96    xsltLocale locale;
97    char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */
98    const xmlChar *p = languageTag;
99    const char *region = NULL;
100    char *q = localeName;
101    int i, llen;
102
103    /* Convert something like "pt-br" to "pt_BR.utf8" */
104
105    if (languageTag == NULL)
106	return(NULL);
107
108    for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
109	*q++ = TOLOWER(*p++);
110
111    if (i == 0)
112	return(NULL);
113
114    llen = i;
115
116    if (*p) {
117	if (*p++ != '-')
118	    return(NULL);
119        *q++ = '_';
120
121	for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
122	    *q++ = TOUPPER(*p++);
123
124	if (i == 0 || *p)
125	    return(NULL);
126
127        memcpy(q, ".utf8", 6);
128        locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
129        if (locale != NULL)
130            return(locale);
131
132        /* Continue without using country code */
133
134        q = localeName + llen;
135    }
136
137    /* Try locale without territory, e.g. for Esperanto (eo) */
138
139    memcpy(q, ".utf8", 6);
140    locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
141    if (locale != NULL)
142        return(locale);
143
144    /* Try to find most common country for language */
145
146    if (llen != 2)
147        return(NULL);
148
149    region = (char *)xsltDefaultRegion((xmlChar *)localeName);
150    if (region == NULL)
151        return(NULL);
152
153    q = localeName + llen;
154    *q++ = '_';
155    *q++ = region[0];
156    *q++ = region[1];
157    memcpy(q, ".utf8", 6);
158    locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
159
160    return(locale);
161#endif
162
163#ifdef XSLT_LOCALE_WINAPI
164{
165    xsltLocale    locale = (xsltLocale)0;
166    xmlChar       localeName[XSLTMAX_LANGTAGLEN+1];
167    xmlChar       *q = localeName;
168    const xmlChar *p = languageTag;
169    int           i, llen;
170    const xmlChar *region = NULL;
171
172    if (languageTag == NULL) goto end;
173
174    xsltEnumSupportedLocales();
175
176    for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
177	*q++ = TOLOWER(*p++);
178    if (i == 0) goto end;
179
180    llen = i;
181    *q++ = '-';
182    if (*p) { /*if country tag is given*/
183	if (*p++ != '-') goto end;
184
185	for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
186	    *q++ = TOUPPER(*p++);
187	if (i == 0 || *p) goto end;
188
189	*q = '\0';
190	locale = xslt_locale_WINAPI(localeName);
191	if (locale != (xsltLocale)0) goto end;
192    }
193    /* Try to find most common country for language */
194    region = xsltDefaultRegion(localeName);
195    if (region == NULL) goto end;
196
197    strcpy(localeName + llen + 1, region);
198    locale = xslt_locale_WINAPI(localeName);
199end:
200    return(locale);
201}
202#endif
203
204#ifdef XSLT_LOCALE_NONE
205    return(NULL);
206#endif
207}
208
209static const xmlChar*
210xsltDefaultRegion(const xmlChar *localeName) {
211    xmlChar c;
212    /* region should be xmlChar, but gcc warns on all string assignments */
213    const char *region = NULL;
214
215    c = localeName[1];
216    /* This is based on the locales from glibc 2.3.3 */
217
218    switch (localeName[0]) {
219        case 'a':
220            if (c == 'a' || c == 'm') region = "ET";
221            else if (c == 'f') region = "ZA";
222            else if (c == 'n') region = "ES";
223            else if (c == 'r') region = "AE";
224            else if (c == 'z') region = "AZ";
225            break;
226        case 'b':
227            if (c == 'e') region = "BY";
228            else if (c == 'g') region = "BG";
229            else if (c == 'n') region = "BD";
230            else if (c == 'r') region = "FR";
231            else if (c == 's') region = "BA";
232            break;
233        case 'c':
234            if (c == 'a') region = "ES";
235            else if (c == 's') region = "CZ";
236            else if (c == 'y') region = "GB";
237            break;
238        case 'd':
239            if (c == 'a') region = "DK";
240            else if (c == 'e') region = "DE";
241            break;
242        case 'e':
243            if (c == 'l') region = "GR";
244            else if (c == 'n' || c == 'o') region = "US";
245            else if (c == 's' || c == 'u') region = "ES";
246            else if (c == 't') region = "EE";
247            break;
248        case 'f':
249            if (c == 'a') region = "IR";
250            else if (c == 'i') region = "FI";
251            else if (c == 'o') region = "FO";
252            else if (c == 'r') region = "FR";
253            break;
254        case 'g':
255            if (c == 'a') region = "IE";
256            else if (c == 'l') region = "ES";
257            else if (c == 'v') region = "GB";
258            break;
259        case 'h':
260            if (c == 'e') region = "IL";
261            else if (c == 'i') region = "IN";
262            else if (c == 'r') region = "HT";
263            else if (c == 'u') region = "HU";
264            break;
265        case 'i':
266            if (c == 'd') region = "ID";
267            else if (c == 's') region = "IS";
268            else if (c == 't') region = "IT";
269            else if (c == 'w') region = "IL";
270            break;
271        case 'j':
272            if (c == 'a') region = "JP";
273            break;
274        case 'k':
275            if (c == 'l') region = "GL";
276            else if (c == 'o') region = "KR";
277            else if (c == 'w') region = "GB";
278            break;
279        case 'l':
280            if (c == 't') region = "LT";
281            else if (c == 'v') region = "LV";
282            break;
283        case 'm':
284            if (c == 'k') region = "MK";
285            else if (c == 'l' || c == 'r') region = "IN";
286            else if (c == 'n') region = "MN";
287            else if (c == 's') region = "MY";
288            else if (c == 't') region = "MT";
289            break;
290        case 'n':
291            if (c == 'b' || c == 'n' || c == 'o') region = "NO";
292            else if (c == 'e') region = "NP";
293            else if (c == 'l') region = "NL";
294            break;
295        case 'o':
296            if (c == 'm') region = "ET";
297            break;
298        case 'p':
299            if (c == 'a') region = "IN";
300            else if (c == 'l') region = "PL";
301            else if (c == 't') region = "PT";
302            break;
303        case 'r':
304            if (c == 'o') region = "RO";
305            else if (c == 'u') region = "RU";
306            break;
307        case 's':
308            switch (c) {
309                case 'e': region = "NO"; break;
310                case 'h': region = "YU"; break;
311                case 'k': region = "SK"; break;
312                case 'l': region = "SI"; break;
313                case 'o': region = "ET"; break;
314                case 'q': region = "AL"; break;
315                case 't': region = "ZA"; break;
316                case 'v': region = "SE"; break;
317            }
318            break;
319        case 't':
320            if (c == 'a' || c == 'e') region = "IN";
321            else if (c == 'h') region = "TH";
322            else if (c == 'i') region = "ER";
323            else if (c == 'r') region = "TR";
324            else if (c == 't') region = "RU";
325            break;
326        case 'u':
327            if (c == 'k') region = "UA";
328            else if (c == 'r') region = "PK";
329            break;
330        case 'v':
331            if (c == 'i') region = "VN";
332            break;
333        case 'w':
334            if (c == 'a') region = "BE";
335            break;
336        case 'x':
337            if (c == 'h') region = "ZA";
338            break;
339        case 'z':
340            if (c == 'h') region = "CN";
341            else if (c == 'u') region = "ZA";
342            break;
343    }
344    return((xmlChar *)region);
345}
346
347/**
348 * xsltFreeLocale:
349 * @locale: the locale to free
350 *
351 * Frees a locale created with xsltNewLocale
352 */
353void
354xsltFreeLocale(xsltLocale locale) {
355#ifdef XSLT_LOCALE_XLOCALE
356    freelocale(locale);
357#endif
358}
359
360/**
361 * xsltStrxfrm:
362 * @locale: locale created with xsltNewLocale
363 * @string: UTF-8 string to transform
364 *
365 * Transforms a string according to locale. The transformed string must then be
366 * compared with xsltLocaleStrcmp and freed with xmlFree.
367 *
368 * Returns the transformed string or NULL on error
369 */
370xsltLocaleChar *
371xsltStrxfrm(xsltLocale locale, const xmlChar *string)
372{
373#ifdef XSLT_LOCALE_NONE
374    return(NULL);
375#else
376    size_t xstrlen, r;
377    xsltLocaleChar *xstr;
378
379#ifdef XSLT_LOCALE_XLOCALE
380    xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1;
381    xstr = (xsltLocaleChar *) xmlMalloc(xstrlen);
382    if (xstr == NULL) {
383	xsltTransformError(NULL, NULL, NULL,
384	    "xsltStrxfrm : out of memory error\n");
385	return(NULL);
386    }
387
388    r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale);
389#endif
390
391#ifdef XSLT_LOCALE_WINAPI
392    xstrlen = MultiByteToWideChar(CP_UTF8, 0, string, -1, NULL, 0);
393    if (xstrlen == 0) {
394        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n");
395        return(NULL);
396    }
397    xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar));
398    if (xstr == NULL) {
399        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
400        return(NULL);
401    }
402    r = MultiByteToWideChar(CP_UTF8, 0, string, -1, xstr, xstrlen);
403    if (r == 0) {
404        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n");
405        xmlFree(xstr);
406        return(NULL);
407    }
408    return(xstr);
409#endif /* XSLT_LOCALE_WINAPI */
410
411    if (r >= xstrlen) {
412	xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
413        xmlFree(xstr);
414        return(NULL);
415    }
416
417    return(xstr);
418#endif /* XSLT_LOCALE_NONE */
419}
420
421/**
422 * xsltLocaleStrcmp:
423 * @locale: a locale identifier
424 * @str1: a string transformed with xsltStrxfrm
425 * @str2: a string transformed with xsltStrxfrm
426 *
427 * Compares two strings transformed with xsltStrxfrm
428 *
429 * Returns a value < 0 if str1 sorts before str2,
430 *         a value > 0 if str1 sorts after str2,
431 *         0 if str1 and str2 are equal wrt sorting
432 */
433int
434xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) {
435    (void)locale;
436#ifdef XSLT_LOCALE_WINAPI
437{
438    int ret;
439    if (str1 == str2) return(0);
440    if (str1 == NULL) return(-1);
441    if (str2 == NULL) return(1);
442    ret = CompareStringW(locale, 0, str1, -1, str2, -1);
443    if (ret == 0) {
444        xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n");
445        return(0);
446    }
447    return(ret - 2);
448}
449#else
450    return(xmlStrcmp(str1, str2));
451#endif
452}
453
454#ifdef XSLT_LOCALE_WINAPI
455/**
456 * xsltCountSupportedLocales:
457 * @lcid: not used
458 *
459 * callback used to count locales
460 *
461 * Returns TRUE
462 */
463BOOL CALLBACK
464xsltCountSupportedLocales(LPSTR lcid) {
465    (void) lcid;
466    ++xsltLocaleListSize;
467    return(TRUE);
468}
469
470/**
471 * xsltIterateSupportedLocales:
472 * @lcid: not used
473 *
474 * callback used to track locales
475 *
476 * Returns TRUE if not at the end of the array
477 */
478BOOL CALLBACK
479xsltIterateSupportedLocales(LPSTR lcid) {
480    static int count = 0;
481    xmlChar    iso639lang [XSLTMAX_ISO639LANGLEN  +1];
482    xmlChar    iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1];
483    int        k, l;
484    xsltRFC1766Info *p = xsltLocaleList + count;
485
486    k = sscanf(lcid, "%lx", (long*)&p->lcid);
487    if (k < 1) goto end;
488    /*don't count terminating null character*/
489    k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME , iso639lang , sizeof(iso639lang ));
490    if (--k < 1) goto end;
491    l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME, iso3136ctry, sizeof(iso3136ctry));
492    if (--l < 1) goto end;
493
494    {  /*fill results*/
495	xmlChar    *q = p->tag;
496	memcpy(q, iso639lang, k);
497	q += k;
498	*q++ = '-';
499	memcpy(q, iso3136ctry, l);
500	q += l;
501	*q = '\0';
502    }
503    ++count;
504end:
505    return((count < xsltLocaleListSize) ? TRUE : FALSE);
506}
507
508
509static void
510xsltEnumSupportedLocales(void) {
511    xmlRMutexLock(xsltLocaleMutex);
512    if (xsltLocaleListSize <= 0) {
513	size_t len;
514
515	EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED);
516
517	len = xsltLocaleListSize * sizeof(xsltRFC1766Info);
518	xsltLocaleList = xmlMalloc(len);
519	memset(xsltLocaleList, 0, len);
520	EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED);
521    }
522    xmlRMutexUnlock(xsltLocaleMutex);
523}
524
525#endif /*def XSLT_LOCALE_WINAPI*/
526