1/*
2 * Copyright (c) 2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*
25    CFLocaleIdentifier.c
26	Copyright (c) 2002-2013, Apple Inc. All rights reserved.
27    Responsibility: David Smith
28
29    CFLocaleIdentifier.c defines
30    - enum value kLocaleIdentifierCStringMax
31    - structs KeyStringToResultString, SpecialCaseUpdates
32    and provides the following data for the functions
33    CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes,
    CFLocaleCreateCanonicalLocaleIdentifierFromString,
35    CFLocaleCreateCanonicalLanguageIdentifierFromString
36
37    1. static const char * regionCodeToLocaleString[]; enum kNumRegionCodeToLocaleString;
38        map RegionCode 0..kNumRegionCodeToLocaleString-1 to canonical locale string
39
40    2. static const char * langCodeToLocaleString[]; enum kNumLangCodeToLocaleString;
41        map LangCode 0..kNumLangCodeToLocaleString-1 to canonical locale string
42
43    3. static const KeyStringToResultString oldAppleLocaleToCanonical[]; enum kNumOldAppleLocaleToCanonical;
44        map old Apple string        oldAppleLocaleToCanonical[n].key
45        to canonical locale string  oldAppleLocaleToCanonical[n].result
46        for n = 0..kNumOldAppleLocaleToCanonical-1
47
48    4. static const KeyStringToResultString localeStringPrefixToCanonical[]; enum kNumLocaleStringPrefixToCanonical;
        map non-canonical language prefix (3-letter, obsolete)  localeStringPrefixToCanonical[n].key
        to updated replacement                                  localeStringPrefixToCanonical[n].result
51        for n = 0..kNumLocaleStringPrefixToCanonical-1
52
53    5. static const SpecialCaseUpdates specialCases[];
54        various special cases for updating region codes, or for updating language codes based on region codes
55
56    6. static const KeyStringToResultString localeStringRegionToDefaults[]; enum kNumLocaleStringRegionToDefaults;
57        map locale string region tag    localeStringRegionToDefaults[n].key
58        to default substrings to delete localeStringRegionToDefaults[n].result
59        for n = 0..kNumLocaleStringRegionToDefaults-1
60
61    7. static const KeyStringToResultString localeStringPrefixToDefaults[]; enum kNumLocaleStringPrefixToDefaults;
62        map locale string initial part  localeStringPrefixToDefaults[n].key
63        to default substrings to delete localeStringPrefixToDefaults[n].result
64        for n = 0..kNumLocaleStringPrefixToDefaults-1
65
66    8. static const KeyStringToResultString appleLocaleToLanguageString[]; enum kNumAppleLocaleToLanguageString;
        map Apple locale string         appleLocaleToLanguageString[n].key
        to equivalent language string   appleLocaleToLanguageString[n].result
69        for n = 0..kNumAppleLocaleToLanguageString-1
70
71*/
72
73#include <CoreFoundation/CFString.h>
74#include <CoreFoundation/CFCalendar.h>
75#include <ctype.h>
76#include <string.h>
77#include <stdlib.h>
78#include <stdio.h>
79#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
80#include <unicode/uloc.h>
81#else
82#define ULOC_KEYWORD_SEPARATOR '@'
83#define ULOC_FULLNAME_CAPACITY 56
84#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
85#endif
86#include "CFInternal.h"
87#include "CFLocaleInternal.h"
88
89// Max byte length of locale identifier (ASCII) as C string, including terminating null byte
90enum {
91    kLocaleIdentifierCStringMax = ULOC_FULLNAME_CAPACITY + ULOC_KEYWORD_AND_VALUES_CAPACITY	// currently 56 + 100
92};
93
// One row of a key -> result string table. Tables of these rows supply the data
// used by CFLocaleCreateCanonicalLocaleIdentifierFromString.
typedef struct KeyStringToResultString {
    const char *key;        // string that is looked up
    const char *result;     // string the key maps to
} KeyStringToResultString;
100
// One row of the special-case update table whose data is used by
// CFLocaleCreateCanonicalLocaleIdentifierFromString.
// NOTE(review): the per-field notes below are inferred from the field names only;
// confirm them against the code that walks specialCases[].
typedef struct SpecialCaseUpdates {
    const char *lang;       // presumably the language this row applies to
    const char *reg1;       // presumably the first region to match
    const char *update1;    // presumably the replacement used when reg1 matches
    const char *reg2;       // presumably the second region to match
    const char *update2;    // presumably the replacement used when reg2 matches
} SpecialCaseUpdates;
110
111
// Canonical locale string for each RegionCode; the RegionCode is the array index,
// written out explicitly as the designator of every entry.
// Entries set to NULL are the region codes marked *unassigned / *unused below.
//
// Layout of the comment that follows each entry:
//   region code name;  language code number and name (-1 none if there is none);  [comment]
//   [ # __CFBundleLocaleAbbreviationsArray string, if different ]
static const char * const regionCodeToLocaleString[] = {
    [  0] = "en_US",        // verUS;                 0 langEnglish;
    [  1] = "fr_FR",        // verFrance;             1 langFrench;
    [  2] = "en_GB",        // verBritain;            0 langEnglish;
    [  3] = "de_DE",        // verGermany;            2 langGerman;
    [  4] = "it_IT",        // verItaly;              3 langItalian;
    [  5] = "nl_NL",        // verNetherlands;        4 langDutch;
    [  6] = "nl_BE",        // verFlemish;           34 langFlemish (redundant, =Dutch);
    [  7] = "sv_SE",        // verSweden;             5 langSwedish;
    [  8] = "es_ES",        // verSpain;              6 langSpanish;
    [  9] = "da_DK",        // verDenmark;            7 langDanish;
    [ 10] = "pt_PT",        // verPortugal;           8 langPortuguese;
    [ 11] = "fr_CA",        // verFrCanada;           1 langFrench;
    [ 12] = "nb_NO",        // verNorway;             9 langNorwegian (Bokmal);             # "no_NO"
    [ 13] = "he_IL",        // verIsrael;            10 langHebrew;
    [ 14] = "ja_JP",        // verJapan;             11 langJapanese;
    [ 15] = "en_AU",        // verAustralia;          0 langEnglish;
    [ 16] = "ar",           // verArabic;            12 langArabic;
    [ 17] = "fi_FI",        // verFinland;           13 langFinnish;
    [ 18] = "fr_CH",        // verFrSwiss;            1 langFrench;
    [ 19] = "de_CH",        // verGrSwiss;            2 langGerman;
    [ 20] = "el_GR",        // verGreece;            14 langGreek (modern)-Grek-mono;
    [ 21] = "is_IS",        // verIceland;           15 langIcelandic;
    [ 22] = "mt_MT",        // verMalta;             16 langMaltese;
    [ 23] = "el_CY",        // verCyprus;            14 langGreek?;     el or tr? guess el  # ""
    [ 24] = "tr_TR",        // verTurkey;            17 langTurkish;
    [ 25] = "hr_HR",        // verYugoCroatian;      18 langCroatian;   * one-way mapping -> verCroatia
    [ 26] = "nl_NL",        // KCHR, Netherlands;     4 langDutch;      * one-way mapping
    [ 27] = "nl_BE",        // KCHR, verFlemish;     34 langFlemish;    * one-way mapping
    [ 28] = "_CA",          // KCHR, Canada-en/fr?;  -1 none;           * one-way mapping   # "en_CA"
    [ 29] = "_CA",          // KCHR, Canada-en/fr?;  -1 none;           * one-way mapping   # "en_CA"
    [ 30] = "pt_PT",        // KCHR, Portugal;        8 langPortuguese; * one-way mapping
    [ 31] = "nb_NO",        // KCHR, Norway;          9 langNorwegian (Bokmal); * one-way mapping   # "no_NO"
    [ 32] = "da_DK",        // KCHR, Denmark;         7 langDanish;     * one-way mapping
    [ 33] = "hi_IN",        // verIndiaHindi;        21 langHindi;
    [ 34] = "ur_PK",        // verPakistanUrdu;      20 langUrdu;
    [ 35] = "tr_TR",        // verTurkishModified;   17 langTurkish;    * one-way mapping
    [ 36] = "it_CH",        // verItalianSwiss;       3 langItalian;
    [ 37] = "en_001",       // verInternational;      0 langEnglish; ASCII only             # "en"
    [ 38] = NULL,           // *unassigned;          -1 none;           * one-way mapping   # ""
    [ 39] = "ro_RO",        // verRomania;           37 langRomanian;
    [ 40] = "grc",          // verGreekAncient;     148 langGreekAncient -Grek-poly;        # "el_GR"
    [ 41] = "lt_LT",        // verLithuania;         24 langLithuanian;
    [ 42] = "pl_PL",        // verPoland;            25 langPolish;
    [ 43] = "hu_HU",        // verHungary;           26 langHungarian;
    [ 44] = "et_EE",        // verEstonia;           27 langEstonian;
    [ 45] = "lv_LV",        // verLatvia;            28 langLatvian;
    [ 46] = "se",           // verSami;              29 langSami;
    [ 47] = "fo_FO",        // verFaroeIsl;          30 langFaroese;
    [ 48] = "fa_IR",        // verIran;              31 langFarsi/Persian;
    [ 49] = "ru_RU",        // verRussia;            32 langRussian;
    [ 50] = "ga_IE",        // verIreland;           35 langIrishGaelic (no dots);
    [ 51] = "ko_KR",        // verKorea;             23 langKorean;
    [ 52] = "zh_CN",        // verChina;             33 langSimpChinese;
    [ 53] = "zh_TW",        // verTaiwan;            19 langTradChinese;
    [ 54] = "th_TH",        // verThailand;          22 langThai;
    [ 55] = "und",          // verScriptGeneric;     -1 none;                               # ""        // <1.9>
    [ 56] = "cs_CZ",        // verCzech;             38 langCzech;
    [ 57] = "sk_SK",        // verSlovak;            39 langSlovak;
    [ 58] = "und",          // verEastAsiaGeneric;   -1 none;           * one-way mapping   # ""        // <1.9>
    [ 59] = "hu_HU",        // verMagyar;            26 langHungarian;  * one-way mapping -> verHungary
    [ 60] = "bn",           // verBengali;           67 langBengali;    _IN or _BD? guess generic
    [ 61] = "be_BY",        // verBelarus;           46 langBelorussian;
    [ 62] = "uk_UA",        // verUkraine;           45 langUkrainian;
    [ 63] = NULL,           // *unused;              -1 none;           * one-way mapping   # ""
    [ 64] = "el_GR",        // verGreeceAlt;         14 langGreek (modern)-Grek-mono;   * one-way mapping
    [ 65] = "sr_RS",        // verSerbian;           42 langSerbian -Cyrl;                              // <1.18>
    [ 66] = "sl_SI",        // verSlovenian;         40 langSlovenian;
    [ 67] = "mk_MK",        // verMacedonian;        43 langMacedonian;
    [ 68] = "hr_HR",        // verCroatia;           18 langCroatian;
    [ 69] = NULL,           // *unused;              -1 none;           * one-way mapping   # ""
    [ 70] = "de-1996",      // verGermanReformed;     2 langGerman;     1996 orthogr.       # "de_DE"
    [ 71] = "pt_BR",        // verBrazil;             8 langPortuguese;
    [ 72] = "bg_BG",        // verBulgaria;          44 langBulgarian;
    [ 73] = "ca_ES",        // verCatalonia;        130 langCatalan;
    [ 74] = "mul",          // verMultilingual;      -1 none;                               # ""
    [ 75] = "gd",           // verScottishGaelic;   144 langScottishGaelic;
    [ 76] = "gv",           // verManxGaelic;       145 langManxGaelic;
    [ 77] = "br",           // verBreton;           142 langBreton;
    [ 78] = "iu_CA",        // verNunavut;          143 langInuktitut -Cans;
    [ 79] = "cy",           // verWelsh;            128 langWelsh;
    [ 80] = "_CA",          // KCHR, Canada-en/fr?;  -1 none;           * one-way mapping   # "en_CA"
    [ 81] = "ga-Latg_IE",   // verIrishGaelicScrip; 146 langIrishGaelicScript -dots;        # "ga_IE"   // <xx>
    [ 82] = "en_CA",        // verEngCanada;          0 langEnglish;
    [ 83] = "dz_BT",        // verBhutan;           137 langDzongkha;
    [ 84] = "hy_AM",        // verArmenian;          51 langArmenian;
    [ 85] = "ka_GE",        // verGeorgian;          52 langGeorgian;
    [ 86] = "es_419",       // verSpLatinAmerica;     6 langSpanish;                        # "es"
    [ 87] = "es_ES",        // KCHR, Spain;           6 langSpanish;    * one-way mapping
    [ 88] = "to_TO",        // verTonga;            147 langTongan;
    [ 89] = "pl_PL",        // KCHR, Poland;         25 langPolish;     * one-way mapping
    [ 90] = "ca_ES",        // KCHR, Catalonia;     130 langCatalan;    * one-way mapping
    [ 91] = "fr_001",       // verFrenchUniversal;    1 langFrench;
    [ 92] = "de_AT",        // verAustria;            2 langGerman;
    [ 93] = "es_419",       // > verSpLatinAmerica;   6 langSpanish;    * one-way mapping   # "es"
    [ 94] = "gu_IN",        // verGujarati;          69 langGujarati;
    [ 95] = "pa",           // verPunjabi;           70 langPunjabi;    _IN or _PK? guess generic
    [ 96] = "ur_IN",        // verIndiaUrdu;         20 langUrdu;
    [ 97] = "vi_VN",        // verVietnam;           80 langVietnamese;
    [ 98] = "fr_BE",        // verFrBelgium;          1 langFrench;
    [ 99] = "uz_UZ",        // verUzbek;             47 langUzbek;
    [100] = "en_SG",        // verSingapore;          0 langEnglish?; en, zh, or ms? guess en   # ""
    [101] = "nn_NO",        // verNynorsk;          151 langNynorsk;                        # ""
    [102] = "af_ZA",        // verAfrikaans;        141 langAfrikaans;
    [103] = "eo",           // verEsperanto;         94 langEsperanto;
    [104] = "mr_IN",        // verMarathi;           66 langMarathi;
    [105] = "bo",           // verTibetan;           63 langTibetan;
    [106] = "ne_NP",        // verNepal;             64 langNepali;
    [107] = "kl",           // verGreenland;        149 langGreenlandic;
    [108] = "en_IE",        // verIrelandEnglish;     0 langEnglish;                        # (no entry)
};
// Number of entries in regionCodeToLocaleString; valid RegionCode indices are
// 0..kNumRegionCodeToLocaleString-1.
enum {
    kNumRegionCodeToLocaleString = sizeof(regionCodeToLocaleString) / sizeof(regionCodeToLocaleString[0])
};
230
// Canonical locale string for each LangCode; the LangCode is the array index,
// written out explicitly as the designator of every entry.
//
// LangCodes 95..127 are a gap with no assigned language. They carry no
// initializer and are therefore NULL, so callers must be prepared for a NULL
// result within 0..kNumLangCodeToLocaleString-1.
//
// Layout of the comment that follows each entry:
//   language code name [-script];  [comment]
//   [ # __CFBundleLanguageAbbreviationsArray string, if different ]
static const char * const langCodeToLocaleString[] = {
    [  0] = "en",       // langEnglish;
    [  1] = "fr",       // langFrench;
    [  2] = "de",       // langGerman;
    [  3] = "it",       // langItalian;
    [  4] = "nl",       // langDutch;
    [  5] = "sv",       // langSwedish;
    [  6] = "es",       // langSpanish;
    [  7] = "da",       // langDanish;
    [  8] = "pt",       // langPortuguese;
    [  9] = "nb",       // langNorwegian (Bokmal);                      # "no"
    [ 10] = "he",       // langHebrew -Hebr;
    [ 11] = "ja",       // langJapanese -Jpan;
    [ 12] = "ar",       // langArabic -Arab;
    [ 13] = "fi",       // langFinnish;
    [ 14] = "el",       // langGreek (modern)-Grek-mono;
    [ 15] = "is",       // langIcelandic;
    [ 16] = "mt",       // langMaltese -Latn;
    [ 17] = "tr",       // langTurkish -Latn;
    [ 18] = "hr",       // langCroatian;
    [ 19] = "zh-Hant",  // langTradChinese;                             # "zh"
    [ 20] = "ur",       // langUrdu -Arab;
    [ 21] = "hi",       // langHindi -Deva;
    [ 22] = "th",       // langThai -Thai;
    [ 23] = "ko",       // langKorean -Hang;
    [ 24] = "lt",       // langLithuanian;
    [ 25] = "pl",       // langPolish;
    [ 26] = "hu",       // langHungarian;
    [ 27] = "et",       // langEstonian;
    [ 28] = "lv",       // langLatvian;
    [ 29] = "se",       // langSami;
    [ 30] = "fo",       // langFaroese;
    [ 31] = "fa",       // langFarsi/Persian -Arab;
    [ 32] = "ru",       // langRussian -Cyrl;
    [ 33] = "zh-Hans",  // langSimpChinese;                             # "zh"
    [ 34] = "nl-BE",    // langFlemish (redundant, =Dutch);             # "nl"
    [ 35] = "ga",       // langIrishGaelic (no dots);
    [ 36] = "sq",       // langAlbanian;                no region codes
    [ 37] = "ro",       // langRomanian;
    [ 38] = "cs",       // langCzech;
    [ 39] = "sk",       // langSlovak;
    [ 40] = "sl",       // langSlovenian;
    [ 41] = "yi",       // langYiddish -Hebr;           no region codes
    [ 42] = "sr",       // langSerbian -Cyrl;
    [ 43] = "mk",       // langMacedonian -Cyrl;
    [ 44] = "bg",       // langBulgarian -Cyrl;
    [ 45] = "uk",       // langUkrainian -Cyrl;
    [ 46] = "be",       // langBelorussian -Cyrl;
    [ 47] = "uz",       // langUzbek -Cyrl;             also -Latn, -Arab
    [ 48] = "kk",       // langKazakh -Cyrl;            no region codes; also -Latn, -Arab
    [ 49] = "az-Cyrl",  // langAzerbaijani -Cyrl;       no region codes # "az"
    [ 50] = "az-Arab",  // langAzerbaijanAr -Arab;      no region codes # "az"
    [ 51] = "hy",       // langArmenian -Armn;
    [ 52] = "ka",       // langGeorgian -Geor;
    [ 53] = "mo",       // langMoldavian -Cyrl;         no region codes
    [ 54] = "ky",       // langKirghiz -Cyrl;           no region codes; also -Latn, -Arab
    [ 55] = "tg",       // langTajiki -Cyrl;            no region codes; also -Latn, -Arab
    [ 56] = "tk-Cyrl",  // langTurkmen -Cyrl;           no region codes; also -Latn, -Arab
    [ 57] = "mn-Mong",  // langMongolian -Mong;         no region codes # "mn"
    [ 58] = "mn",       // langMongolianCyr -Cyrl;      no region codes # "mn"
    [ 59] = "ps",       // langPashto -Arab;            no region codes
    [ 60] = "ku",       // langKurdish -Arab;           no region codes
    [ 61] = "ks",       // langKashmiri -Arab;          no region codes
    [ 62] = "sd",       // langSindhi -Arab;            no region codes
    [ 63] = "bo",       // langTibetan -Tibt;
    [ 64] = "ne",       // langNepali -Deva;
    [ 65] = "sa",       // langSanskrit -Deva;          no region codes
    [ 66] = "mr",       // langMarathi -Deva;
    [ 67] = "bn",       // langBengali -Beng;
    [ 68] = "as",       // langAssamese -Beng;          no region codes
    [ 69] = "gu",       // langGujarati -Gujr;
    [ 70] = "pa",       // langPunjabi -Guru;
    [ 71] = "or",       // langOriya -Orya;             no region codes
    [ 72] = "ml",       // langMalayalam -Mlym;         no region codes
    [ 73] = "kn",       // langKannada -Knda;           no region codes
    [ 74] = "ta",       // langTamil -Taml;             no region codes
    [ 75] = "te",       // langTelugu -Telu;            no region codes
    [ 76] = "si",       // langSinhalese -Sinh;         no region codes
    [ 77] = "my",       // langBurmese -Mymr;           no region codes
    [ 78] = "km",       // langKhmer -Khmr;             no region codes
    [ 79] = "lo",       // langLao -Laoo;               no region codes
    [ 80] = "vi",       // langVietnamese -Latn;
    [ 81] = "id",       // langIndonesian -Latn;        no region codes
    [ 82] = "fil",      // langTagalog -Latn;           no region codes
    [ 83] = "ms",       // langMalayRoman -Latn;        no region codes # "ms"
    [ 84] = "ms-Arab",  // langMalayArabic -Arab;       no region codes # "ms"
    [ 85] = "am",       // langAmharic -Ethi;           no region codes
    [ 86] = "ti",       // langTigrinya -Ethi;          no region codes
    [ 87] = "om",       // langOromo -Ethi;             no region codes
    [ 88] = "so",       // langSomali -Latn;            no region codes
    [ 89] = "sw",       // langSwahili -Latn;           no region codes
    [ 90] = "rw",       // langKinyarwanda -Latn;       no region codes
    [ 91] = "rn",       // langRundi -Latn;             no region codes
    [ 92] = "ny",       // langNyanja/Chewa -Latn;      no region codes # ""
    [ 93] = "mg",       // langMalagasy -Latn;          no region codes
    [ 94] = "eo",       // langEsperanto -Latn;
    // 95 to 127 (gap): 33 unassigned LangCodes, left without initializers so they are NULL
    [128] = "cy",       // langWelsh -Latn;
    [129] = "eu",       // langBasque -Latn;            no region codes
    [130] = "ca",       // langCatalan -Latn;
    [131] = "la",       // langLatin -Latn;             no region codes
    [132] = "qu",       // langQuechua -Latn;           no region codes
    [133] = "gn",       // langGuarani -Latn;           no region codes
    [134] = "ay",       // langAymara -Latn;            no region codes
    [135] = "tt-Cyrl",  // langTatar -Cyrl;             no region codes
    [136] = "ug",       // langUighur -Arab;            no region codes
    [137] = "dz",       // langDzongkha -Tibt;
    [138] = "jv",       // langJavaneseRom -Latn;       no region codes
    [139] = "su",       // langSundaneseRom -Latn;      no region codes
    [140] = "gl",       // langGalician -Latn;          no region codes
    [141] = "af",       // langAfrikaans -Latn;
    [142] = "br",       // langBreton -Latn;
    [143] = "iu",       // langInuktitut -Cans;
    [144] = "gd",       // langScottishGaelic;
    [145] = "gv",       // langManxGaelic -Latn;
    [146] = "ga-Latg",  // langIrishGaelicScript  -Latn-dots;           # "ga"      // <xx>
    [147] = "to",       // langTongan -Latn;
    [148] = "grc",      // langGreekAncient -Grek-poly;                 # "el"
    [149] = "kl",       // langGreenlandic -Latn;
    [150] = "az",       // langAzerbaijanRoman -Latn;   no region codes # "az"
    [151] = "nn",       // langNynorsk -Latn;                           # (no entry)
};
// Number of entries in langCodeToLocaleString (gap entries included); valid
// LangCode indices are 0..kNumLangCodeToLocaleString-1.
enum {
    kNumLangCodeToLocaleString = sizeof(langCodeToLocaleString) / sizeof(langCodeToLocaleString[0])
};
362
363static const KeyStringToResultString oldAppleLocaleToCanonical[] = {
364// Map obsolete/old-style Apple strings to canonical
365// Must be sorted according to how strcmp compares the strings in the first column
366//
367//    non-canonical             canonical       [  comment ]            # source/reason for non-canonical string
368//    string                    string
369//    -------------             ---------
370    { "Afrikaans",              "af"        },  //                      # __CFBundleLanguageNamesArray
371    { "Albanian",               "sq"        },  //                      # __CFBundleLanguageNamesArray
372    { "Amharic",                "am"        },  //                      # __CFBundleLanguageNamesArray
373    { "Arabic",                 "ar"        },  //                      # __CFBundleLanguageNamesArray
374    { "Armenian",               "hy"        },  //                      # __CFBundleLanguageNamesArray
375    { "Assamese",               "as"        },  //                      # __CFBundleLanguageNamesArray
376    { "Aymara",                 "ay"        },  //                      # __CFBundleLanguageNamesArray
377    { "Azerbaijani",            "az"        },  // -Arab,-Cyrl,-Latn?   # __CFBundleLanguageNamesArray (had 3 entries "Azerbaijani" for "az-Arab", "az-Cyrl", "az-Latn")
378    { "Basque",                 "eu"        },  //                      # __CFBundleLanguageNamesArray
379    { "Belarusian",             "be"        },  //                      # handle other names
380    { "Belorussian",            "be"        },  //                      # handle other names
381    { "Bengali",                "bn"        },  //                      # __CFBundleLanguageNamesArray
382    { "Brazilian Portugese",    "pt-BR"     },  //                      # from Installer.app Info.plist IFLanguages key, misspelled
383    { "Brazilian Portuguese",   "pt-BR"     },  //                      # correct spelling for above
384    { "Breton",                 "br"        },  //                      # __CFBundleLanguageNamesArray
385    { "Bulgarian",              "bg"        },  //                      # __CFBundleLanguageNamesArray
386    { "Burmese",                "my"        },  //                      # __CFBundleLanguageNamesArray
387    { "Byelorussian",           "be"        },  //                      # __CFBundleLanguageNamesArray
388    { "Catalan",                "ca"        },  //                      # __CFBundleLanguageNamesArray
389    { "Chewa",                  "ny"        },  //                      # handle other names
390    { "Chichewa",               "ny"        },  //                      # handle other names
391    { "Chinese",                "zh"        },  // -Hans,-Hant?         # __CFBundleLanguageNamesArray (had 2 entries "Chinese" for "zh-Hant", "zh-Hans")
392    { "Chinese, Simplified",    "zh-Hans"   },  //                      # from Installer.app Info.plist IFLanguages key
393    { "Chinese, Traditional",   "zh-Hant"   },  //                      # correct spelling for below
394    { "Chinese, Tradtional",    "zh-Hant"   },  //                      # from Installer.app Info.plist IFLanguages key, misspelled
395    { "Croatian",               "hr"        },  //                      # __CFBundleLanguageNamesArray
396    { "Czech",                  "cs"        },  //                      # __CFBundleLanguageNamesArray
397    { "Danish",                 "da"        },  //                      # __CFBundleLanguageNamesArray
398    { "Dutch",                  "nl"        },  //                      # __CFBundleLanguageNamesArray (had 2 entries "Dutch" for "nl", "nl-BE")
399    { "Dzongkha",               "dz"        },  //                      # __CFBundleLanguageNamesArray
400    { "English",                "en"        },  //                      # __CFBundleLanguageNamesArray
401    { "Esperanto",              "eo"        },  //                      # __CFBundleLanguageNamesArray
402    { "Estonian",               "et"        },  //                      # __CFBundleLanguageNamesArray
403    { "Faroese",                "fo"        },  //                      # __CFBundleLanguageNamesArray
404    { "Farsi",                  "fa"        },  //                      # __CFBundleLanguageNamesArray
405    { "Finnish",                "fi"        },  //                      # __CFBundleLanguageNamesArray
406    { "Flemish",                "nl-BE"     },  //                      # handle other names
407    { "French",                 "fr"        },  //                      # __CFBundleLanguageNamesArray
408    { "Galician",               "gl"        },  //                      # __CFBundleLanguageNamesArray
409    { "Gallegan",               "gl"        },  //                      # handle other names
410    { "Georgian",               "ka"        },  //                      # __CFBundleLanguageNamesArray
411    { "German",                 "de"        },  //                      # __CFBundleLanguageNamesArray
412    { "Greek",                  "el"        },  //                      # __CFBundleLanguageNamesArray (had 2 entries "Greek" for "el", "grc")
413    { "Greenlandic",            "kl"        },  //                      # __CFBundleLanguageNamesArray
414    { "Guarani",                "gn"        },  //                      # __CFBundleLanguageNamesArray
415    { "Gujarati",               "gu"        },  //                      # __CFBundleLanguageNamesArray
416    { "Hawaiian",               "haw"       },  //                      # handle new languages
417    { "Hebrew",                 "he"        },  //                      # __CFBundleLanguageNamesArray
418    { "Hindi",                  "hi"        },  //                      # __CFBundleLanguageNamesArray
419    { "Hungarian",              "hu"        },  //                      # __CFBundleLanguageNamesArray
420    { "Icelandic",              "is"        },  //                      # __CFBundleLanguageNamesArray
421    { "Indonesian",             "id"        },  //                      # __CFBundleLanguageNamesArray
422    { "Inuktitut",              "iu"        },  //                      # __CFBundleLanguageNamesArray
423    { "Irish",                  "ga"        },  //                      # __CFBundleLanguageNamesArray (had 2 entries "Irish" for "ga", "ga-dots")
424    { "Italian",                "it"        },  //                      # __CFBundleLanguageNamesArray
425    { "Japanese",               "ja"        },  //                      # __CFBundleLanguageNamesArray
426    { "Javanese",               "jv"        },  //                      # __CFBundleLanguageNamesArray
427    { "Kalaallisut",            "kl"        },  //                      # handle other names
428    { "Kannada",                "kn"        },  //                      # __CFBundleLanguageNamesArray
429    { "Kashmiri",               "ks"        },  //                      # __CFBundleLanguageNamesArray
430    { "Kazakh",                 "kk"        },  //                      # __CFBundleLanguageNamesArray
431    { "Khmer",                  "km"        },  //                      # __CFBundleLanguageNamesArray
432    { "Kinyarwanda",            "rw"        },  //                      # __CFBundleLanguageNamesArray
433    { "Kirghiz",                "ky"        },  //                      # __CFBundleLanguageNamesArray
434    { "Korean",                 "ko"        },  //                      # __CFBundleLanguageNamesArray
435    { "Kurdish",                "ku"        },  //                      # __CFBundleLanguageNamesArray
436    { "Lao",                    "lo"        },  //                      # __CFBundleLanguageNamesArray
437    { "Latin",                  "la"        },  //                      # __CFBundleLanguageNamesArray
438    { "Latvian",                "lv"        },  //                      # __CFBundleLanguageNamesArray
439    { "Lithuanian",             "lt"        },  //                      # __CFBundleLanguageNamesArray
440    { "Macedonian",             "mk"        },  //                      # __CFBundleLanguageNamesArray
441    { "Malagasy",               "mg"        },  //                      # __CFBundleLanguageNamesArray
442    { "Malay",                  "ms"        },  // -Latn,-Arab?         # __CFBundleLanguageNamesArray (had 2 entries "Malay" for "ms-Latn", "ms-Arab")
443    { "Malayalam",              "ml"        },  //                      # __CFBundleLanguageNamesArray
444    { "Maltese",                "mt"        },  //                      # __CFBundleLanguageNamesArray
445    { "Manx",                   "gv"        },  //                      # __CFBundleLanguageNamesArray
446    { "Marathi",                "mr"        },  //                      # __CFBundleLanguageNamesArray
447    { "Moldavian",              "mo"        },  //                      # __CFBundleLanguageNamesArray
448    { "Mongolian",              "mn"        },  // -Mong,-Cyrl?         # __CFBundleLanguageNamesArray (had 2 entries "Mongolian" for "mn-Mong", "mn-Cyrl")
449    { "Nepali",                 "ne"        },  //                      # __CFBundleLanguageNamesArray
450    { "Norwegian",              "nb"        },  //                      # __CFBundleLanguageNamesArray (had "Norwegian" mapping to "no")
451    { "Nyanja",                 "ny"        },  //                      # __CFBundleLanguageNamesArray
452    { "Nynorsk",                "nn"        },  //                      # handle other names (no entry in __CFBundleLanguageNamesArray)
453    { "Oriya",                  "or"        },  //                      # __CFBundleLanguageNamesArray
454    { "Oromo",                  "om"        },  //                      # __CFBundleLanguageNamesArray
455    { "Panjabi",                "pa"        },  //                      # handle other names
456    { "Pashto",                 "ps"        },  //                      # __CFBundleLanguageNamesArray
457    { "Persian",                "fa"        },  //                      # handle other names
458    { "Polish",                 "pl"        },  //                      # __CFBundleLanguageNamesArray
459    { "Portuguese",             "pt"        },  //                      # __CFBundleLanguageNamesArray
460    { "Portuguese, Brazilian",  "pt-BR"     },  //                      # handle other names
461    { "Punjabi",                "pa"        },  //                      # __CFBundleLanguageNamesArray
462    { "Pushto",                 "ps"        },  //                      # handle other names
463    { "Quechua",                "qu"        },  //                      # __CFBundleLanguageNamesArray
464    { "Romanian",               "ro"        },  //                      # __CFBundleLanguageNamesArray
465    { "Ruanda",                 "rw"        },  //                      # handle other names
466    { "Rundi",                  "rn"        },  //                      # __CFBundleLanguageNamesArray
467    { "Russian",                "ru"        },  //                      # __CFBundleLanguageNamesArray
468    { "Sami",                   "se"        },  //                      # __CFBundleLanguageNamesArray
469    { "Sanskrit",               "sa"        },  //                      # __CFBundleLanguageNamesArray
470    { "Scottish",               "gd"        },  //                      # __CFBundleLanguageNamesArray
471    { "Serbian",                "sr"        },  //                      # __CFBundleLanguageNamesArray
472    { "Simplified Chinese",     "zh-Hans"   },  //                      # handle other names
473    { "Sindhi",                 "sd"        },  //                      # __CFBundleLanguageNamesArray
474    { "Sinhalese",              "si"        },  //                      # __CFBundleLanguageNamesArray
475    { "Slovak",                 "sk"        },  //                      # __CFBundleLanguageNamesArray
476    { "Slovenian",              "sl"        },  //                      # __CFBundleLanguageNamesArray
477    { "Somali",                 "so"        },  //                      # __CFBundleLanguageNamesArray
478    { "Spanish",                "es"        },  //                      # __CFBundleLanguageNamesArray
479    { "Sundanese",              "su"        },  //                      # __CFBundleLanguageNamesArray
480    { "Swahili",                "sw"        },  //                      # __CFBundleLanguageNamesArray
481    { "Swedish",                "sv"        },  //                      # __CFBundleLanguageNamesArray
482    { "Tagalog",                "fil"       },  //                      # __CFBundleLanguageNamesArray
483    { "Tajik",                  "tg"        },  //                      # handle other names
484    { "Tajiki",                 "tg"        },  //                      # __CFBundleLanguageNamesArray
485    { "Tamil",                  "ta"        },  //                      # __CFBundleLanguageNamesArray
486    { "Tatar",                  "tt"        },  //                      # __CFBundleLanguageNamesArray
487    { "Telugu",                 "te"        },  //                      # __CFBundleLanguageNamesArray
488    { "Thai",                   "th"        },  //                      # __CFBundleLanguageNamesArray
489    { "Tibetan",                "bo"        },  //                      # __CFBundleLanguageNamesArray
490    { "Tigrinya",               "ti"        },  //                      # __CFBundleLanguageNamesArray
491    { "Tongan",                 "to"        },  //                      # __CFBundleLanguageNamesArray
492    { "Traditional Chinese",    "zh-Hant"   },  //                      # handle other names
493    { "Turkish",                "tr"        },  //                      # __CFBundleLanguageNamesArray
494    { "Turkmen",                "tk"        },  //                      # __CFBundleLanguageNamesArray
495    { "Uighur",                 "ug"        },  //                      # __CFBundleLanguageNamesArray
496    { "Ukrainian",              "uk"        },  //                      # __CFBundleLanguageNamesArray
497    { "Urdu",                   "ur"        },  //                      # __CFBundleLanguageNamesArray
498    { "Uzbek",                  "uz"        },  //                      # __CFBundleLanguageNamesArray
499    { "Vietnamese",             "vi"        },  //                      # __CFBundleLanguageNamesArray
500    { "Welsh",                  "cy"        },  //                      # __CFBundleLanguageNamesArray
501    { "Yiddish",                "yi"        },  //                      # __CFBundleLanguageNamesArray
502    { "ar_??",                  "ar"        },  //                      # from old MapScriptInfoAndISOCodes
503    { "az.Ar",                  "az-Arab"   },  //                      # from old LocaleRefGetPartString
504    { "az.Cy",                  "az-Cyrl"   },  //                      # from old LocaleRefGetPartString
505    { "az.La",                  "az"        },  //                      # from old LocaleRefGetPartString
506    { "be_??",                  "be_BY"     },  //                      # from old MapScriptInfoAndISOCodes
507    { "bn_??",                  "bn"        },  //                      # from old LocaleRefGetPartString
508    { "bo_??",                  "bo"        },  //                      # from old MapScriptInfoAndISOCodes
509    { "br_??",                  "br"        },  //                      # from old MapScriptInfoAndISOCodes
510    { "cy_??",                  "cy"        },  //                      # from old MapScriptInfoAndISOCodes
511    { "de-96",                  "de-1996"   },  //                      # from old MapScriptInfoAndISOCodes                     // <1.9>
512    { "de_96",                  "de-1996"   },  //                      # from old MapScriptInfoAndISOCodes                     // <1.9>
513    { "de_??",                  "de-1996"   },  //                      # from old MapScriptInfoAndISOCodes
514    { "el.El-P",                "grc"       },  //                      # from old LocaleRefGetPartString
515    { "en-ascii",               "en_001"    },  //                      # from earlier version of tables in this file!
516    { "en_??",                  "en_001"    },  //                      # from old MapScriptInfoAndISOCodes
517    { "eo_??",                  "eo"        },  //                      # from old MapScriptInfoAndISOCodes
518    { "es_??",                  "es_419"    },  //                      # from old MapScriptInfoAndISOCodes
519    { "es_XL",                  "es_419"    },  //                      # from earlier version of tables in this file!
520    { "fr_??",                  "fr_001"    },  //                      # from old MapScriptInfoAndISOCodes
521    { "ga-dots",                "ga-Latg"   },  //                      # from earlier version of tables in this file!          // <1.8>
522    { "ga-dots_IE",             "ga-Latg_IE" }, //                      # from earlier version of tables in this file!          // <1.8>
523    { "ga.Lg",                  "ga-Latg"   },  //                      # from old LocaleRefGetPartString                       // <1.8>
524    { "ga.Lg_IE",               "ga-Latg_IE" }, //                      # from old LocaleRefGetPartString                       // <1.8>
525    { "gd_??",                  "gd"        },  //                      # from old MapScriptInfoAndISOCodes
526    { "gv_??",                  "gv"        },  //                      # from old MapScriptInfoAndISOCodes
527    { "jv.La",                  "jv"        },  //                      # logical extension                                     // <1.9>
528    { "jw.La",                  "jv"        },  //                      # from old LocaleRefGetPartString
529    { "kk.Cy",                  "kk"        },  //                      # from old LocaleRefGetPartString
530    { "kl.La",                  "kl"        },  //                      # from old LocaleRefGetPartString
531    { "kl.La_GL",               "kl_GL"     },  //                      # from old LocaleRefGetPartString                       // <1.9>
532    { "lp_??",                  "se"        },  //                      # from old MapScriptInfoAndISOCodes
533    { "mk_??",                  "mk_MK"     },  //                      # from old MapScriptInfoAndISOCodes
534    { "mn.Cy",                  "mn"        },  //                      # from old LocaleRefGetPartString
535    { "mn.Mn",                  "mn-Mong"   },  //                      # from old LocaleRefGetPartString
536    { "ms.Ar",                  "ms-Arab"   },  //                      # from old LocaleRefGetPartString
537    { "ms.La",                  "ms"        },  //                      # from old LocaleRefGetPartString
538    { "nl-be",                  "nl-BE"     },  //                      # from old LocaleRefGetPartString
539    { "nl-be_BE",               "nl_BE"     },  //                      # from old LocaleRefGetPartString
540    { "no-NO",					"nb-NO"     },  //                      # not handled by localeStringPrefixToCanonical
541    { "no-NO_NO",				"nb-NO_NO"  },  //                      # not handled by localeStringPrefixToCanonical
542//  { "no-bok_NO",              "nb_NO"     },  //                      # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
543//  { "no-nyn_NO",              "nn_NO"     },  //                      # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
544//  { "nya",                    "ny"        },  //                      # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical
545    { "pa_??",                  "pa"        },  //                      # from old LocaleRefGetPartString
546    { "sa.Dv",                  "sa"        },  //                      # from old LocaleRefGetPartString
547    { "sl_??",                  "sl_SI"     },  //                      # from old MapScriptInfoAndISOCodes
548    { "sr_??",                  "sr_RS"     },  //                      # from old MapScriptInfoAndISOCodes						// <1.18>
549    { "su.La",                  "su"        },  //                      # from old LocaleRefGetPartString
550    { "yi.He",                  "yi"        },  //                      # from old LocaleRefGetPartString
551    { "zh-simp",                "zh-Hans"   },  //                      # from earlier version of tables in this file!
552    { "zh-trad",                "zh-Hant"   },  //                      # from earlier version of tables in this file!
553    { "zh.Ha-S",                "zh-Hans"   },  //                      # from old LocaleRefGetPartString
554    { "zh.Ha-S_CN",             "zh_CN"     },  //                      # from old LocaleRefGetPartString
555    { "zh.Ha-T",                "zh-Hant"   },  //                      # from old LocaleRefGetPartString
556    { "zh.Ha-T_TW",             "zh_TW"     },  //                      # from old LocaleRefGetPartString
557};
558enum {
559    kNumOldAppleLocaleToCanonical = sizeof(oldAppleLocaleToCanonical)/sizeof(KeyStringToResultString)
560};
561
562static const KeyStringToResultString localeStringPrefixToCanonical[] = {
563// Map 3-letter & obsolete ISO 639 codes, plus obsolete RFC 3066 codes, to 2-letter ISO 639 code.
564// (special cases for 'sh' handled separately)
565// First column must be all lowercase; must be sorted according to how strcmp compares the strings in the first column.
566//
567//    non-canonical canonical       [  comment ]                # source/reason for non-canonical string
568//    prefix        prefix
569//    ------------- ---------
570
571    { "aar",        "aa"        },  // Afar
572 //   { "aa_SAAHO",   "ssy"       },  // Saho                       # deprecated/grandfathered, handled as a special case
573    { "abk",        "ab"        },  // Abkhazian
574    { "afr",        "af"        },  // Afrikaans
575    { "aju",        "jrb"       },  // Moroccan Judeo-Arabic -> Judeo-Arabic (macrolang.)
576    { "aka",        "ak"        },  // Akan
577    { "alb",        "sq"        },  // Albanian
578    { "als",        "sq"        },  // Tosk Albanian -> Albanian (macrolang.)
579    { "amh",        "am"        },  // Amharic
580    { "ara",        "ar"        },  // Arabic
581    { "arb",        "ar"        },  // Std Arabic -> Arabic (macrolang.)
582    { "arg",        "an"        },  // Aragonese
583    { "arm",        "hy"        },  // Armenian
584    { "art-lojban", "jbo"       },  // Lojban                     # deprecated/grandfathered
585    { "asm",        "as"        },  // Assamese
586    { "ava",        "av"        },  // Avaric
587    { "ave",        "ae"        },  // Avestan
588    { "aym",        "ay"        },  // Aymara
589    { "ayr",        "ay"        },  // Central Aymara -> Aymara (macrolang.)
590    { "aze",        "az"        },  // Azerbaijani
591    { "azj",        "az"        },  // N.Azerbaijani -> Azerbaijani (macrolang.)
592    { "bak",        "ba"        },  // Bashkir
593    { "bam",        "bm"        },  // Bambara
594    { "baq",        "eu"        },  // Basque
595    { "bcc",        "bal"       },  // Balochi, Southern -> Baluchi (macrolang.)
596    { "bcl",        "bik"       },  // Bicolano, Central -> Bikol (macrolang.)
597    { "bel",        "be"        },  // Belarusian
598    { "ben",        "bn"        },  // Bengali
599    { "bih",        "bh"        },  // Bihari
600    { "bis",        "bi"        },  // Bislama
601    { "bod",        "bo"        },  // Tibetan
602    { "bos",        "bs"        },  // Bosnian
603    { "bre",        "br"        },  // Breton
604    { "bul",        "bg"        },  // Bulgarian
605    { "bur",        "my"        },  // Burmese
606    { "bxk",        "luy"       },  // Lubukusu -> Luyia (macrolang.)
607    { "bxr",        "bua"       },  // Buriat, Russia -> Buriat (macrolang.)
608    { "cat",        "ca"        },  // Catalan
609    { "ces",        "cs"        },  // Czech
610    { "cha",        "ch"        },  // Chamorro
611    { "che",        "ce"        },  // Chechen
612    { "chi",        "zh"        },  // Chinese
613    { "chu",        "cu"        },  // Church Slavic, Church Slavonic, Old Bulgarian, Old Church Slavonic, Old Slavonic
614    { "chv",        "cv"        },  // Chuvash
615    { "cld",        "syr"       },  // Chaldean Neo-Aramaic -> Syriac (macrolang.)
616    { "cmn",        "zh"        },  // Mandarin -> Chinese (macrolang.)
617    { "cor",        "kw"        },  // Cornish
618    { "cos",        "co"        },  // Corsican
619    { "cre",        "cr"        },  // Cree
620    { "cwd",        "cr"        },  // Cree, Woods -> Cree (macrolang.)
621    { "cym",        "cy"        },  // Welsh
622    { "cze",        "cs"        },  // Czech
623    { "dan",        "da"        },  // Danish
624    { "deu",        "de"        },  // German
625    { "dgo",        "doi"       },  // Dogri -> Dogri (macrolang.)
626    { "dhd",        "mwr"       },  // Dhundari -> Marwari (macrolang.)
627    { "dik",        "din"       },  // Southwestern Dinka -> Dinka (macrolang.)
628    { "diq",        "zza"       },  // Dimli -> Zaza (macrolang.)
629    { "div",        "dv"        },  // Dhivehi, Divehi, Maldivian
630    { "dut",        "nl"        },  // Dutch
631    { "dzo",        "dz"        },  // Dzongkha
632    { "ekk",        "et"        },  // Std Estonian -> Estonian (macrolang.)
633    { "ell",        "el"        },  // Greek, Modern (1453-)
634    { "emk",        "man"       },  // Maninkakan, Eastern -> Mandingo (macrolang.)
635    { "eng",        "en"        },  // English
636    { "epo",        "eo"        },  // Esperanto
637    { "esk",        "ik"        },  // Northwest Alaska Inupiatun -> Inupiaq (macrolang.)
638    { "est",        "et"        },  // Estonian
639    { "eus",        "eu"        },  // Basque
640    { "ewe",        "ee"        },  // Ewe
641    { "fao",        "fo"        },  // Faroese
642    { "fas",        "fa"        },  // Persian
643    { "fat",        "ak"        },  // Fanti -> Akan (macrolang.)
644    { "fij",        "fj"        },  // Fijian
645    { "fin",        "fi"        },  // Finnish
646    { "fra",        "fr"        },  // French
647    { "fre",        "fr"        },  // French
648    { "fry",        "fy"        },  // Western Frisian
649    { "fuc",        "ff"        },  // Pular -> Fulah (macrolang.)
650    { "ful",        "ff"        },  // Fulah
651    { "gaz",        "om"        },  // W.Central Oromo -> Oromo (macrolang.)
652    { "gbo",        "grb"       },  // Northern Grebo -> Grebo (macrolang.)
653    { "geo",        "ka"        },  // Georgian
654    { "ger",        "de"        },  // German
655    { "gla",        "gd"        },  // Gaelic,Scottish
656    { "gle",        "ga"        },  // Irish
657    { "glg",        "gl"        },  // Gallegan
658    { "glv",        "gv"        },  // Manx
659    { "gno",        "gon"       },  // Northern Gondi -> Gondi (macrolang.)
660    { "gre",        "el"        },  // Greek, Modern (1453-)
661    { "grn",        "gn"        },  // Guarani
662    { "gug",        "gn"        },  // Paraguayan Guarani -> Guarani (macrolang.)
663    { "guj",        "gu"        },  // Gujarati
664    { "gya",        "gba"       },  // Northwest Gbaya -> Gbaya (Cent. Afr. Rep.) (macrolang.)
665    { "hat",        "ht"        },  // Haitian, Haitian Creole
666    { "hau",        "ha"        },  // Hausa
667    { "hbs",        "sr_Latn"   },  // Serbo-Croatian
668    { "hdn",        "hai"       },  // Northern Haida -> Haida (macrolang.)
669    { "hea",        "hmn"       },  // Northern Qiandong Miao -> Hmong (macrolang.)
670    { "heb",        "he"        },  // Hebrew
671    { "her",        "hz"        },  // Herero
672    { "him",        "srx"       },  // Himachali -> Sirmauri (= Pahari, Himachali) (macrolang.)
673    { "hin",        "hi"        },  // Hindi
674    { "hmo",        "ho"        },  // Hiri Motu
675    { "hrv",        "hr"        },  // Croatian
676    { "hun",        "hu"        },  // Hungarian
677    { "hye",        "hy"        },  // Armenian
678    { "i-ami",      "ami"       },  // Amis                       # deprecated/grandfathered
679    { "i-bnn",      "bnn"       },  // Bunun                      # deprecated/grandfathered
680    { "i-hak",      "hak"       },  // Hakka                    # deprecated RFC 3066
681    { "i-klingon",  "tlh"       },  // Klingon                    # deprecated/grandfathered
682    { "i-lux",      "lb"        },  // Luxembourgish            # deprecated RFC 3066
683    { "i-navajo",   "nv"        },  // Navajo                   # deprecated RFC 3066
684    { "i-pwn",      "pwn"       },  // Paiwan                     # deprecated/grandfathered
685    { "i-tao",      "tao"       },  // Tao                        # deprecated/grandfathered
686    { "i-tay",      "tay"       },  // Tayal                      # deprecated/grandfathered
687    { "i-tsu",      "tsu"       },  // Tsou                       # deprecated/grandfathered
688    { "ibo",        "ig"        },  // Igbo
689    { "ice",        "is"        },  // Icelandic
690    { "ido",        "io"        },  // Ido
691    { "iii",        "ii"        },  // Sichuan Yi, Nuosu
692    { "ike",        "iu"        },  // E.Canada Inuktitut -> Inuktitut (macrolang.)
693    { "iku",        "iu"        },  // Inuktitut
694    { "ile",        "ie"        },  // Interlingue
695    { "in",         "id"        },  // Indonesian               # deprecated 639 code in -> id (1989)
696    { "ina",        "ia"        },  // Interlingua
697    { "ind",        "id"        },  // Indonesian
698    { "ipk",        "ik"        },  // Inupiaq
699    { "isl",        "is"        },  // Icelandic
700    { "ita",        "it"        },  // Italian
701    { "iw",         "he"        },  // Hebrew                   # deprecated 639 code iw -> he (1989)
702    { "jav",        "jv"        },  // Javanese
703    { "jaw",        "jv"        },  // Javanese                 # deprecated 639 code jaw -> jv (2001)
704    { "ji",         "yi"        },  // Yiddish                  # deprecated 639 code ji -> yi (1989)
705    { "jpn",        "ja"        },  // Japanese
706    { "jw",         "jv"        },  // Javanese                 # deprecated
707    { "kal",        "kl"        },  // Kalaallisut
708    { "kan",        "kn"        },  // Kannada
709    { "kas",        "ks"        },  // Kashmiri
710    { "kat",        "ka"        },  // Georgian
711    { "kau",        "kr"        },  // Kanuri
712    { "kaz",        "kk"        },  // Kazakh
713    { "khk",        "mn"        },  // Halh Mongolian [mainly Cyrl] -> Mongolian (macrolang.)
714    { "khm",        "km"        },  // Khmer
715    { "kik",        "ki"        },  // Kikuyu, Gikuyu
716    { "kin",        "rw"        },  // Kinyarwanda
717    { "kir",        "ky"        },  // Kirghiz
718    { "kmr",        "ku"        },  // Northern Kurdish -> Kurdish (macrolang.)
719    { "knc",        "kr"        },  // Central Kanuri -> Kanuri (macrolang.)
720    { "kng",        "kg"        },  // Koongo -> Kongo (macrolang.)
721    { "knn",        "kok"       },  // Konkani (individ.lang) -> Konkani (macrolang.)
722    { "kom",        "kv"        },  // Komi
723    { "kon",        "kg"        },  // Kongo
724    { "kor",        "ko"        },  // Korean
725    { "kpv",        "kv"        },  // Komi-Zyrian -> Komi (macrolang.)
726    { "kua",        "kj"        },  // Kuanyama, Kwanyama
727    { "kur",        "ku"        },  // Kurdish
728    { "lao",        "lo"        },  // Lao
729    { "lat",        "la"        },  // Latin
730    { "lav",        "lv"        },  // Latvian
731    { "lbk",        "bnc"       },  // Central Bontok -> Bontok (macrolang.)
732    { "lim",        "li"        },  // Limburgan, Limburger, Limburgish
733    { "lin",        "ln"        },  // Lingala
734    { "lit",        "lt"        },  // Lithuanian
735    { "ltz",        "lb"        },  // Letzeburgesch
736    { "lub",        "lu"        },  // Luba-Katanga
737    { "lug",        "lg"        },  // Ganda
738    { "lvs",        "lv"        },  // Std Latvian -> Latvian (macrolang.)
739    { "mac",        "mk"        },  // Macedonian
740    { "mal",        "ml"        },  // Malayalam
741    { "mar",        "mr"        },  // Marathi
742    { "may",        "ms"        },  // Malay
743    { "mhr",        "chm"       },  // Mari, Eastern -> Mari (Russia) (macrolang.)
744    { "mkd",        "mk"        },  // Macedonian
745    { "mlg",        "mg"        },  // Malagasy
746    { "mlt",        "mt"        },  // Maltese
747    { "mol",        "mo"        },  // Moldavian
748    { "mon",        "mn"        },  // Mongolian
749    { "msa",        "ms"        },  // Malay
750    { "mup",        "raj"       },  // Malvi -> Rajasthani (macrolang.)
751    { "mya",        "my"        },  // Burmese
752    { "nau",        "na"        },  // Nauru
753    { "nav",        "nv"        },  // Navajo, Navaho
754    { "nbl",        "nr"        },  // South Ndebele
755    { "nde",        "nd"        },  // North Ndebele
756    { "ndo",        "ng"        },  // Ndonga
757    { "nep",        "ne"        },  // Nepali
758    { "nld",        "nl"        },  // Dutch
759    { "nno",        "nn"        },  // Norwegian Nynorsk
760    { "no",         "nb"        },  // Norwegian generic        # ambiguous 639 code no -> nb
761    { "no-bok",     "nb"        },  // Norwegian Bokmal         # deprecated RFC 3066 tag - used in old LocaleRefGetPartString
762    { "no-nyn",     "nn"        },  // Norwegian Nynorsk        # deprecated RFC 3066 tag - used in old LocaleRefGetPartString
763    { "nob",        "nb"        },  // Norwegian Bokmal
764    { "nor",        "nb"        },  // Norwegian generic        # ambiguous 639 code nor -> nb
765  //  { "no_BOKMAL",  "nb"        },  // Norwegian Bokmal           # deprecated/grandfathered, handled as a special case
766  //  { "no_NYNORSK", "nn"        },  // Norwegian Nynorsk          # deprecated/grandfathered, handled as a special case
767    { "nya",        "ny"        },  // Nyanja/Chewa/Chichewa    # 3-letter code used in old LocaleRefGetPartString
768    { "oci",        "oc"        },  // Occitan/Provencal
769    { "ojg",        "oj"        },  // Ojibwa, Eastern -> Ojibwa (macrolang.)
770    { "oji",        "oj"        },  // Ojibwa
771    { "ori",        "or"        },  // Oriya
772    { "orm",        "om"        },  // Oromo,Galla
773    { "oss",        "os"        },  // Ossetian, Ossetic
774    { "pan",        "pa"        },  // Panjabi
775    { "pbu",        "ps"        },  // N.Pashto, -> Pushto (macrolang.)
776    { "per",        "fa"        },  // Persian
777    { "pes",        "fa"        },  // W.Farsi -> Persian (macrolang.)
778    { "pli",        "pi"        },  // Pali
779    { "plt",        "mg"        },  // Plateau Malagasy -> Malagasy (macrolang.)
780    { "pnb",        "lah"       },  // W.Panjabi -> Lahnda (macrolang.)
781    { "pol",        "pl"        },  // Polish
782    { "por",        "pt"        },  // Portuguese
783    { "pus",        "ps"        },  // Pushto
784    { "que",        "qu"        },  // Quechua
785    { "qxp",        "qu"        },  // Puno Quechua -> Quechua (macrolang.)
786    { "rmy",        "rom"       },  // Vlax Romani -> Romany (macrolang.)
787    { "roh",        "rm"        },  // Raeto-Romance
788    { "ron",        "ro"        },  // Romanian
789    { "rum",        "ro"        },  // Romanian
790    { "run",        "rn"        },  // Rundi
791    { "rus",        "ru"        },  // Russian
792    { "sag",        "sg"        },  // Sango
793    { "san",        "sa"        },  // Sanskrit
794    { "scc",        "sr"        },  // Serbian
795    { "scr",        "hr"        },  // Croatian
796    { "sgn-be-fr",  "sfb"       },  // Belgian-French Sign Lang.  # deprecated/grandfathered
797    { "sgn-be-nl",  "vgt"       },  // Belgian-Flemish Sign Lang. # deprecated/grandfathered
798    { "sgn-ch-de",  "sgg"       },  // Swiss German Sign Lang.    # deprecated/grandfathered
799    { "sin",        "si"        },  // Sinhalese
800    { "slk",        "sk"        },  // Slovak
801    { "slo",        "sk"        },  // Slovak
802    { "slv",        "sl"        },  // Slovenian
803    { "sme",        "se"        },  // Sami,Northern
804    { "smo",        "sm"        },  // Samoan
805    { "sna",        "sn"        },  // Shona
806    { "snd",        "sd"        },  // Sindhi
807    { "som",        "so"        },  // Somali
808    { "sot",        "st"        },  // Southern Sotho
809    { "spa",        "es"        },  // Spanish
810    { "spy",        "kln"       },  // Sabaot -> Kalenjin (macrolang.)
811    { "sqi",        "sq"        },  // Albanian
812    { "src",        "sc"        },  // Sardinian, Logudorese -> Sardinian (macrolang.)
813    { "srd",        "sc"        },  // Sardinian
814    { "srp",        "sr"        },  // Serbian
815    { "ssw",        "ss"        },  // Swati
816    { "sun",        "su"        },  // Sundanese
817    { "swa",        "sw"        },  // Swahili
818    { "swe",        "sv"        },  // Swedish
819    { "swh",        "sw"        },  // Swahili (individ.lang) -> Swahili (macrolang.)
820    { "tah",        "ty"        },  // Tahitian
821    { "tam",        "ta"        },  // Tamil
822    { "tat",        "tt"        },  // Tatar
823    { "tel",        "te"        },  // Telugu
824    { "tgk",        "tg"        },  // Tajik
825    { "tgl",        "tl"        },  // Tagalog
826    { "tha",        "th"        },  // Thai
827    { "tib",        "bo"        },  // Tibetan
828    { "tir",        "ti"        },  // Tigrinya
829    { "tl",         "fil"       },  // Tagalog                  # legacy
830    { "ton",        "to"        },  // Tongan
831    { "tsn",        "tn"        },  // Tswana
832    { "tso",        "ts"        },  // Tsonga
833    { "ttq",        "tmh"       },  // Tamajaq, Tawallammat -> Tamashek (macrolang.)
834    { "tuk",        "tk"        },  // Turkmen
835    { "tur",        "tr"        },  // Turkish
836    { "tw",         "ak"        },  // Twi -> Akan (macrolang.)
837    { "twi",        "ak"        },  // Twi
838    { "uig",        "ug"        },  // Uighur
839    { "ukr",        "uk"        },  // Ukrainian
840    { "umu",        "del"       },  // Munsee -> Delaware (macrolang.)
841    { "urd",        "ur"        },  // Urdu
842    { "uzb",        "uz"        },  // Uzbek
843    { "uzn",        "uz"        },  // N. Uzbek -> Uzbek (macrolang.)
844    { "ven",        "ve"        },  // Venda
845    { "vie",        "vi"        },  // Vietnamese
846    { "vol",        "vo"        },  // Volapük
847    { "wel",        "cy"        },  // Welsh
848    { "wln",        "wa"        },  // Walloon
849    { "wol",        "wo"        },  // Wolof
850    { "xho",        "xh"        },  // Xhosa
851    { "xpe",        "kpe"       },  // Kpelle, Liberia -> Kpelle (macrolang.)
852    { "xsl",        "den"       },  // Slavey, South -> Slave (Athapascan) (macrolang.)
853    { "ydd",        "yi"        },  // Yiddish,E. -> Yiddish (macrolang.)
854    { "yid",        "yi"        },  // Yiddish
855    { "yor",        "yo"        },  // Yoruba
856    { "zai",        "zap"       },  // Zapotec, Isthmus -> Zapotec (macrolang.)
857    { "zh-cdo",     "cdo"       },  // Chinese, Min Dong        # extlang
858    { "zh-cjy",     "cjy"       },  // Chinese, Jinyu           # extlang
859    { "zh-cmn",     "zh"        },  // Chinese, Mandarin        # extlang
860    { "zh-cpx",     "cpx"       },  // Chinese, Pu-Xian         # extlang
861    { "zh-czh",     "czh"       },  // Chinese, Huizhou         # extlang
862    { "zh-czo",     "czo"       },  // Chinese, Min Zhong       # extlang
863    { "zh-gan",     "gan"       },  // Chinese, Gan             # extlang
864    { "zh-guoyu",   "zh"        },  // Mandarin/Std Chinese     # deprecated
865    { "zh-hak",     "hak"       },  // Chinese, Hakka           # extlang
866    { "zh-hakka",   "hak"       },  // Hakka                    # deprecated
867    { "zh-hsn",     "hsn"       },  // Chinese, Xiang           # extlang
868    { "zh-min-nan", "nan"       },  // Minnan,Hokkien,Taiwanese,So. Fujian # deprecated
869    { "zh-mnp",     "mnp"       },  // Chinese, Min Bei         # extlang
870    { "zh-nan",     "nan"       },  // Chinese, Min Nan         # extlang
871    { "zh-wuu",     "wuu"       },  // Chinese, Wu              # extlang
872    { "zh-xiang",   "hsn"       },  // Xiang/Hunanese           # deprecated
873    { "zh-yue",     "yue"       },  // Chinese, Yue             # extlang
874    { "zha",        "za"        },  // Zhuang, Chuang
875    { "zho",        "zh"        },  // Chinese
876    { "zsm",        "ms"        },  // Std Malay -> Malay (macrolang.)
877    { "zul",        "zu"        },  // Zulu
878    { "zyb",        "za"        },  // Yongbei Zhuang -> Zhuang (macrolang.)
879};
880enum {
881    kNumLocaleStringPrefixToCanonical = sizeof(localeStringPrefixToCanonical)/sizeof(KeyStringToResultString)
882};
883
884
885static const SpecialCaseUpdates specialCases[] = {
886// Data for special cases
887// a) The 3166 code CS was used for Czechoslovakia until 1993, when that country split and the code was
888// replaced by CZ and SK. Then in 2003-07, the code YU (formerly designating all of Yugoslavia, then after
889// the 1990s breakup just designating what is now Serbia and Montenegro) was changed to CS! Then after
890// Serbia and Montenegro split, the code CS was replaced in 2006-09 with separate codes RS and ME. If we
891// see CS but a language of cs or sk, we change CS to CZ or SK. Otherwise, we change CS (and old YU) to RS.
892// b) The 639 code sh for Serbo-Croatian was also replaced in the 1990s by separate codes hr and sr, and
893// deprecated in 2000. We guess which one to map it to as follows: If there is a region tag of HR we use
894// hr; if there is a region tag of (now) RS we use sr; else we do not change it (not enough info).
895// c) There are other codes that have been updated without these issues (eg. TP to TL), plus among the
896// "exceptionally reserved" codes some are just alternates for standard codes (eg. UK for GB).
897    {   NULL,   "-UK",  "GB",   NULL,   NULL    },  // always change UK to GB (UK is "exceptionally reserved" to mean GB)
898    {   NULL,   "-TP",  "TL",   NULL,   NULL    },  // always change TP to TL (East Timor, code changed 2002-05)
899    {   "cs",   "-CS",  "CZ",   NULL,   NULL    },  // if language is cs, change CS (pre-1993 Czechoslovakia) to CZ (Czech Republic)
900    {   "sk",   "-CS",  "SK",   NULL,   NULL    },  // if language is sk, change CS (pre-1993 Czechoslovakia) to SK (Slovakia)
901    {   NULL,   "-CS",  "RS",   NULL,   NULL    },  // otherwise map CS (assume Serbia+Montenegro) to RS (Serbia)
902    {   NULL,   "-YU",  "RS",   NULL,   NULL    },  // also map old YU (assume Serbia+Montenegro) to RS (Serbia)
903    {   "sh",   "-HR",  "hr",   "-RS",  "sr"    },  // then if language is old 'sh' (SerboCroatian), change it to 'hr' (Croatian)
904                                                    // if we find HR (Croatia) or to 'sr' (Serbian) if we find RS (Serbia).
905                                                    // Note: Do this after changing YU/CS toRS as above.
906    {   NULL,   NULL,   NULL,   NULL,   NULL    }   // terminator
907};
908
909
910static const KeyStringToResultString localeStringRegionToDefaults[] = {
911// For some region-code suffixes, there are default substrings to strip off for canonical string.
912// Must be sorted according to how strcmp compares the strings in the first column
913//
914//  region      default writing
915//  suffix      system tags, strip     comment
916//  --------    -------------          ---------
917    { "_CN",    "-Hans"         },  // mainland China, default is simplified
918    { "_HK",    "-Hant"         },  // Hong Kong, default is traditional
919    { "_MO",    "-Hant"         },  // Macao, default is traditional
920    { "_SG",    "-Hans"         },  // Singapore, default is simplified
921    { "_TW",    "-Hant"         },  // Taiwan, default is traditional
922};
923enum {
924    kNumLocaleStringRegionToDefaults = sizeof(localeStringRegionToDefaults)/sizeof(KeyStringToResultString)
925};
926
927static const KeyStringToResultString localeStringPrefixToDefaults[] = {
928// For some initial portions of language tag, there are default substrings to strip off for canonical string.
929// Must be sorted according to how strcmp compares the strings in the first column
930//
931//  language    default writing
932//  tag prefix  system tags, strip     comment
933//  --------    -------------          ---------
934    { "ab-",    "-Cyrl"         },  // Abkhazian
935    { "af-",    "-Latn"         },  // Afrikaans
936    { "agq-",   "-Latn"         },  // Aghem
937    { "ak-",    "-Latn"         },  // Akan
938    { "am-",    "-Ethi"         },  // Amharic
939    { "ar-",    "-Arab"         },  // Arabic
940    { "as-",    "-Beng"         },  // Assamese
941    { "asa-",   "-Latn"         },  // Asu
942    { "ay-",    "-Latn"         },  // Aymara
943    { "az-",    "-Latn"         },  // Azerbaijani
944    { "bas-",   "-Latn"         },  // Basaa
945    { "be-",    "-Cyrl"         },  // Belarusian
946    { "bem-",   "-Latn"         },  // Bemba
947    { "bez-",   "-Latn"         },  // Bena
948    { "bg-",    "-Cyrl"         },  // Bulgarian
949    { "bm-",    "-Latn"         },  // Bambara
950    { "bn-",    "-Beng"         },  // Bengali
951    { "bo-",    "-Tibt"         },  // Tibetan (? not Suppress-Script)
952    { "br-",    "-Latn"         },  // Breton (? not Suppress-Script)
953    { "brx-",   "-Deva"         },  // Bodo
954    { "bs-",    "-Latn"         },  // Bosnian
955    { "ca-",    "-Latn"         },  // Catalan
956    { "cgg-",   "-Latn"         },  // Chiga
957    { "chr-",   "-Cher"         },  // Cherokee
958    { "cs-",    "-Latn"         },  // Czech
959    { "cy-",    "-Latn"         },  // Welsh
960    { "da-",    "-Latn"         },  // Danish
961    { "dav-",   "-Latn"         },  // Taita
962    { "de-",    "-Latn -1901"   },  // German, traditional orthography
963    { "dje-",   "-Latn"         },  // Zarma
964    { "dua-",   "-Latn"         },  // Duala
965    { "dv-",    "-Thaa"         },  // Divehi/Maldivian
966    { "dyo-",   "-Latn"         },  // Jola-Fonyi
967    { "dz-",    "-Tibt"         },  // Dzongkha
968    { "ebu-",   "-Latn"         },  // Embu
969    { "ee-",    "-Latn"         },  // Ewe
970    { "el-",    "-Grek"         },  // Greek (modern, monotonic)
971    { "en-",    "-Latn"         },  // English
972    { "eo-",    "-Latn"         },  // Esperanto
973    { "es-",    "-Latn"         },  // Spanish
974    { "et-",    "-Latn"         },  // Estonian
975    { "eu-",    "-Latn"         },  // Basque
976    { "ewo-",   "-Latn"         },  // Ewondo
977    { "fa-",    "-Arab"         },  // Farsi
978    { "ff-",    "-Latn"         },  // Fulah
979    { "fi-",    "-Latn"         },  // Finnish
980    { "fil-",   "-Latn"         },  // Tagalog
981    { "fo-",    "-Latn"         },  // Faroese
982    { "fr-",    "-Latn"         },  // French
983    { "ga-",    "-Latn"         },  // Irish
984    { "gd-",    "-Latn"         },  // Scottish Gaelic (? not Suppress-Script)
985    { "gl-",    "-Latn"         },  // Galician
986    { "gn-",    "-Latn"         },  // Guarani
987    { "gsw-",   "-Latn"         },  // Swiss German
988    { "gu-",    "-Gujr"         },  // Gujarati
989    { "guz-",   "-Latn"         },  // Gusii
990    { "gv-",    "-Latn"         },  // Manx
991    { "ha-",    "-Latn"         },  // Hausa
992    { "haw-",   "-Latn"         },  // Hawaiian (? not Suppress-Script)
993    { "he-",    "-Hebr"         },  // Hebrew
994    { "hi-",    "-Deva"         },  // Hindi
995    { "hr-",    "-Latn"         },  // Croatian
996    { "hu-",    "-Latn"         },  // Hungarian
997    { "hy-",    "-Armn"         },  // Armenian
998    { "id-",    "-Latn"         },  // Indonesian
999    { "ig-",    "-Latn"         },  // Igbo
1000    { "ii-",    "-Yiii"         },  // Sichuan Yi
1001    { "is-",    "-Latn"         },  // Icelandic
1002    { "it-",    "-Latn"         },  // Italian
1003    { "ja-",    "-Jpan"         },  // Japanese
1004    { "jmc-",   "-Latn"         },  // Machame
1005    { "ka-",    "-Geor"         },  // Georgian
1006    { "kab-",   "-Latn"         },  // Kabyle
1007    { "kam-",   "-Latn"         },  // Kamba
1008    { "kde-",   "-Latn"         },  // Makonde
1009    { "kea-",   "-Latn"         },  // Kabuverdianu
1010    { "khq-",   "-Latn"         },  // Koyra Chiini
1011    { "ki-",    "-Latn"         },  // Kikuyu
1012    { "kk-",    "-Cyrl"         },  // Kazakh
1013    { "kl-",    "-Latn"         },  // Kalaallisut/Greenlandic
1014    { "km-",    "-Khmr"         },  // Central Khmer
1015    { "kn-",    "-Knda"         },  // Kannada
1016    { "ko-",    "-Hang"         },  // Korean (? not Suppress-Script)
1017    { "kok-",   "-Deva"         },  // Konkani
1018    { "ksb-",   "-Latn"         },  // Shambala
1019    { "ksf-",   "-Latn"         },  // Bafia
1020    { "kw-",    "-Latn"         },  // Cornish
1021    { "ky-",    "-Cyrl"         },  // Kirghiz
1022    { "la-",    "-Latn"         },  // Latin
1023    { "lag-",   "-Latn"         },  // Langi
1024    { "lb-",    "-Latn"         },  // Luxembourgish
1025    { "lg-",    "-Latn"         },  // Ganda
1026    { "ln-",    "-Latn"         },  // Lingala
1027    { "lo-",    "-Laoo"         },  // Lao
1028    { "lt-",    "-Latn"         },  // Lithuanian
1029    { "lu-",    "-Latn"         },  // Luba-Katanga
1030    { "luo-",   "-Latn"         },  // Luo
1031    { "luy-",   "-Latn"         },  // Luyia
1032    { "lv-",    "-Latn"         },  // Latvian
1033    { "mas-",   "-Latn"         },  // Masai
1034    { "mer-",   "-Latn"         },  // Meru
1035    { "mfe-",   "-Latn"         },  // Morisyen
1036    { "mg-",    "-Latn"         },  // Malagasy
1037    { "mgh-",   "-Latn"         },  // Makhuwa-Meetto
1038    { "mk-",    "-Cyrl"         },  // Macedonian
1039    { "ml-",    "-Mlym"         },  // Malayalam
1040    { "mn-",    "-Cyrl"         },  // Mongolian
1041    { "mo-",    "-Latn"         },  // Moldavian
1042    { "mr-",    "-Deva"         },  // Marathi
1043    { "ms-",    "-Latn"         },  // Malay
1044    { "mt-",    "-Latn"         },  // Maltese
1045    { "mua-",   "-Latn"         },  // Mundang
1046    { "my-",    "-Mymr"         },  // Burmese/Myanmar
1047    { "naq-",   "-Latn"         },  // Nama
1048    { "nb-",    "-Latn"         },  // Norwegian Bokmal
1049    { "nd-",    "-Latn"         },  // North Ndebele
1050    { "ne-",    "-Deva"         },  // Nepali
1051    { "nl-",    "-Latn"         },  // Dutch
1052    { "nmg-",   "-Latn"         },  // Kwasio
1053    { "nn-",    "-Latn"         },  // Norwegian Nynorsk
1054    { "nus-",   "-Latn"         },  // Nuer
1055    { "ny-",    "-Latn"         },  // Chichewa/Nyanja
1056    { "nyn-",   "-Latn"         },  // Nyankole
1057    { "om-",    "-Latn"         },  // Oromo
1058    { "or-",    "-Orya"         },  // Oriya
1059    { "pa-",    "-Guru"         },  // Punjabi
1060    { "pl-",    "-Latn"         },  // Polish
1061    { "ps-",    "-Arab"         },  // Pushto
1062    { "pt-",    "-Latn"         },  // Portuguese
1063    { "qu-",    "-Latn"         },  // Quechua
1064    { "rm-",    "-Latn"         },  // Romansh
1065    { "rn-",    "-Latn"         },  // Rundi
1066    { "ro-",    "-Latn"         },  // Romanian
1067    { "rof-",   "-Latn"         },  // Rombo
1068    { "ru-",    "-Cyrl"         },  // Russian
1069    { "rw-",    "-Latn"         },  // Kinyarwanda
1070    { "rwk-",   "-Latn"         },  // Rwa
1071    { "sa-",    "-Deva"         },  // Sanskrit (? not Suppress-Script)
1072    { "saq-",   "-Latn"         },  // Samburu
1073    { "sbp-",   "-Latn"         },  // Sangu
1074    { "se-",    "-Latn"         },  // Sami (? not Suppress-Script)
1075    { "seh-",   "-Latn"         },  // Sena
1076    { "ses-",   "-Latn"         },  // Koyraboro Senni
1077    { "sg-",    "-Latn"         },  // Sango
1078    { "shi-",   "-Latn"         },  // Tachelhit
1079    { "si-",    "-Sinh"         },  // Sinhala
1080    { "sk-",    "-Latn"         },  // Slovak
1081    { "sl-",    "-Latn"         },  // Slovenian
1082    { "sn-",    "-Latn"         },  // Shona
1083    { "so-",    "-Latn"         },  // Somali
1084    { "sq-",    "-Latn"         },  // Albanian
1085    { "sr-",    "-Cyrl"         },  // Serbian
1086    { "sv-",    "-Latn"         },  // Swedish
1087    { "sw-",    "-Latn"         },  // Swahili
1088    { "swc-",   "-Latn"         },  // Congo Swahili
1089    { "ta-",    "-Taml"         },  // Tamil
1090    { "te-",    "-Telu"         },  // Telugu
1091    { "teo-",   "-Latn"         },  // Teso
1092    { "tg-",    "-Cyrl"         },  // Tajik
1093    { "th-",    "-Thai"         },  // Thai
1094    { "ti-",    "-Ethi"         },  // Tigrinya
1095    { "tk-",    "-Latn"         },  // Turkmen
1096    { "tn-",    "-Latn"         },  // Tswana
1097    { "to-",    "-Latn"         },  // Tonga of Tonga Islands
1098    { "tr-",    "-Latn"         },  // Turkish
1099    { "twq-",   "-Latn"         },  // Tasawaq
1100    { "tzm-",   "-Latn"         },  // Central Morocco Tamazight
1101    { "uk-",    "-Cyrl"         },  // Ukrainian
1102    { "ur-",    "-Arab"         },  // Urdu
1103    { "uz-",    "-Cyrl"         },  // Uzbek
1104    { "vai-",   "-Vaii"         },  // Vai
1105    { "vi-",    "-Latn"         },  // Vietnamese
1106    { "vun-",   "-Latn"         },  // Vunjo
1107    { "wo-",    "-Latn"         },  // Wolof
1108    { "xh-",    "-Latn"         },  // Xhosa
1109    { "xog-",   "-Latn"         },  // Soga
1110    { "yav-",   "-Latn"         },  // Yangben
1111    { "yi-",    "-Hebr"         },  // Yiddish
1112    { "yo-",    "-Latn"         },  // Yoruba
1113    { "zh-",    "-Hani"         },  // Chinese (? not Suppress-Script)
1114    { "zu-",    "-Latn"         },  // Zulu
1115};
1116enum {
1117    kNumLocaleStringPrefixToDefaults = sizeof(localeStringPrefixToDefaults)/sizeof(KeyStringToResultString)
1118};
1119
1120static const KeyStringToResultString appleLocaleToLanguageString[] = {
1121// Map locale strings that Apple uses as language IDs to real language strings.
1122// Must be sorted according to how strcmp compares the strings in the first column.
1123// Note: Now we remove all transforms of the form ll_RR -> ll-RR, they are now
1124// handled in the code. <1.19>
1125//
1126//    locale 			lang			[  comment ]
1127//    string			string
1128//    -------			-------
1129    { "en_US_POSIX",	"en-US-POSIX"	},  // POSIX locale, need as language string			// <1.17> [3840752]
1130    { "zh_CN",  		"zh-Hans"		},  // mainland China => simplified
1131    { "zh_HK",  		"zh-Hant"		},  // Hong Kong => traditional, not currently used
1132    { "zh_MO",  		"zh-Hant"		},  // Macao => traditional, not currently used
1133    { "zh_SG",  		"zh-Hans"		},  // Singapore => simplified, not currently used
1134    { "zh_TW",  		"zh-Hant"		},  // Taiwan => traditional
1135};
1136enum {
1137    kNumAppleLocaleToLanguageString = sizeof(appleLocaleToLanguageString)/sizeof(KeyStringToResultString)
1138};
1139
1140static const KeyStringToResultString appleLocaleToLanguageStringForCFBundle[] = {
1141// Map locale strings that Apple uses as language IDs to real language strings.
1142// Must be sorted according to how strcmp compares the strings in the first column.
1143//
1144//    locale 			lang			[  comment ]
1145//    string			string
1146//    -------			-------
1147    { "de_AT",  		"de-AT"			},  // Austrian German
1148    { "de_CH",  		"de-CH"			},  // Swiss German
1149//  { "de_DE",  		"de-DE"			},  // German for Germany (default), not currently used
1150    { "en_AU", 			"en-AU"			},  // Australian English
1151    { "en_CA",  		"en-CA"			},  // Canadian English
1152    { "en_GB",  		"en-GB"			},  // British English
1153//  { "en_IE",  		"en-IE"			},  // Irish English, not currently used
1154    { "en_US",  		"en-US"			},  // U.S. English
1155    { "en_US_POSIX",	"en-US-POSIX"	},  // POSIX locale, need as language string			// <1.17> [3840752]
1156//  { "fr_BE",  		"fr-BE"			},  // Belgian French, not currently used
1157    { "fr_CA",  		"fr-CA"			},  // Canadian French
1158    { "fr_CH",  		"fr-CH"			},  // Swiss French
1159//  { "fr_FR",  		"fr-FR"			},  // French for France (default), not currently used
1160    { "nl_BE",  		"nl-BE"			},  // Flemish = Vlaams, Dutch for Belgium
1161//  { "nl_NL",  		"nl-NL"			},  // Dutch for Netherlands (default), not currently used
1162    { "pt_BR",  		"pt-BR"			},  // Brazilian Portuguese
1163    { "pt_PT",  		"pt-PT"     	},  // Portuguese for Portugal
1164    { "zh_CN",  		"zh-Hans"		},  // mainland China => simplified
1165    { "zh_HK",  		"zh-Hant"		},  // Hong Kong => traditional, not currently used
1166    { "zh_MO",  		"zh-Hant"		},  // Macao => traditional, not currently used
1167    { "zh_SG",  		"zh-Hans"		},  // Singapore => simplified, not currently used
1168    { "zh_TW",  		"zh-Hant"		},  // Taiwan => traditional
1169};
1170enum {
1171    kNumAppleLocaleToLanguageStringForCFBundle = sizeof(appleLocaleToLanguageStringForCFBundle)/sizeof(KeyStringToResultString)
1172};
1173
1174
1175struct LocaleToLegacyCodes {
1176    const char *        locale;	// reduced to language plus one other component (script, region, variant), separators normalized to'_'
1177    RegionCode		    regCode;
1178    LangCode		    langCode;
1179    CFStringEncoding    encoding;
1180};
1181typedef struct LocaleToLegacyCodes LocaleToLegacyCodes;
1182
1183static const LocaleToLegacyCodes localeToLegacyCodes[] = {
1184	//	locale			RegionCode					LangCode						CFStringEncoding
1185    {   "af"/*ZA*/,     102/*verAfrikaans*/,        141/*langAfrikaans*/,            0/*Roman*/              },  // Latn
1186    {   "am",            -1,                         85/*langAmharic*/,             28/*Ethiopic*/           },  // Ethi
1187    {   "ar",            16/*verArabic*/,            12/*langArabic*/,               4/*Arabic*/             },  // Arab;
1188    {   "as",            -1,                         68/*langAssamese*/,            13/*Bengali*/            },  // Beng;
1189    {   "ay",            -1,                        134/*langAymara*/,               0/*Roman*/              },  // Latn;
1190    {   "az",            -1,                        150/*langAzerbaijanRoman*/,      0/*Roman*/              },  // "az" defaults to -Latn
1191    {   "az_Arab",       -1,                         50/*langAzerbaijanAr*/,         4/*Arabic*/             },  // Arab;
1192    {   "az_Cyrl",       -1,                         49/*langAzerbaijani*/,          7/*Cyrillic*/           },  // Cyrl;
1193    {   "az_Latn",       -1,                        150/*langAzerbaijanRoman*/,      0/*Roman*/              },  // Latn;
1194    {   "be"/*BY*/,      61/*verBelarus*/,           46/*langBelorussian*/,          7/*Cyrillic*/           },  // Cyrl;
1195    {   "bg"/*BG*/,      72/*verBulgaria*/,          44/*langBulgarian*/,            7/*Cyrillic*/           },  // Cyrl;
1196    {   "bn",            60/*verBengali*/,           67/*langBengali*/,             13/*Bengali*/            },  // Beng;
1197    {   "bo",           105/*verTibetan*/,           63/*langTibetan*/,             26/*Tibetan*/            },  // Tibt;
1198    {   "br",            77/*verBreton*/,           142/*langBreton*/,              39/*Celtic*/             },  // Latn;
1199    {   "ca"/*ES*/,      73/*verCatalonia*/,        130/*langCatalan*/,              0/*Roman*/              },  // Latn;
1200    {   "cs"/*CZ*/,      56/*verCzech*/,             38/*langCzech*/,               29/*CentralEurRoman*/    },  // Latn;
1201    {   "cy",            79/*verWelsh*/,            128/*langWelsh*/,               39/*Celtic*/             },  // Latn;
1202    {   "da"/*DK*/,       9/*verDenmark*/,            7/*langDanish*/,               0/*Roman*/              },  // Latn;
1203    {   "de",             3/*verGermany*/,            2/*langGerman*/,               0/*Roman*/              },  // assume "de" defaults to verGermany
1204    {   "de_1996",       70/*verGermanReformed*/,     2/*langGerman*/,               0/*Roman*/              },
1205    {   "de_AT",         92/*verAustria*/,            2/*langGerman*/,               0/*Roman*/              },
1206    {   "de_CH",         19/*verGrSwiss*/,            2/*langGerman*/,               0/*Roman*/              },
1207    {   "de_DE",          3/*verGermany*/,            2/*langGerman*/,               0/*Roman*/              },
1208    {   "dz"/*BT*/,      83/*verBhutan*/,           137/*langDzongkha*/,            26/*Tibetan*/            },  // Tibt;
1209    {   "el",            20/*verGreece*/,            14/*langGreek*/,                6/*Greek*/              },  // assume "el" defaults to verGreece
1210    {   "el_CY",         23/*verCyprus*/,            14/*langGreek*/,                6/*Greek*/              },
1211    {   "el_GR",         20/*verGreece*/,            14/*langGreek*/,                6/*Greek*/              },  // modern monotonic
1212    {   "en",             0/*verUS*/,                 0/*langEnglish*/,              0/*Roman*/              },  // "en" defaults to verUS (per Chris Hansten)
1213    {   "en_001",        37/*verInternational*/,      0/*langEnglish*/,              0/*Roman*/              },
1214    {   "en_AU",         15/*verAustralia*/,          0/*langEnglish*/,              0/*Roman*/              },
1215    {   "en_CA",         82/*verEngCanada*/,          0/*langEnglish*/,              0/*Roman*/              },
1216    {   "en_GB",          2/*verBritain*/,            0/*langEnglish*/,              0/*Roman*/              },
1217    {   "en_IE",        108/*verIrelandEnglish*/,     0/*langEnglish*/,              0/*Roman*/              },
1218    {   "en_SG",        100/*verSingapore*/,          0/*langEnglish*/,              0/*Roman*/              },
1219    {   "en_US",          0/*verUS*/,                 0/*langEnglish*/,              0/*Roman*/              },
1220    {   "eo",           103/*verEsperanto*/,         94/*langEsperanto*/,            0/*Roman*/              },  // Latn;
1221    {   "es",             8/*verSpain*/,              6/*langSpanish*/,              0/*Roman*/              },  // "es" defaults to verSpain (per Chris Hansten)
1222    {   "es_419",        86/*verSpLatinAmerica*/,     6/*langSpanish*/,              0/*Roman*/              },  // new BCP 47 tag
1223    {   "es_ES",          8/*verSpain*/,              6/*langSpanish*/,              0/*Roman*/              },
1224    {   "es_MX",         86/*verSpLatinAmerica*/,     6/*langSpanish*/,              0/*Roman*/              },
1225    {   "es_US",         86/*verSpLatinAmerica*/,     6/*langSpanish*/,              0/*Roman*/              },
1226    {   "et"/*EE*/,      44/*verEstonia*/,           27/*langEstonian*/,            29/*CentralEurRoman*/    },
1227    {   "eu",            -1,                        129/*langBasque*/,               0/*Roman*/              },  // Latn;
1228    {   "fa"/*IR*/,      48/*verIran*/,              31/*langFarsi/Persian*/,       0x8C/*Farsi*/            },  // Arab;
1229    {   "fi"/*FI*/,      17/*verFinland*/,           13/*langFinnish*/,              0/*Roman*/              },
1230    {   "fil",           -1,                         82/*langTagalog*/,              0/*Roman*/              },  // Latn;
1231    {   "fo"/*FO*/,      47/*verFaroeIsl*/,          30/*langFaroese*/,             37/*Icelandic*/          },
1232    {   "fr",             1/*verFrance*/,             1/*langFrench*/,               0/*Roman*/              },  // "fr" defaults to verFrance (per Chris Hansten)
1233    {   "fr_001",        91/*verFrenchUniversal*/,    1/*langFrench*/,               0/*Roman*/              },
1234    {   "fr_BE",         98/*verFrBelgium*/,          1/*langFrench*/,               0/*Roman*/              },
1235    {   "fr_CA",         11/*verFrCanada*/,           1/*langFrench*/,               0/*Roman*/              },
1236    {   "fr_CH",         18/*verFrSwiss*/,            1/*langFrench*/,               0/*Roman*/              },
1237    {   "fr_FR",          1/*verFrance*/,             1/*langFrench*/,               0/*Roman*/              },
1238    {   "ga"/*IE*/,      50/*verIreland*/,           35/*langIrishGaelic*/,          0/*Roman*/              },  // no dots (h after)
1239    {   "ga_Latg"/*IE*/, 81/*verIrishGaelicScrip*/, 146/*langIrishGaelicScript*/,   40/*Gaelic*/             },  // using dots
1240    {   "gd",            75/*verScottishGaelic*/,   144/*langScottishGaelic*/,      39/*Celtic*/             },
1241    {   "gl",            -1,                        140/*langGalician*/,             0/*Roman*/              },  // Latn;
1242    {   "gn",            -1,                        133/*langGuarani*/,              0/*Roman*/              },  // Latn;
1243    {   "grc",           40/*verGreekAncient*/,     148/*langGreekAncient*/,         6/*Greek*/              },  // polytonic (MacGreek doesn't actually support it)
1244    {   "gu"/*IN*/,      94/*verGujarati*/,          69/*langGujarati*/,            11/*Gujarati*/           },  // Gujr;
1245    {   "gv",            76/*verManxGaelic*/,       145/*langManxGaelic*/,          39/*Celtic*/             },  // Latn;
1246    {   "he"/*IL*/,      13/*verIsrael*/,            10/*langHebrew*/,               5/*Hebrew*/             },  // Hebr;
1247    {   "hi"/*IN*/,      33/*verIndiaHindi*/,        21/*langHindi*/,                9/*Devanagari*/         },  // Deva;
1248    {   "hr"/*HR*/,      68/*verCroatia*/,           18/*langCroatian*/,            36/*Croatian*/           },
1249    {   "hu"/*HU*/,      43/*verHungary*/,           26/*langHungarian*/,           29/*CentralEurRoman*/    },
1250    {   "hy"/*AM*/,      84/*verArmenian*/,          51/*langArmenian*/,            24/*Armenian*/           },  // Armn;
1251    {   "id",            -1,                         81/*langIndonesian*/,           0/*Roman*/              },  // Latn;
1252    {   "is"/*IS*/,      21/*verIceland*/,           15/*langIcelandic*/,           37/*Icelandic*/          },
1253    {   "it",             4/*verItaly*/,              3/*langItalian*/,              0/*Roman*/              },  // "it" defaults to verItaly
1254    {   "it_CH",         36/*verItalianSwiss*/,       3/*langItalian*/,              0/*Roman*/              },
1255    {   "it_IT",          4/*verItaly*/,              3/*langItalian*/,              0/*Roman*/              },
1256    {   "iu"/*CA*/,      78/*verNunavut*/,          143/*langInuktitut*/,           0xEC/*Inuit*/            },  // Cans;
1257    {   "ja"/*JP*/,      14/*verJapan*/,             11/*langJapanese*/,             1/*Japanese*/           },  // Jpan;
1258    {   "jv",            -1,                        138/*langJavaneseRom*/,          0/*Roman*/              },  // Latn;
1259    {   "ka"/*GE*/,      85/*verGeorgian*/,          52/*langGeorgian*/,            23/*Georgian*/           },  // Geor;
1260    {   "kk",            -1,                         48/*langKazakh*/,               7/*Cyrillic*/           },  // "kk" defaults to -Cyrl; also have -Latn, -Arab
1261    {   "kl",           107/*verGreenland*/,        149/*langGreenlandic*/,          0/*Roman*/              },  // Latn;
1262    {   "km",            -1,                         78/*langKhmer*/,               20/*Khmer*/              },  // Khmr;
1263    {   "kn",            -1,                         73/*langKannada*/,             16/*Kannada*/            },  // Knda;
1264    {   "ko"/*KR*/,      51/*verKorea*/,             23/*langKorean*/,               3/*Korean*/             },  // Hang;
1265    {   "ks",            -1,                         61/*langKashmiri*/,             4/*Arabic*/             },  // Arab;
1266    {   "ku",            -1,                         60/*langKurdish*/,              4/*Arabic*/             },  // Arab;
1267    {   "ky",            -1,                         54/*langKirghiz*/,              7/*Cyrillic*/           },  // Cyrl; also -Latn, -Arab
1268    {   "la",            -1,                        131/*langLatin*/,                0/*Roman*/              },  // Latn;
1269    {   "lo",            -1,                         79/*langLao*/,                 22/*Laotian*/            },  // Laoo;
1270    {   "lt"/*LT*/,      41/*verLithuania*/,         24/*langLithuanian*/,          29/*CentralEurRoman*/    },
1271    {   "lv"/*LV*/,      45/*verLatvia*/,            28/*langLatvian*/,             29/*CentralEurRoman*/    },
1272    {   "mg",            -1,                         93/*langMalagasy*/,             0/*Roman*/              },  // Latn;
1273    {   "mk"/*MK*/,      67/*verMacedonian*/,        43/*langMacedonian*/,           7/*Cyrillic*/           },  // Cyrl;
1274    {   "ml",            -1,                         72/*langMalayalam*/,           17/*Malayalam*/          },  // Mlym;
1275    {   "mn",            -1,                         58/*langMongolianCyr*/,         7/*Cyrillic*/           },  // "mn" defaults to -Cyrl
1276    {   "mn_Cyrl",       -1,                         58/*langMongolianCyr*/,         7/*Cyrillic*/           },  // Cyrl;
1277    {   "mn_Mong",       -1,                         57/*langMongolian*/,           27/*Mongolian*/          },  // Mong;
1278    {   "mo",            -1,                         53/*langMoldavian*/,            7/*Cyrillic*/           },  // Cyrl;
1279    {   "mr"/*IN*/,     104/*verMarathi*/,           66/*langMarathi*/,              9/*Devanagari*/         },  // Deva;
1280    {   "ms",            -1,                         83/*langMalayRoman*/,           0/*Roman*/              },  // "ms" defaults to -Latn;
1281    {   "ms_Arab",       -1,                         84/*langMalayArabic*/,          4/*Arabic*/             },  // Arab;
1282    {   "mt"/*MT*/,      22/*verMalta*/,             16/*langMaltese*/,              0/*Roman*/              },  // Latn;
1283    {   "mul",           74/*verMultilingual*/,      -1,                             0                       },
1284    {   "my",            -1,                         77/*langBurmese*/,             19/*Burmese*/            },  // Mymr;
1285    {   "nb"/*NO*/,      12/*verNorway*/,             9/*langNorwegian*/,            0/*Roman*/              },
1286    {   "ne"/*NP*/,     106/*verNepal*/,             64/*langNepali*/,               9/*Devanagari*/         },  // Deva;
1287    {   "nl",             5/*verNetherlands*/,        4/*langDutch*/,                0/*Roman*/              },  // "nl" defaults to verNetherlands
1288    {   "nl_BE",          6/*verFlemish*/,           34/*langFlemish*/,              0/*Roman*/              },
1289    {   "nl_NL",          5/*verNetherlands*/,        4/*langDutch*/,                0/*Roman*/              },
1290    {   "nn"/*NO*/,     101/*verNynorsk*/,          151/*langNynorsk*/,              0/*Roman*/              },
1291    {   "ny",            -1,                         92/*langNyanja/Chewa*/,         0/*Roman*/              },  // Latn;
1292    {   "om",            -1,                         87/*langOromo*/,               28/*Ethiopic*/           },  // Ethi;
1293    {   "or",            -1,                         71/*langOriya*/,               12/*Oriya*/              },  // Orya;
1294    {   "pa",            95/*verPunjabi*/,           70/*langPunjabi*/,             10/*Gurmukhi*/           },  // Guru;
1295    {   "pl"/*PL*/,      42/*verPoland*/,            25/*langPolish*/,              29/*CentralEurRoman*/    },
1296    {   "ps",            -1,                         59/*langPashto*/,              0x8C/*Farsi*/            },  // Arab;
1297    {   "pt",            71/*verBrazil*/,             8/*langPortuguese*/,           0/*Roman*/              },  // "pt" defaults to verBrazil (per Chris Hansten)
1298    {   "pt_BR",         71/*verBrazil*/,             8/*langPortuguese*/,           0/*Roman*/              },
1299    {   "pt_PT",         10/*verPortugal*/,           8/*langPortuguese*/,           0/*Roman*/              },
1300    {   "qu",            -1,                        132/*langQuechua*/,              0/*Roman*/              },  // Latn;
1301    {   "rn",            -1,                         91/*langRundi*/,                0/*Roman*/              },  // Latn;
1302    {   "ro"/*RO*/,      39/*verRomania*/,           37/*langRomanian*/,            38/*Romanian*/           },
1303    {   "ru"/*RU*/,      49/*verRussia*/,            32/*langRussian*/,              7/*Cyrillic*/           },  // Cyrl;
1304    {   "rw",            -1,                         90/*langKinyarwanda*/,          0/*Roman*/              },  // Latn;
1305    {   "sa",            -1,                         65/*langSanskrit*/,             9/*Devanagari*/         },  // Deva;
1306    {   "sd",            -1,                         62/*langSindhi*/,              0x8C/*Farsi*/            },  // Arab;
1307    {   "se",            46/*verSami*/,              29/*langSami*/,                 0/*Roman*/              },
1308    {   "si",            -1,                         76/*langSinhalese*/,           18/*Sinhalese*/          },  // Sinh;
1309    {   "sk"/*SK*/,      57/*verSlovak*/,            39/*langSlovak*/,              29/*CentralEurRoman*/    },
1310    {   "sl"/*SI*/,      66/*verSlovenian*/,         40/*langSlovenian*/,           36/*Croatian*/           },
1311    {   "so",            -1,                         88/*langSomali*/,               0/*Roman*/              },  // Latn;
1312    {   "sq",            -1,                         36/*langAlbanian*/,             0/*Roman*/              },
1313    {   "sr"/*CS,RS*/,   65/*verSerbian*/,           42/*langSerbian*/,              7/*Cyrillic*/           },  // Cyrl;
1314    {   "su",            -1,                        139/*langSundaneseRom*/,         0/*Roman*/              },  // Latn;
1315    {   "sv"/*SE*/,       7/*verSweden*/,             5/*langSwedish*/,              0/*Roman*/              },
1316    {   "sw",            -1,                         89/*langSwahili*/,              0/*Roman*/              },  // Latn;
1317    {   "ta",            -1,                         74/*langTamil*/,               14/*Tamil*/              },  // Taml;
1318    {   "te",            -1,                         75/*langTelugu*/,              15/*Telugu*/             },  // Telu
1319    {   "tg",            -1,                         55/*langTajiki*/,               7/*Cyrillic*/           },  // "tg" defaults to "Cyrl"
1320    {   "tg_Cyrl",       -1,                         55/*langTajiki*/,               7/*Cyrillic*/           },  // Cyrl; also -Latn, -Arab
1321    {   "th"/*TH*/,      54/*verThailand*/,          22/*langThai*/,                21/*Thai*/               },  // Thai;
1322    {   "ti",            -1,                         86/*langTigrinya*/,            28/*Ethiopic*/           },  // Ethi;
1323    {   "tk",            -1,                         56/*langTurkmen*/,              7/*Cyrillic*/           },  // "tk" defaults to Cyrl
1324    {   "tk_Cyrl",       -1,                         56/*langTurkmen*/,              7/*Cyrillic*/           },  // Cyrl; also -Latn, -Arab
1325    {   "tl",            -1,                         82/*langTagalog*/,              0/*Roman*/              },  // Latn;
1326    {   "to"/*TO*/,      88/*verTonga*/,            147/*langTongan*/,               0/*Roman*/              },  // Latn;
1327    {   "tr"/*TR*/,      24/*verTurkey*/,            17/*langTurkish*/,             35/*Turkish*/            },  // Latn;
1328    {   "tt",            -1,                        135/*langTatar*/,                7/*Cyrillic*/           },  // Cyrl;
1329    {   "tt_Cyrl",       -1,                        135/*langTatar*/,                7/*Cyrillic*/           },  // Cyrl;
1330    {   "ug",            -1,                        136/*langUighur*/,               4/*Arabic*/             },  // Arab;
1331    {   "uk"/*UA*/,      62/*verUkraine*/,           45/*langUkrainian*/,            7/*Cyrillic*/           },  // Cyrl;
1332    {   "und",           55/*verScriptGeneric*/,     -1,                             0                       },
1333    {   "ur",            34/*verPakistanUrdu*/,      20/*langUrdu*/,                0x8C/*Farsi*/            },  // "ur" defaults to verPakistanUrdu
1334    {   "ur_IN",         96/*verIndiaUrdu*/,         20/*langUrdu*/,                0x8C/*Farsi*/            },  // Arab
1335    {   "ur_PK",         34/*verPakistanUrdu*/,      20/*langUrdu*/,                0x8C/*Farsi*/            },  // Arab
1336    {   "uz"/*UZ*/,      99/*verUzbek*/,             47/*langUzbek*/,                7/*Cyrillic*/           },  // Cyrl; also -Latn, -Arab
1337    {   "uz_Cyrl",       99/*verUzbek*/,             47/*langUzbek*/,                7/*Cyrillic*/           },
1338    {   "vi"/*VN*/,      97/*verVietnam*/,           80/*langVietnamese*/,          30/*Vietnamese*/         },  // Latn
1339    {   "yi",            -1,                         41/*langYiddish*/,              5/*Hebrew*/             },  // Hebr;
1340    {   "zh",            52/*verChina*/,             33/*langSimpChinese*/,         25/*ChineseSimp*/        },  // "zh" defaults to verChina, langSimpChinese
1341    {   "zh_CN",         52/*verChina*/,             33/*langSimpChinese*/,         25/*ChineseSimp*/        },
1342    {   "zh_HK",         53/*verTaiwan*/,            19/*langTradChinese*/,          2/*ChineseTrad*/        },
1343    {   "zh_Hans",       52/*verChina*/,             33/*langSimpChinese*/,         25/*ChineseSimp*/        },
1344    {   "zh_Hant",       53/*verTaiwan*/,            19/*langTradChinese*/,          2/*ChineseTrad*/        },
1345    {   "zh_MO",         53/*verTaiwan*/,            19/*langTradChinese*/,          2/*ChineseTrad*/        },
1346    {   "zh_SG",         52/*verChina*/,             33/*langSimpChinese*/,         25/*ChineseSimp*/        },
1347    {   "zh_TW",         53/*verTaiwan*/,            19/*langTradChinese*/,          2/*ChineseTrad*/        },
1348};
1349enum {
1350    kNumLocaleToLegacyCodes = sizeof(localeToLegacyCodes)/sizeof(localeToLegacyCodes[0])
1351};
1352
1353/*
1354	For reference here is a list of ICU locales with variants and how some
1355	of them are canonicalized with the ICU function uloc_canonicalize:
1356
1357	ICU 3.0 has:
1358		en_US_POSIX			x	no change
1359		hy_AM_REVISED		x	no change
1360		ja_JP_TRADITIONAL	->	ja_JP@calendar=japanese
1361		th_TH_TRADITIONAL	->	th_TH@calendar=buddhist
1362
1363	ICU 2.8 also had the following (now obsolete):
1364		ca_ES_PREEURO
1365		de__PHONEBOOK		->	de@collation=phonebook
1366		de_AT_PREEURO
1367		de_DE_PREEURO
1368		de_LU_PREEURO
1369		el_GR_PREEURO
1370		en_BE_PREEURO
1371		en_GB_EURO			->	en_GB@currency=EUR
1372		en_IE_PREEURO		->	en_IE@currency=IEP
1373		es__TRADITIONAL		->	es@collation=traditional
1374		es_ES_PREEURO
1375		eu_ES_PREEURO
1376		fi_FI_PREEURO
1377		fr_BE_PREEURO
1378		fr_FR_PREEURO		->	fr_FR@currency=FRF
1379		fr_LU_PREEURO
1380		ga_IE_PREEURO
1381		gl_ES_PREEURO
1382		hi__DIRECT			->	hi@collation=direct
1383		it_IT_PREEURO
1384		nl_BE_PREEURO
1385		nl_NL_PREEURO
1386		pt_PT_PREEURO
1387		zh__PINYIN			->	zh@collation=pinyin
1388		zh_TW_STROKE		->	zh_TW@collation=stroke
1389
1390*/
1391
1392// _CompareTestEntryToTableEntryKey
1393// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1394// comparison function for bsearch
1395static int _CompareTestEntryToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) {
1396    return strcmp( ((const KeyStringToResultString *)testEntryPtr)->key, ((const KeyStringToResultString *)tableEntryKeyPtr)->key );
1397}
1398
1399// _CompareTestEntryPrefixToTableEntryKey
1400// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1401// Comparison function for bsearch. Assumes prefix IS terminated with '-' or '_'.
1402// Do the following instead of strlen & strncmp so we don't walk tableEntry key twice.
1403static int _CompareTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) {
1404    const char *    testPtr = ((const KeyStringToResultString *)testEntryPtr)->key;
1405    const char *    tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key;
1406
1407    while ( *testPtr == *tablePtr && *tablePtr != 0 ) {
1408        testPtr++; tablePtr++;
1409    }
1410    if ( *tablePtr != 0 ) {
1411        // strings are different, and the string in the table has not run out;
1412        // i.e. the table entry is not a prefix of the text string.
1413        return ( *testPtr < *tablePtr )? -1: 1;
1414    }
1415    return 0;
1416}
1417
1418// _CompareLowerTestEntryPrefixToTableEntryKey
1419// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1420// Comparison function for bsearch. Assumes prefix NOT terminated with '-' or '_'.
1421// Lowercases the test string before comparison (the table should already have lowercased entries).
1422static int _CompareLowerTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) {
1423    const char *    testPtr = ((const KeyStringToResultString *)testEntryPtr)->key;
1424    const char *    tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key;
1425    char            lowerTestChar;
1426
1427    while ( (lowerTestChar = tolower(*testPtr)) == *tablePtr && *tablePtr != 0 && lowerTestChar != '_' ) {  // <1.9>
1428        testPtr++; tablePtr++;
1429    }
1430    if ( *tablePtr != 0 ) {
1431        // strings are different, and the string in the table has not run out;
1432        // i.e. the table entry is not a prefix of the text string.
1433        if (lowerTestChar == '_')                                                           // <1.9>
1434            return -1;                                                                      // <1.9>
1435        return ( lowerTestChar < *tablePtr )? -1: 1;
1436    }
1437    // The string in the table has run out. If the test string char is not alnum,
1438    // then the string matches, else the test string sorts after.
1439    return ( !isalnum(lowerTestChar) )? 0: 1;
1440}
1441
// _DeleteCharsAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Deletes the first _length_ characters of the NUL-terminated string at _stringPtr_ by
// sliding the remainder of the string (terminator included) down over them.
// The caller guarantees that the string holds at least _length_ characters.
static void _DeleteCharsAtPointer(char *stringPtr, int length) {
    const char *    tail = stringPtr + length;

    // +1 so the terminating NUL moves along with the tail; the ranges overlap, hence memmove
    memmove(stringPtr, tail, strlen(tail) + 1);
}
1451
// _CopyReplacementAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Overwrites the characters starting at stringPtr with the characters of replacementPtr.
// The replacement's terminating NUL is *not* copied, so whatever follows the
// overwritten span in the destination string is left intact.
static void _CopyReplacementAtPointer(char *stringPtr, const char *replacementPtr) {
    int     index;

    for (index = 0; replacementPtr[index] != 0; index++) {
        stringPtr[index] = replacementPtr[index];
    }
}
1460
1461// _CheckForTag
1462// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
1463static Boolean _CheckForTag(const char *localeStringPtr, const char *tagPtr, int tagLen) {
1464    return ( strncmp(localeStringPtr, tagPtr, tagLen) == 0 && !isalnum(localeStringPtr[tagLen]) );
1465}
1466
// _ReplacePrefix
// Replaces the first oldPrefixLen characters of the NUL-terminated string locString
// with newPrefix, shifting the rest of the string (terminator included) left or right
// as needed.
//   locString        buffer holding the string to edit in place
//   locStringMaxLen  capacity of that buffer in bytes, including room for the NUL
//   oldPrefixLen     number of leading characters to replace
//   newPrefix        NUL-terminated replacement text (its NUL is not copied)
// The string is left untouched when the longer result would not fit in
// locStringMaxLen bytes, or when oldPrefixLen is negative or exceeds the current
// string length.
// (Moved out of _UpdateFullLocaleString into a separate function)                          // <1.10>
static void _ReplacePrefix(char locString[], int locStringMaxLen, int oldPrefixLen, const char *newPrefix) {
    size_t  stringLen = strlen(locString);
    size_t  newPrefixLen = strlen(newPrefix);
    size_t  oldLen;

    if (oldPrefixLen < 0 || (size_t)oldPrefixLen > stringLen) {
        // nothing sensible to replace; shifting would read/write outside the string
        return;
    }
    oldLen = (size_t)oldPrefixLen;

    if (newPrefixLen > oldLen) {
        // replacement is longer: the grown string plus its NUL must fit in the buffer
        if (locStringMaxLen <= 0 || stringLen + (newPrefixLen - oldLen) >= (size_t)locStringMaxLen) {
            // no room, can't do substitution
            return;
        }
    }

    if (newPrefixLen != oldLen) {
        // move the tail (everything after the old prefix, plus the NUL) to its new place;
        // source and destination overlap, so memmove is required
        memmove(locString + newPrefixLen, locString + oldLen, stringLen - oldLen + 1);
    }

    // do the substitution (without the replacement's terminating NUL)
    memcpy(locString, newPrefix, newPrefixLen);
}
1500
1501// _UpdateFullLocaleString
1502// Given a locale string that uses standard codes (not a special old-style Apple string),
1503// update all the language codes and region codes to latest versions, map 3-letter
1504// language codes to 2-letter codes if possible, and normalize casing. If requested, return
1505// pointers to a language-region variant subtag (if present) and a region tag (if present).
1506// (add locStringMaxLen parameter)                                                          // <1.10>
1507static void _UpdateFullLocaleString(char inLocaleString[], int locStringMaxLen,
1508									char **langRegSubtagRef, char **regionTagRef,
1509									char varKeyValueString[])								// <1.17>
1510{
1511    KeyStringToResultString     testEntry;
1512    KeyStringToResultString *   foundEntry;
1513    const SpecialCaseUpdates *  specialCasePtr;
1514    char *      inLocalePtr;
1515    char *      subtagPtr;
1516    char *      langRegSubtag = NULL;
1517    char *      regionTag = NULL;
1518    char *		variantTag = NULL;
1519    Boolean     subtagHasDigits, pastPrimarySubtag, hadRegion;
1520
1521    // 1. First replace any non-canonical prefix (case insensitive) with canonical
1522    // (change 3-letter ISO 639 code to 2-letter, update obsolete ISO 639 codes & RFC 3066 tags, etc.)
1523
1524    testEntry.key = inLocaleString;
1525    foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToCanonical, kNumLocaleStringPrefixToCanonical,
1526                                                    sizeof(KeyStringToResultString), _CompareLowerTestEntryPrefixToTableEntryKey );
1527    if (foundEntry) {
1528        // replace key (at beginning of string) with result
1529        _ReplacePrefix(inLocaleString, locStringMaxLen, strlen(foundEntry->key), foundEntry->result);   // <1.10>
1530    }
1531
1532    // 2. Walk through input string, normalizing case & marking use of ISO 3166 codes
1533
1534    inLocalePtr = inLocaleString;
1535    subtagPtr = inLocaleString;
1536    subtagHasDigits = false;
1537    pastPrimarySubtag = false;
1538    hadRegion = false;
1539
1540    while ( true ) {
1541        if ( isalpha(*inLocalePtr) ) {
1542            // if not past a region tag, then lowercase, else uppercase
1543            *inLocalePtr = (!hadRegion)? tolower(*inLocalePtr): toupper(*inLocalePtr);
1544        } else if ( isdigit(*inLocalePtr) ) {
1545            subtagHasDigits = true;
1546        } else {
1547
1548            if (!pastPrimarySubtag) {
1549                // may have a NULL primary subtag
1550                if (subtagHasDigits) {
1551                    break;
1552                }
1553                pastPrimarySubtag = true;
1554            } else if (!hadRegion) {
1555                // We are after any primary language subtag, but not past any region tag.
1556                // This subtag is preceded by '-' or '_'.
1557                int subtagLength = inLocalePtr - subtagPtr; // includes leading '-' or '_'
1558
1559				if (subtagLength == 3 && !subtagHasDigits) {
1560					// potential ISO 3166 code for region or language variant; if so, needs uppercasing
1561					if (*subtagPtr == '_') {
1562						regionTag = subtagPtr;
1563						hadRegion = true;
1564						subtagPtr[1] = toupper(subtagPtr[1]);
1565						subtagPtr[2] = toupper(subtagPtr[2]);
1566					} else if (langRegSubtag == NULL) {
1567						langRegSubtag = subtagPtr;
1568						subtagPtr[1] = toupper(subtagPtr[1]);
1569						subtagPtr[2] = toupper(subtagPtr[2]);
1570					}
1571				} else if (subtagLength == 4 && subtagHasDigits) {
1572					// potential UN M.49 region code
1573					if (*subtagPtr == '_') {
1574						regionTag = subtagPtr;
1575						hadRegion = true;
1576					} else if (langRegSubtag == NULL) {
1577						langRegSubtag = subtagPtr;
1578					}
1579				} else if (subtagLength == 5 && !subtagHasDigits) {
1580					// ISO 15924 script code, uppercase just the first letter
1581					subtagPtr[1] = toupper(subtagPtr[1]);
1582				} else if (subtagLength == 1 && *subtagPtr == '_') {						// <1.17>
1583					hadRegion = true;
1584				}
1585
1586                if (!hadRegion) {
1587                    // convert improper '_' to '-'
1588                    *subtagPtr = '-';
1589                }
1590            } else {
1591            	variantTag = subtagPtr;															// <1.17>
1592            }
1593
1594            if (*inLocalePtr == '-' || *inLocalePtr == '_') {
1595                subtagPtr = inLocalePtr;
1596                subtagHasDigits = false;
1597            } else {
1598                break;
1599            }
1600        }
1601
1602        inLocalePtr++;
1603    }
1604
1605    // 3 If there is a variant tag, see if ICU canonicalizes it to keywords.					// <1.17> [3577669]
1606    // If so, copy the keywords to varKeyValueString and delete the variant tag
1607    // from the original string (but don't otherwise use the ICU canonicalization).
1608    varKeyValueString[0] = 0;
1609#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
1610    if (variantTag) {
1611		UErrorCode	icuStatus;
1612		int			icuCanonStringLen;
1613		char * 		varKeyValueStringPtr = varKeyValueString;
1614
1615		icuStatus = U_ZERO_ERROR;
1616		icuCanonStringLen = uloc_canonicalize( inLocaleString, varKeyValueString, locStringMaxLen, &icuStatus );
1617		if ( U_SUCCESS(icuStatus) ) {
1618			char *	icuCanonStringPtr = varKeyValueString;
1619
1620			if (icuCanonStringLen >= locStringMaxLen)
1621				icuCanonStringLen = locStringMaxLen - 1;
1622			varKeyValueString[icuCanonStringLen] = 0;
1623			while (*icuCanonStringPtr != 0 && *icuCanonStringPtr != ULOC_KEYWORD_SEPARATOR)
1624				++icuCanonStringPtr;
1625			if (*icuCanonStringPtr != 0) {
1626				// the canonicalized string has keywords
1627				// delete the variant tag in the original string (and other trailing '_' or '-')
1628				*variantTag-- = 0;
1629				while (*variantTag == '_')
1630					*variantTag-- = 0;
1631				// delete all of the canonicalized string except the keywords
1632				while (*icuCanonStringPtr != 0)
1633					*varKeyValueStringPtr++ = *icuCanonStringPtr++;
1634			}
1635		*varKeyValueStringPtr = 0;
1636		}
1637    }
1638#endif
1639
1640    // 4. Handle special cases of updating region codes, or updating language codes based on
1641    // region code.
1642    for (specialCasePtr = specialCases; specialCasePtr->reg1 != NULL; specialCasePtr++) {
1643        if ( specialCasePtr->lang == NULL || _CheckForTag(inLocaleString, specialCasePtr->lang, 2) ) {
1644            // OK, we matched any language specified. Now what needs updating?
1645            char * foundTag;
1646
1647            if ( isupper(specialCasePtr->update1[0]) ) {
1648                // updating a region code
1649                if ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) {
1650                    _CopyReplacementAtPointer(foundTag+1, specialCasePtr->update1);
1651                }
1652                if ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) {
1653                    _CopyReplacementAtPointer(regionTag+1, specialCasePtr->update1);
1654                }
1655
1656            } else {
1657                // updating the language, there will be two choices based on region
1658                if        ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) ||
1659                            ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) ) {
1660                    _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update1);
1661                } else if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg2 + 1, 2) ) ||
1662                            ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg2) ) && !isalnum(foundTag[3]) ) ) {
1663                    _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update2);
1664                }
1665            }
1666        }
1667    }
1668
1669    // 5. return pointers if requested.
1670    if (langRegSubtagRef != NULL) {
1671        *langRegSubtagRef = langRegSubtag;
1672    }
1673    if (regionTagRef != NULL) {
1674        *regionTagRef = regionTag;
1675    }
1676}
1677
1678
// _RemoveSubstringsIfPresent
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// substringList is a list of whitespace-separated substrings to strip if found in
// localeString. The substrings are processed in list order; for each one, only its
// first occurrence in localeString is deleted (the remainder of the string is moved
// down over it).
// Each substring is matched in place inside substringList, so there is no limit on
// substring length, and any byte that is not a printable non-space character is
// treated as a separator.
static void _RemoveSubstringsIfPresent(char *localeString, const char *substringList) {
    const char *    listPtr = substringList;

    while (*listPtr != 0) {
        const char *    substringStart;
        size_t          substringLength;
        char *          scanPtr;

        // skip separators in front of the next substring
        while ( *listPtr != 0 && !isgraph((unsigned char)*listPtr) ) {
            listPtr++;
        }

        // delimit the current substring
        substringStart = listPtr;
        while ( isgraph((unsigned char)*listPtr) ) {
            listPtr++;
        }
        substringLength = (size_t)(listPtr - substringStart);
        if (substringLength == 0) {
            // only separators were left; the list is exhausted
            break;
        }

        // search for current substring in locale string; if found, delete it.
        // strncmp stops at the locale string's NUL, so it never reads past its end.
        for (scanPtr = localeString; *scanPtr != 0; scanPtr++) {
            if ( strncmp(scanPtr, substringStart, substringLength) == 0 ) {
                memmove(scanPtr, scanPtr + substringLength, strlen(scanPtr + substringLength) + 1);
                break;
            }
        }
    }
}
1709
1710
1711// _GetKeyValueString                                                                       // <1.10>
1712// Removes any key-value string from inLocaleString, puts canonized version in keyValueString
1713
1714static void _GetKeyValueString(char inLocaleString[], char keyValueString[]) {
1715    char *  inLocalePtr = inLocaleString;
1716
1717    while (*inLocalePtr != 0 && *inLocalePtr != ULOC_KEYWORD_SEPARATOR) {
1718        inLocalePtr++;
1719    }
1720    if (*inLocalePtr != 0) {    // we found a key-value section
1721        char *  keyValuePtr = keyValueString;
1722
1723        *keyValuePtr = *inLocalePtr;
1724        *inLocalePtr = 0;
1725        do {
1726            if ( *(++inLocalePtr) != ' ' ) {
1727                *(++keyValuePtr) = *inLocalePtr;    // remove "tolower() for *inLocalePtr"  // <1.11>
1728            }
1729        } while (*inLocalePtr != 0);
1730    } else {
1731        keyValueString[0] = 0;
1732    }
1733}
1734
// _AppendKeyValueString
// Re-attaches the keywords held in keyValueString (an "@key=value;..." string) to
// inLocaleString: every keyword ICU enumerates from keyValueString has its value read
// back with uloc_getKeywordValue and is then set on inLocaleString with
// uloc_setKeywordValue, using locStringMaxLen as the capacity of inLocaleString.
// Does nothing when keyValueString is empty or ICU cannot enumerate its keywords;
// the loop ends at the first ICU error. On deployment targets without ICU support
// the function is a no-op.
static void _AppendKeyValueString(char inLocaleString[], int locStringMaxLen, char keyValueString[]) {
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
    UErrorCode      icuStatus = U_ZERO_ERROR;
    UEnumeration *  keywordEnum;
    char            keywordValue[ULOC_KEYWORDS_CAPACITY];   // use as max for keyword value

    if ( keyValueString[0] == 0 ) {
        return;
    }

    keywordEnum = uloc_openKeywords(keyValueString, &icuStatus);
    if ( keywordEnum == NULL ) {
        return;
    }

    while ( U_SUCCESS(icuStatus) ) {
        int32_t         keywordLength;
        const char *    keyword = uenum_next(keywordEnum, &keywordLength, &icuStatus);

        if ( keyword == NULL ) {
            break;
        }
        // the returned lengths are not needed; failures are reported through icuStatus
        (void)uloc_getKeywordValue( keyValueString, keyword, keywordValue, sizeof(keywordValue), &icuStatus );
        (void)uloc_setKeywordValue( keyword, keywordValue, inLocaleString, locStringMaxLen, &icuStatus );
    }
    uenum_close(keywordEnum);
#endif
}
1757
1758// __private_extern__ CFStringRef _CFLocaleCreateCanonicalLanguageIdentifierForCFBundle(CFAllocatorRef allocator, CFStringRef localeIdentifier) {}
1759
1760CFStringRef CFLocaleCreateCanonicalLanguageIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) {
1761    char            inLocaleString[kLocaleIdentifierCStringMax];
1762    CFStringRef     outStringRef = NULL;
1763
1764    if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString,  sizeof(inLocaleString), kCFStringEncodingASCII) ) {
1765        KeyStringToResultString     testEntry;
1766        KeyStringToResultString *   foundEntry;
1767        char                        keyValueString[sizeof(inLocaleString)];				// <1.10>
1768        char						varKeyValueString[sizeof(inLocaleString)];			// <1.17>
1769
1770        _GetKeyValueString(inLocaleString, keyValueString);								// <1.10>
1771        testEntry.result = NULL;
1772
1773        // A. Special case aa_SAAHO, no_BOKMAL, and no_NYNORSK since they are legacy identifiers that don't follow the normal rules (http://unicode.org/cldr/trac/browser/trunk/common/supplemental/supplementalMetadata.xml)
1774
1775        testEntry.key = inLocaleString;
1776        KeyStringToResultString specialCase = testEntry;
1777        foundEntry = &specialCase;
1778
1779        if (strncmp("aa_SAAHO", testEntry.key, strlen("aa_SAAHO")) == 0) {
1780            foundEntry->result = "ssy";
1781        } else if (strncmp("no_BOKMAL", testEntry.key, strlen("no_BOKMAL")) == 0) {
1782            foundEntry->result = "nb";
1783        } else if (strncmp("no_NYNORSK", testEntry.key, strlen("no_NYNORSK")) == 0) {
1784            foundEntry->result = "nn";
1785        } else {
1786            // B. First check if input string matches an old-style string that has a replacement
1787            // (do this before case normalization)
1788            foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical,
1789                                                            sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1790        }
1791        if (foundEntry) {
1792            // It does match, so replace old string with new
1793            strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString));
1794             varKeyValueString[0] = 0;
1795        } else {
1796            char *      langRegSubtag = NULL;
1797            char *      regionTag = NULL;
1798
1799            // C. No match with an old-style string, use input string but update codes, normalize case, etc.
1800            _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, &regionTag, varKeyValueString);   // <1.10><1.17><1.19>
1801
1802            // if the language part already includes a regional variant, then delete any region tag. <1.19>
1803            if (langRegSubtag && regionTag)
1804            	*regionTag = 0;
1805        }
1806
1807        // D. Now we have an up-to-date locale string, but we need to strip defaults and turn it into a language string
1808
1809        // 1. Strip defaults in input string based on initial part of locale string
1810        // (mainly to strip default script tag for a language)
1811        testEntry.key = inLocaleString;
1812        foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults,
1813                                                        sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey );
1814        if (foundEntry) {
1815            // The input string begins with a character sequence for which
1816            // there are default substrings which should be stripped if present
1817            _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result);
1818        }
1819
1820        // 2. If the string matches a locale string used by Apple as a language string, turn it into a language string
1821        testEntry.key = inLocaleString;
1822        foundEntry = (KeyStringToResultString *)bsearch( &testEntry, appleLocaleToLanguageString, kNumAppleLocaleToLanguageString,
1823                                                        sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1824        if (foundEntry) {
1825            // it does match
1826            strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString));
1827        } else {
1828            // skip to any region tag or java-type variant
1829            char *  inLocalePtr = inLocaleString;
1830            while (*inLocalePtr != 0 && *inLocalePtr != '_') {
1831                inLocalePtr++;
1832            }
1833            // if there is still a region tag, turn it into a language variant <1.19>
1834            if (*inLocalePtr == '_') {
1835            	// handle 3-digit regions in addition to 2-letter ones
1836            	char *	regionTag = inLocalePtr++;
1837            	long	expectedLength = 0;
1838            	if ( isalpha(*inLocalePtr) ) {
1839            		while ( isalpha(*(++inLocalePtr)) )
1840            			;
1841            		expectedLength = 3;
1842            	} else if ( isdigit(*inLocalePtr) ) {
1843            		while ( isdigit(*(++inLocalePtr)) )
1844            			;
1845            		expectedLength = 4;
1846            	}
1847            	*regionTag = (inLocalePtr - regionTag == expectedLength)? '-': 0;
1848             }
1849             // anything else at/after '_' just gets deleted
1850            *inLocalePtr = 0;
1851        }
1852
1853        // E. Re-append any key-value strings, now canonical										// <1.10><1.17>
1854		_AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString );
1855		_AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString );
1856
1857        // All done, return what we came up with.
1858        outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII);
1859    }
1860
1861    return outStringRef;
1862}
1863
1864
1865CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) {
1866    char            inLocaleString[kLocaleIdentifierCStringMax];
1867    CFStringRef     outStringRef = NULL;
1868
1869    if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString,  sizeof(inLocaleString), kCFStringEncodingASCII) ) {
1870        KeyStringToResultString     testEntry;
1871        KeyStringToResultString *   foundEntry;
1872        char                        keyValueString[sizeof(inLocaleString)];				// <1.10>
1873        char			    		varKeyValueString[sizeof(inLocaleString)];			// <1.17>
1874
1875        _GetKeyValueString(inLocaleString, keyValueString);								// <1.10>
1876        testEntry.result = NULL;
1877
1878        // A. First check if input string matches an old-style Apple string that has a replacement
1879        // (do this before case normalization)
1880        testEntry.key = inLocaleString;
1881        foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical,
1882                                                        sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1883        if (foundEntry) {
1884            // It does match, so replace old string with new                                // <1.10>
1885            strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString));
1886            varKeyValueString[0] = 0;
1887        } else {
1888            char *      langRegSubtag = NULL;
1889            char *      regionTag = NULL;
1890
1891            // B. No match with an old-style string, use input string but update codes, normalize case, etc.
1892            _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, &regionTag, varKeyValueString);   // <1.10><1.17>
1893
1894
1895            // C. Now strip defaults that are implied by other fields.
1896
1897            // 1. If an ISO 3166 region tag matches an ISO 3166 regional language variant subtag, strip the latter.
1898            if ( langRegSubtag && regionTag && strncmp(langRegSubtag+1, regionTag+1, 2) == 0 ) {
1899                _DeleteCharsAtPointer(langRegSubtag, 3);
1900            }
1901
1902            // 2. Strip defaults in input string based on final region tag in locale string
1903            // (mainly for Chinese, to strip -Hans for _CN/_SG, -Hant for _TW/_HK/_MO)
1904            if ( regionTag ) {
1905                testEntry.key = regionTag;
1906                foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringRegionToDefaults, kNumLocaleStringRegionToDefaults,
1907                                                                sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey );
1908                if (foundEntry) {
1909                    _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result);
1910                }
1911            }
1912
1913            // 3. Strip defaults in input string based on initial part of locale string
1914            // (mainly to strip default script tag for a language)
1915            testEntry.key = inLocaleString;
1916            foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults,
1917                                                            sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey );
1918            if (foundEntry) {
1919                // The input string begins with a character sequence for which
1920                // there are default substrings which should be stripped if present
1921                _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result);
1922            }
1923        }
1924
1925        // D. Re-append any key-value strings, now canonical								// <1.10><1.17>
1926		_AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString );
1927		_AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString );
1928
1929        // Now create the CFString (even if empty!)
1930        outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII);
1931    }
1932
1933    return outStringRef;
1934}
1935
1936// CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, based on
1937// the first part of the SPI CFBundleCopyLocalizationForLocalizationInfo in CFBundle_Resources.c
1938CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes(CFAllocatorRef allocator, LangCode lcode, RegionCode rcode) {
1939    CFStringRef result = NULL;
1940    if (0 <= rcode && rcode < kNumRegionCodeToLocaleString) {
1941	const char *localeString = regionCodeToLocaleString[rcode];
1942	if (localeString != NULL && *localeString != '\0') {
1943	    result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull);
1944	}
1945    }
1946    if (result) return result;
1947    if (0 <= lcode && lcode < kNumLangCodeToLocaleString) {
1948	const char *localeString = langCodeToLocaleString[lcode];
1949	if (localeString != NULL && *localeString != '\0') {
1950	    result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull);
1951	}
1952    }
1953    return result;
1954}
1955
1956
1957/*
1958SPI:  CFLocaleGetLanguageRegionEncodingForLocaleIdentifier gets the appropriate language and region codes,
1959 and the default legacy script code and encoding, for the specified locale (or language) string.
1960 Returns false if CFLocale has no information about the given locale (in which case none of the by-reference return values are set);
1961 otherwise may set *langCode and/or *regCode to -1 if there is no appropriate legacy value for the locale.
1962 This is a replacement for the CFBundle SPI CFBundleGetLocalizationInfoForLocalization (which was intended to be temporary and transitional);
1963 this function is more up-to-date in its handling of locale strings, and is in CFLocale where this functionality should belong. Compared
 to CFBundleGetLocalizationInfoForLocalization, this function does not specially interpret a NULL localeIdentifier to mean use the single most
1965 preferred localization in the current context (this function returns NO for a NULL localeIdentifier); and in this function
1966 langCode, regCode, and scriptCode are all SInt16* (not SInt32* like the equivalent parameters in CFBundleGetLocalizationInfoForLocalization).
1967*/
1968#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
1969static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 );
1970#endif
1971
1972Boolean CFLocaleGetLanguageRegionEncodingForLocaleIdentifier(CFStringRef localeIdentifier, LangCode *langCode, RegionCode *regCode, ScriptCode *scriptCode, CFStringEncoding *stringEncoding) {
1973#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
1974	Boolean		returnValue = false;
1975	CFStringRef	canonicalIdentifier = CFLocaleCreateCanonicalLocaleIdentifierFromString(NULL, localeIdentifier);
1976	if (canonicalIdentifier) {
1977    	char	localeCString[kLocaleIdentifierCStringMax];
1978		if ( CFStringGetCString(canonicalIdentifier, localeCString,  sizeof(localeCString), kCFStringEncodingASCII) ) {
1979			UErrorCode	icuStatus = U_ZERO_ERROR;
1980			int32_t		languagelength;
1981			char		searchString[ULOC_LANG_CAPACITY + ULOC_FULLNAME_CAPACITY];
1982
1983			languagelength = uloc_getLanguage( localeCString, searchString, ULOC_LANG_CAPACITY, &icuStatus );
1984			if ( U_SUCCESS(icuStatus) && languagelength > 0 ) {
1985				// OK, here we have at least a language code, check for other components in order
1986				LocaleToLegacyCodes			searchEntry = { (const char *)searchString, 0, 0, 0 };
1987				const LocaleToLegacyCodes *	foundEntryPtr;
1988				int32_t						componentLength;
1989				char						componentString[ULOC_FULLNAME_CAPACITY];
1990
1991				languagelength = strlen(searchString);	// in case it got truncated
1992				icuStatus = U_ZERO_ERROR;
1993				componentLength = uloc_getScript( localeCString, componentString, sizeof(componentString), &icuStatus );
1994				if ( U_FAILURE(icuStatus) || componentLength == 0 ) {
1995					icuStatus = U_ZERO_ERROR;
1996					componentLength = uloc_getCountry( localeCString, componentString, sizeof(componentString), &icuStatus );
1997					if ( U_FAILURE(icuStatus) || componentLength == 0 ) {
1998						icuStatus = U_ZERO_ERROR;
1999						componentLength = uloc_getVariant( localeCString, componentString, sizeof(componentString), &icuStatus );
2000						if ( U_FAILURE(icuStatus) ) {
2001							componentLength = 0;
2002						}
2003					}
2004				}
2005
2006				// Append whichever other component we first found
2007				if (componentLength > 0) {
2008					strlcat(searchString, "_", sizeof(searchString));
2009					strlcat(searchString, componentString, sizeof(searchString));
2010				}
2011
2012				// Search
2013				foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries );
2014				if (foundEntryPtr == NULL && (int32_t) strlen(searchString) > languagelength) {
2015					// truncate to language al;one and try again
2016					searchString[languagelength] = 0;
2017					foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries );
2018				}
2019
2020				// If found a matching entry, return requested values
2021				if (foundEntryPtr) {
2022					returnValue = true;
2023					if (langCode)		*langCode		= foundEntryPtr->langCode;
2024					if (regCode)		*regCode		= foundEntryPtr->regCode;
2025					if (stringEncoding)	*stringEncoding	= foundEntryPtr->encoding;
2026					if (scriptCode) {
2027						// map CFStringEncoding to ScriptCode
2028						if (foundEntryPtr->encoding < 33/*kCFStringEncodingMacSymbol*/) {
2029							*scriptCode	= foundEntryPtr->encoding;
2030						} else {
2031							switch (foundEntryPtr->encoding) {
2032								case 0x8C/*kCFStringEncodingMacFarsi*/:		*scriptCode	= 4/*smArabic*/; break;
2033								case 0x98/*kCFStringEncodingMacUkrainian*/:	*scriptCode	= 7/*smCyrillic*/; break;
2034								case 0xEC/*kCFStringEncodingMacInuit*/:		*scriptCode	= 28/*smEthiopic*/; break;
2035								case 0xFC/*kCFStringEncodingMacVT100*/:		*scriptCode	= 32/*smUninterp*/; break;
2036								default:									*scriptCode	= 0/*smRoman*/; break;
2037							}
2038						}
2039					}
2040				}
2041			}
2042		}
2043		CFRelease(canonicalIdentifier);
2044	}
2045	return returnValue;
2046#else
2047    return false;
2048#endif
2049}
2050
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
// Comparator with the bsearch() callback signature: both arguments point at
// LocaleToLegacyCodes records, which are ordered by strcmp() of their locale strings.
static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 ) {
	const LocaleToLegacyCodes *	lhs = (const LocaleToLegacyCodes *)entry1;
	const LocaleToLegacyCodes *	rhs = (const LocaleToLegacyCodes *)entry2;

	return strcmp(lhs->locale, rhs->locale);
}
#endif
2058
2059CFDictionaryRef CFLocaleCreateComponentsFromLocaleIdentifier(CFAllocatorRef allocator, CFStringRef localeID) {
2060    CFMutableDictionaryRef working = CFDictionaryCreateMutable(allocator, 10, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
2061#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
2062    char cLocaleID[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY];
2063    char buffer[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY];
2064
2065    UErrorCode icuStatus = U_ZERO_ERROR;
2066    int32_t length = 0;
2067
2068    if (!localeID) goto out;
2069
2070    // Extract the C string locale ID, for ICU
2071    CFIndex outBytes = 0;
2072    CFStringGetBytes(localeID, CFRangeMake(0, CFStringGetLength(localeID)), kCFStringEncodingASCII, (UInt8) '?', true, (unsigned char *)cLocaleID, sizeof(cLocaleID)/sizeof(char) - 1, &outBytes);
2073    cLocaleID[outBytes] = '\0';
2074
2075    // Get the components
2076    length = uloc_getLanguage(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
2077    if (U_SUCCESS(icuStatus) && length > 0)
2078    {
2079        CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
2080        CFDictionaryAddValue(working, kCFLocaleLanguageCodeKey, string);
2081        CFRelease(string);
2082    }
2083    icuStatus = U_ZERO_ERROR;
2084
2085    length = uloc_getScript(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
2086    if (U_SUCCESS(icuStatus) && length > 0)
2087    {
2088        CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
2089        CFDictionaryAddValue(working, kCFLocaleScriptCodeKey, string);
2090        CFRelease(string);
2091    }
2092    icuStatus = U_ZERO_ERROR;
2093
2094    length = uloc_getCountry(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
2095    if (U_SUCCESS(icuStatus) && length > 0)
2096    {
2097        CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
2098        CFDictionaryAddValue(working, kCFLocaleCountryCodeKey, string);
2099        CFRelease(string);
2100    }
2101    icuStatus = U_ZERO_ERROR;
2102
2103    length = uloc_getVariant(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus);
2104    if (U_SUCCESS(icuStatus) && length > 0)
2105    {
2106        CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true);
2107        CFDictionaryAddValue(working, kCFLocaleVariantCodeKey, string);
2108        CFRelease(string);
2109    }
2110    icuStatus = U_ZERO_ERROR;
2111
2112    // Now get the keywords; open an enumerator on them
2113    UEnumeration *iter = uloc_openKeywords(cLocaleID, &icuStatus);
2114    const char *locKey = NULL;
2115    int32_t locKeyLen = 0;
2116    while ((locKey = uenum_next(iter, &locKeyLen, &icuStatus)) && U_SUCCESS(icuStatus))
2117    {
2118        char locValue[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2119
2120        // Get the value for this keyword
2121        if (uloc_getKeywordValue(cLocaleID, locKey, locValue, sizeof(locValue)/sizeof(char), &icuStatus) > 0
2122            && U_SUCCESS(icuStatus))
2123        {
2124            CFStringRef key = CFStringCreateWithBytes(allocator, (UInt8 *)locKey, strlen(locKey), kCFStringEncodingASCII, true);
2125            CFStringRef value = CFStringCreateWithBytes(allocator, (UInt8 *)locValue, strlen(locValue), kCFStringEncodingASCII, true);
2126            if (key && value)
2127                CFDictionaryAddValue(working, key, value);
2128            if (key)
2129                CFRelease(key);
2130            if (value)
2131                CFRelease(value);
2132        }
2133    }
2134    uenum_close(iter);
2135
2136    out:;
2137#endif
2138    // Convert to an immutable dictionary and return
2139    CFDictionaryRef result = CFDictionaryCreateCopy(allocator, working);
2140    CFRelease(working);
2141    return result;
2142}
2143
2144static char *__CStringFromString(CFStringRef str) {
2145    if (!str) return NULL;
2146    CFRange rg = CFRangeMake(0, CFStringGetLength(str));
2147    CFIndex neededLength = 0;
2148    CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, NULL, 0, &neededLength);
2149    char *buf = (char *)malloc(neededLength + 1);
2150    CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, (uint8_t *)buf, neededLength, &neededLength);
2151    buf[neededLength] = '\0';
2152    return buf;
2153}
2154
2155CFStringRef CFLocaleCreateLocaleIdentifierFromComponents(CFAllocatorRef allocator, CFDictionaryRef dictionary) {
2156    if (!dictionary) return NULL;
2157
2158    CFIndex cnt = CFDictionaryGetCount(dictionary);
2159    STACK_BUFFER_DECL(CFStringRef, values, cnt);
2160    STACK_BUFFER_DECL(CFStringRef, keys, cnt);
2161    CFDictionaryGetKeysAndValues(dictionary, (const void **)keys, (const void **)values);
2162
2163    char *language = NULL, *script = NULL, *country = NULL, *variant = NULL;
2164    for (CFIndex idx = 0; idx < cnt; idx++) {
2165	if (CFEqual(kCFLocaleLanguageCodeKey, keys[idx])) {
2166	    language = __CStringFromString(values[idx]);
2167	    keys[idx] = NULL;
2168	} else if (CFEqual(kCFLocaleScriptCodeKey, keys[idx])) {
2169	    script = __CStringFromString(values[idx]);
2170	    keys[idx] = NULL;
2171	} else if (CFEqual(kCFLocaleCountryCodeKey, keys[idx])) {
2172	    country = __CStringFromString(values[idx]);
2173	    keys[idx] = NULL;
2174	} else if (CFEqual(kCFLocaleVariantCodeKey, keys[idx])) {
2175	    variant = __CStringFromString(values[idx]);
2176	    keys[idx] = NULL;
2177	}
2178    }
2179
2180    char *buf1 = NULL;	// (|L)(|_S)(|_C|_C_V|__V)
2181    asprintf(&buf1, "%s%s%s%s%s%s%s", language ? language : "", script ? "_" : "", script ? script : "", (country || variant ? "_" : ""), country ? country : "", variant ? "_" : "", variant ? variant : "");
2182
2183    char cLocaleID[2 * ULOC_FULLNAME_CAPACITY + 2 * ULOC_KEYWORD_AND_VALUES_CAPACITY];
2184    strlcpy(cLocaleID, buf1, sizeof(cLocaleID));
2185    free(language);
2186    free(script);
2187    free(country);
2188    free(variant);
2189    free(buf1);
2190
2191#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
2192    for (CFIndex idx = 0; idx < cnt; idx++) {
2193	if (keys[idx]) {
2194	    char *key = __CStringFromString(keys[idx]);
2195	    char *value;
2196            if (0 == strcmp(key, "kCFLocaleCalendarKey")) {
2197                // For interchangeability convenience, we alternatively allow a
2198                // calendar object to be passed in, with the alternate key, and
2199                // we'll extract the identifier.
2200                CFCalendarRef cal = (CFCalendarRef)values[idx];
2201                CFStringRef ident = CFCalendarGetIdentifier(cal);
2202                value = __CStringFromString(ident);
2203                char *oldkey = key;
2204                key = strdup("calendar");
2205                free(oldkey);
2206            } else {
2207                value = __CStringFromString(values[idx]);
2208            }
2209	    UErrorCode status = U_ZERO_ERROR;
2210	    uloc_setKeywordValue(key, value, cLocaleID, sizeof(cLocaleID), &status);
2211	    free(key);
2212	    free(value);
2213	}
2214    }
2215#endif
2216
2217    return CFStringCreateWithCString(allocator, cLocaleID, kCFStringEncodingASCII);
2218}
2219
2220