1/*
2 * Copyright (c) 2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*	CFStringEncodingDatabase.c
25	Copyright (c) 2005-2013, Apple Inc. All rights reserved.
26	Responsibility: Aki Inoue
27*/
28
29#include "CFInternal.h"
30#include <CoreFoundation/CFStringEncodingExt.h>
31#include "CFStringEncodingConverterPriv.h"
32#include "CFStringEncodingDatabase.h"
33#include <stdio.h>
34
35#define ISO8859CODEPAGE_BASE (28590)
36
/*
 * Master list of the table-driven encodings handled by this file.
 *
 * __CFGetEncodingIndex() finds an encoding here by bisection, so the entries
 * must stay in ascending numeric order. The position of an entry is also used
 * as the index into __CFWindowsCPList and __CFCanonicalNameList (and, on
 * OS X, __CFOtherNameList; __CFOtherSimilarScriptList uses the same order but
 * starts at kCFStringEncodingDOSLatinUS). Those tables must therefore be kept
 * in step with this one, entry for entry.
 */
static const uint16_t __CFKnownEncodingList[] = {
    // Mac OS encodings
    kCFStringEncodingMacRoman,
    kCFStringEncodingMacJapanese,
    kCFStringEncodingMacChineseTrad,
    kCFStringEncodingMacKorean,
    kCFStringEncodingMacArabic,
    kCFStringEncodingMacHebrew,
    kCFStringEncodingMacGreek,
    kCFStringEncodingMacCyrillic,
    kCFStringEncodingMacDevanagari,
    kCFStringEncodingMacGurmukhi,
    kCFStringEncodingMacGujarati,
    kCFStringEncodingMacOriya,
    kCFStringEncodingMacBengali,
    kCFStringEncodingMacTamil,
    kCFStringEncodingMacTelugu,
    kCFStringEncodingMacKannada,
    kCFStringEncodingMacMalayalam,
    kCFStringEncodingMacSinhalese,
    kCFStringEncodingMacBurmese,
    kCFStringEncodingMacKhmer,
    kCFStringEncodingMacThai,
    kCFStringEncodingMacLaotian,
    kCFStringEncodingMacGeorgian,
    kCFStringEncodingMacArmenian,
    kCFStringEncodingMacChineseSimp,
    kCFStringEncodingMacTibetan,
    kCFStringEncodingMacMongolian,
    kCFStringEncodingMacEthiopic,
    kCFStringEncodingMacCentralEurRoman,
    kCFStringEncodingMacVietnamese,
    kCFStringEncodingMacSymbol,
    kCFStringEncodingMacDingbats,
    kCFStringEncodingMacTurkish,
    kCFStringEncodingMacCroatian,
    kCFStringEncodingMacIcelandic,
    kCFStringEncodingMacRomanian,
    kCFStringEncodingMacCeltic,
    kCFStringEncodingMacGaelic,
    kCFStringEncodingMacFarsi,
    kCFStringEncodingMacUkrainian,
    kCFStringEncodingMacInuit,

    // DOS code pages
    kCFStringEncodingDOSLatinUS,
    kCFStringEncodingDOSGreek,
    kCFStringEncodingDOSBalticRim,
    kCFStringEncodingDOSLatin1,
    kCFStringEncodingDOSGreek1,
    kCFStringEncodingDOSLatin2,
    kCFStringEncodingDOSCyrillic,
    kCFStringEncodingDOSTurkish,
    kCFStringEncodingDOSPortuguese,
    kCFStringEncodingDOSIcelandic,
    kCFStringEncodingDOSHebrew,
    kCFStringEncodingDOSCanadianFrench,
    kCFStringEncodingDOSArabic,
    kCFStringEncodingDOSNordic,
    kCFStringEncodingDOSRussian,
    kCFStringEncodingDOSGreek2,
    kCFStringEncodingDOSThai,
    kCFStringEncodingDOSJapanese,
    kCFStringEncodingDOSChineseSimplif,
    kCFStringEncodingDOSKorean,
    kCFStringEncodingDOSChineseTrad,

    // Windows code pages, followed by plain ASCII
    kCFStringEncodingWindowsLatin1,
    kCFStringEncodingWindowsLatin2,
    kCFStringEncodingWindowsCyrillic,
    kCFStringEncodingWindowsGreek,
    kCFStringEncodingWindowsLatin5,
    kCFStringEncodingWindowsHebrew,
    kCFStringEncodingWindowsArabic,
    kCFStringEncodingWindowsBalticRim,
    kCFStringEncodingWindowsVietnamese,
    kCFStringEncodingWindowsKoreanJohab,
    kCFStringEncodingASCII,

    kCFStringEncodingShiftJIS_X0213,
    kCFStringEncodingGB_18030_2000,

    // ISO 2022 and EUC families
    kCFStringEncodingISO_2022_JP,
    kCFStringEncodingISO_2022_JP_2,
    kCFStringEncodingISO_2022_JP_1,
    kCFStringEncodingISO_2022_JP_3,
    kCFStringEncodingISO_2022_CN,
    kCFStringEncodingISO_2022_CN_EXT,
    kCFStringEncodingISO_2022_KR,
    kCFStringEncodingEUC_JP,
    kCFStringEncodingEUC_CN,
    kCFStringEncodingEUC_TW,
    kCFStringEncodingEUC_KR,

    kCFStringEncodingShiftJIS,

    kCFStringEncodingKOI8_R,

    kCFStringEncodingBig5,

    kCFStringEncodingMacRomanLatin1,
    kCFStringEncodingHZ_GB_2312,
    kCFStringEncodingBig5_HKSCS_1999,
    kCFStringEncodingVISCII,
    kCFStringEncodingKOI8_U,
    kCFStringEncodingBig5_E,
    kCFStringEncodingUTF7_IMAP,

    kCFStringEncodingNextStepLatin,

    kCFStringEncodingEBCDIC_CP037
};
147
// Windows codepage mapping
//
// Entry [i] is the Windows code page reported for __CFKnownEncodingList[i];
// the two tables must have the same number of entries in the same order.
// A value of 0 means no code page is recorded for that encoding: such entries
// are returned as-is by __CFStringEncodingGetWindowsCodePage() and are left
// out of the reverse (code page -> encoding) lookup table.
static const uint16_t __CFWindowsCPList[] = {
    // Mac OS encodings
    10000,
    10001,
    10002,
    10003,
    10004,
    10005,
    10006,
    10007,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    10021,
    0,
    0,
    0,
    10008,
    0,
    0,
    0,
    10029,
    0,
    0,
    0,
    10081,
    10082,
    10079,
    10010,
    0,
    0,
    0,
    10017,
    0,

    // DOS code pages
    437,
    737,
    775,
    850,
    851,
    852,
    855,
    857,
    860,
    861,
    862,
    863,
    864,
    865,
    866,
    869,
    874,
    932,
    936,
    949,
    950,

    // Windows code pages
    1252,
    1250,
    1251,
    1253,
    1254,
    1255,
    1256,
    1257,
    1258,
    1361,

    // ASCII
    20127,

    0,
    54936,

    50221, // we prefer 50221 over the other ISO-2022-JP code pages (50220/50222) since that's what the CF converter generates
    0,
    0,
    0,
    50227,
    0,
    50225,

    51932,
    51936,
    51950,
    51949,

    0,

    20866,

    0,

    0,
    52936,
    0,
    0,
    21866,
    0,
    0,

    0,

    37
};
261
// Canonical name
//
// Entry [i] is the canonical name string for __CFKnownEncodingList[i]; the two
// tables must have the same number of entries in the same order.
//
// NULL means the name is not taken from this table. The DOS and Windows
// entries are NULL because __CFStringEncodingGetCanonicalName() builds their
// names from the code page number ("cp%d" / "windows-%d") instead. Any other
// NULL entry simply has no canonical name.
//
// The Mac OS names are stored without their "x-mac-" prefix; the prefix is
// added when formatting and skipped when parsing. Plain "macintosh"
// (kCFStringEncodingMacRoman) is the one Mac OS name used without a prefix.
static const char *__CFCanonicalNameList[] = {
    // Mac OS encodings
    "macintosh",
    "japanese",
    "trad-chinese",
    "korean",
    "arabic",
    "hebrew",
    "greek",
    "cyrillic",
    "devanagari",
    "gurmukhi",
    "gujarati",
    "oriya",
    "bengali",
    "tamil",
    "telugu",
    "kannada",
    "malayalam",
    "sinhalese",
    "burmese",
    "khmer",
    "thai",
    "laotian",
    "georgian",
    "armenian",
    "simp-chinese",
    "tibetan",
    "mongolian",
    "ethiopic",
    "centraleurroman",
    "vietnamese",
    "symbol",
    "dingbats",
    "turkish",
    "croatian",
    "icelandic",
    "romanian",
    "celtic",
    "gaelic",
    "farsi",
    "ukrainian",
    "inuit",

    // DOS code pages (named from the code page number)
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,

    // Windows code pages (named from the code page number)
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,

    "us-ascii",

    NULL,
    "gb18030",

    "iso-2022-jp",
    "iso-2022-jp-2",
    "iso-2022-jp-1",
    "iso-2022-jp-3",
    "iso-2022-cn",
    "iso-2022-cn-ext",
    "iso-2022-kr",
    "euc-jp",
    "gb2312",
    "euc-tw",
    "euc-kr",

    "shift_jis",

    "koi8-r",

    "big5",

    "roman-latin1",
    "hz-gb-2312",
    "big5-hkscs",
    "viscii",
    "koi8-u",
    NULL,
    "utf7-imap",

    "x-nextstep",

    "ibm037",
};
374
375static inline CFIndex __CFGetEncodingIndex(CFStringEncoding encoding) {
376    const uint16_t *head = __CFKnownEncodingList;
377    const uint16_t *tail = head + ((sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList)) - 1);
378    const uint16_t *middle;
379
380    encoding &= 0x0FFF;
381    while (head <= tail) {
382        middle = head + ((tail - head) >> 1);
383
384        if (encoding == *middle) {
385            return middle - __CFKnownEncodingList;
386        } else if (encoding < *middle) {
387            tail = middle - 1;
388        } else {
389            head = middle + 1;
390        }
391    }
392
393    return kCFNotFound;
394}
395
396CF_PRIVATE uint16_t __CFStringEncodingGetWindowsCodePage(CFStringEncoding encoding) {
397    CFStringEncoding encodingBase = encoding & 0x0F00;
398
399    if (0x0100 == encodingBase) { // UTF
400        switch (encoding) {
401            case kCFStringEncodingUTF7: return 65000;
402            case kCFStringEncodingUTF8: return 65001;
403            case kCFStringEncodingUTF16: return 1200;
404            case kCFStringEncodingUTF16BE: return 1201;
405            case kCFStringEncodingUTF32: return 65005;
406            case kCFStringEncodingUTF32BE: return 65006;
407        }
408    } else if (0x0200 == encodingBase) { // ISO 8859 range
409        return ISO8859CODEPAGE_BASE + (encoding & 0xFF);
410    } else { // others
411        CFIndex index = __CFGetEncodingIndex(encoding);
412
413        if (kCFNotFound != index) return __CFWindowsCPList[index];
414    }
415
416    return 0;
417}
418
419CF_PRIVATE CFStringEncoding __CFStringEncodingGetFromWindowsCodePage(uint16_t codepage) {
420    switch (codepage) {
421        case 65001: return kCFStringEncodingUTF8;
422        case 1200: return kCFStringEncodingUTF16;
423        case 0: return kCFStringEncodingInvalidId;
424        case 1201: return kCFStringEncodingUTF16BE;
425        case 65005: return kCFStringEncodingUTF32;
426        case 65006: return kCFStringEncodingUTF32BE;
427        case 65000: return kCFStringEncodingUTF7;
428    }
429
430    if ((codepage > ISO8859CODEPAGE_BASE) && (codepage <= (ISO8859CODEPAGE_BASE + 16))) {
431        return (codepage - ISO8859CODEPAGE_BASE) + 0x0200;
432    } else {
433        static CFMutableDictionaryRef mappingTable = NULL;
434        static CFSpinLock_t lock = CFSpinLockInit;
435        uintptr_t value;
436
437        __CFSpinLock(&lock);
438        if (NULL == mappingTable) {
439            CFIndex index, count = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList);
440
441            mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
442
443            for (index = 0;index < count;index++) {
444                if (0 != __CFWindowsCPList[index]) CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)__CFWindowsCPList[index], (const void *)(uintptr_t)__CFKnownEncodingList[index]);
445            }
446        }
447        __CFSpinUnlock(&lock);
448
449        if (CFDictionaryGetValueIfPresent(mappingTable, (const void *)(uintptr_t)codepage, (const void **)&value)) return (CFStringEncoding)value;
450    }
451
452
453    return kCFStringEncodingInvalidId;
454}
455
456CF_PRIVATE bool __CFStringEncodingGetCanonicalName(CFStringEncoding encoding, char *buffer, CFIndex bufferSize) {
457    const char *format = "%s";
458    const char *name = NULL;
459    uint32_t value = 0;
460    CFIndex index;
461
462    switch (encoding & 0x0F00) {
463        case 0x0100: // UTF range
464            switch (encoding) {
465                case kCFStringEncodingUTF7: name = "utf-7"; break;
466                case kCFStringEncodingUTF8: name = "utf-8"; break;
467                case kCFStringEncodingUTF16: name = "utf-16"; break;
468                case kCFStringEncodingUTF16BE: name = "utf-16be"; break;
469                case kCFStringEncodingUTF16LE: name = "utf-16le"; break;
470                case kCFStringEncodingUTF32: name = "utf-32"; break;
471                case kCFStringEncodingUTF32BE: name = "utf-32be"; break;
472                case kCFStringEncodingUTF32LE: name = "utf-32le"; break;
473            }
474            break;
475
476        case 0x0200: // ISO 8859 range
477            format = "iso-8859-%d";
478            value = (encoding & 0xFF);
479            break;
480
481        case 0x0400: // DOS code page range
482        case 0x0500: // Windows code page range
483            index = __CFGetEncodingIndex(encoding);
484
485            if (kCFNotFound != index) {
486                value = __CFWindowsCPList[index];
487                if (0 != value) format = ((0x0400 == (encoding & 0x0F00)) ? "cp%d" : "windows-%d");
488            }
489            break;
490
491        default: // others
492            index = __CFGetEncodingIndex(encoding);
493
494            if (kCFNotFound != index) {
495                if (((0 == (encoding & 0x0F00)) && (kCFStringEncodingMacRoman != encoding)) || (kCFStringEncodingMacRomanLatin1 == encoding)) format = "x-mac-%s";
496                name = (const char *)__CFCanonicalNameList[index];
497            }
498            break;
499    }
500
501    if ((0 == value) && (NULL == name)) {
502        return false;
503    } else if (0 != value) {
504        return ((snprintf(buffer, bufferSize, format, value) < bufferSize) ? true : false);
505    } else {
506        return ((snprintf(buffer, bufferSize, format, name) < bufferSize) ? true : false);
507    }
508}
509
510#define LENGTH_LIMIT (256)
511static Boolean __CFCanonicalNameCompare(const void *value1, const void *value2) { return ((0 == strncasecmp_l((const char *)value1, (const char *)value2, LENGTH_LIMIT, NULL)) ? true : false); }
512
513static CFHashCode __CFCanonicalNameHash(const void *value) {
514    const char *name = (const char *)value;
515    CFHashCode code = 0;
516
517    while ((0 != *name) && ((name - (const char *)value) < LENGTH_LIMIT)) {
518        char character = *(name++);
519
520        code += (character + (((character >= 'A') && (character <= 'Z')) ? 'a' - 'A' : 0));
521    }
522
523    return code * (name - (const char *)value);
524}
525
526CF_PRIVATE CFStringEncoding __CFStringEncodingGetFromCanonicalName(const char *canonicalName) {
527    CFStringEncoding encoding;
528    CFIndex prefixLength;
529    static CFMutableDictionaryRef mappingTable = NULL;
530    static CFSpinLock_t lock = CFSpinLockInit;
531
532    prefixLength = strlen("iso-8859-");
533    if (0 == strncasecmp_l(canonicalName, "iso-8859-", prefixLength, NULL)) {// do ISO
534        encoding = strtol(canonicalName + prefixLength, NULL, 10);
535
536        return (((0 == encoding) || (encoding > 16)) ? kCFStringEncodingInvalidId : encoding + 0x0200);
537    }
538
539    prefixLength = strlen("cp");
540    if (0 == strncasecmp_l(canonicalName, "cp", prefixLength, NULL)) {// do DOS
541        encoding = strtol(canonicalName + prefixLength, NULL, 10);
542
543        return __CFStringEncodingGetFromWindowsCodePage(encoding);
544    }
545
546    prefixLength = strlen("windows-");
547    if (0 == strncasecmp_l(canonicalName, "windows-", prefixLength, NULL)) {// do DOS
548        encoding = strtol(canonicalName + prefixLength, NULL, 10);
549
550        return __CFStringEncodingGetFromWindowsCodePage(encoding);
551    }
552
553    __CFSpinLock(&lock);
554    if (NULL == mappingTable) {
555        CFIndex index, count = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList);
556
557        CFDictionaryKeyCallBacks keys = {
558            0, NULL, NULL, NULL, &__CFCanonicalNameCompare, &__CFCanonicalNameHash
559        };
560
561        mappingTable = CFDictionaryCreateMutable(NULL, 0, &keys, NULL);
562
563        // Add UTFs
564        CFDictionarySetValue(mappingTable, "utf-7", (const void *)kCFStringEncodingUTF7);
565        CFDictionarySetValue(mappingTable, "utf-8", (const void *)kCFStringEncodingUTF8);
566        CFDictionarySetValue(mappingTable, "utf-16", (const void *)kCFStringEncodingUTF16);
567        CFDictionarySetValue(mappingTable, "utf-16be", (const void *)kCFStringEncodingUTF16BE);
568        CFDictionarySetValue(mappingTable, "utf-16le", (const void *)kCFStringEncodingUTF16LE);
569        CFDictionarySetValue(mappingTable, "utf-32", (const void *)kCFStringEncodingUTF32);
570        CFDictionarySetValue(mappingTable, "utf-32be", (const void *)kCFStringEncodingUTF32BE);
571        CFDictionarySetValue(mappingTable, "utf-32le", (const void *)kCFStringEncodingUTF32LE);
572
573        for (index = 0;index < count;index++) {
574            if (NULL != __CFCanonicalNameList[index]) CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)__CFCanonicalNameList[index], (const void *)(uintptr_t)__CFKnownEncodingList[index]);
575        }
576    }
577    __CFSpinUnlock(&lock);
578
579    if (0 == strncasecmp_l(canonicalName, "macintosh", sizeof("macintosh") - 1, NULL)) return kCFStringEncodingMacRoman;
580
581
582    prefixLength = strlen("x-mac-");
583    encoding = (CFStringEncoding)(CFIndex)CFDictionaryGetValue(mappingTable, canonicalName + ((0 == strncasecmp_l(canonicalName, "x-mac-", prefixLength, NULL)) ? prefixLength : 0));
584
585    return ((0 == encoding) ? kCFStringEncodingInvalidId : encoding);
586}
587#undef LENGTH_LIMIT
588
589#if DEPLOYMENT_TARGET_MACOSX
590// This list indexes from DOS range
591static uint16_t __CFISO8859SimilarScriptList[] = {
592    kCFStringEncodingMacRoman,
593    kCFStringEncodingMacCentralEurRoman,
594    kCFStringEncodingMacRoman,
595    kCFStringEncodingMacCentralEurRoman,
596    kCFStringEncodingMacCyrillic,
597    kCFStringEncodingMacArabic,
598    kCFStringEncodingMacGreek,
599    kCFStringEncodingMacHebrew,
600    kCFStringEncodingMacTurkish,
601    kCFStringEncodingMacInuit,
602    kCFStringEncodingMacThai,
603    kCFStringEncodingMacRoman,
604    kCFStringEncodingMacCentralEurRoman,
605    kCFStringEncodingMacCeltic,
606    kCFStringEncodingMacRoman,
607    kCFStringEncodingMacRomanian};
608
609static uint16_t __CFOtherSimilarScriptList[] = {
610    kCFStringEncodingMacRoman,
611    kCFStringEncodingMacGreek,
612    kCFStringEncodingMacCentralEurRoman,
613    kCFStringEncodingMacRoman,
614    kCFStringEncodingMacGreek,
615    kCFStringEncodingMacCentralEurRoman,
616    kCFStringEncodingMacCyrillic,
617    kCFStringEncodingMacTurkish,
618    kCFStringEncodingMacRoman,
619    kCFStringEncodingMacIcelandic,
620    kCFStringEncodingMacHebrew,
621    kCFStringEncodingMacRoman,
622    kCFStringEncodingMacArabic,
623    kCFStringEncodingMacInuit,
624    kCFStringEncodingMacCyrillic,
625    kCFStringEncodingMacGreek,
626    kCFStringEncodingMacThai,
627    kCFStringEncodingMacJapanese,
628    kCFStringEncodingMacChineseSimp,
629    kCFStringEncodingMacKorean,
630    kCFStringEncodingMacChineseTrad,
631
632    kCFStringEncodingMacRoman,
633    kCFStringEncodingMacCentralEurRoman,
634    kCFStringEncodingMacCyrillic,
635    kCFStringEncodingMacGreek,
636    kCFStringEncodingMacTurkish,
637    kCFStringEncodingMacHebrew,
638    kCFStringEncodingMacArabic,
639    kCFStringEncodingMacCentralEurRoman,
640    kCFStringEncodingMacVietnamese,
641    kCFStringEncodingMacKorean,
642
643    kCFStringEncodingMacRoman,
644
645    kCFStringEncodingMacJapanese,
646    kCFStringEncodingMacChineseSimp,
647
648    kCFStringEncodingMacJapanese,
649    kCFStringEncodingMacJapanese,
650    kCFStringEncodingMacJapanese,
651    kCFStringEncodingMacJapanese,
652    kCFStringEncodingMacChineseSimp,
653    kCFStringEncodingMacChineseSimp,
654    kCFStringEncodingMacKorean,
655    kCFStringEncodingMacJapanese,
656    kCFStringEncodingMacChineseSimp,
657    kCFStringEncodingMacChineseTrad,
658    kCFStringEncodingMacKorean,
659
660    kCFStringEncodingMacJapanese,
661
662    kCFStringEncodingMacCyrillic,
663
664    kCFStringEncodingMacChineseTrad,
665
666    kCFStringEncodingMacRoman,
667    kCFStringEncodingMacChineseSimp,
668    kCFStringEncodingMacChineseTrad,
669    kCFStringEncodingMacVietnamese,
670    kCFStringEncodingMacUkrainian,
671    kCFStringEncodingMacChineseTrad,
672    kCFStringEncodingMacRoman,
673
674    kCFStringEncodingMacRoman,
675
676    kCFStringEncodingMacRoman
677};
678
// Display names for the ISO 8859 encodings, returned by
// __CFStringEncodingGetName(). Entry [n - 1] is the name for ISO 8859-n
// (the low byte of the encoding minus one). A NULL entry means no display
// name is provided for that part.
static const char *__CFISONameList[] = {
    "Western (ISO Latin 1)",
    "Central European (ISO Latin 2)",
    "Western (ISO Latin 3)",
    "Central European (ISO Latin 4)",
    "Cyrillic (ISO 8859-5)",
    "Arabic (ISO 8859-6)",
    "Greek (ISO 8859-7)",
    "Hebrew (ISO 8859-8)",
    "Turkish (ISO Latin 5)",
    "Nordic (ISO Latin 6)",
    "Thai (ISO 8859-11)",
    NULL,
    "Baltic (ISO Latin 7)",
    "Celtic (ISO Latin 8)",
    "Western (ISO Latin 9)",
    "Romanian (ISO Latin 10)",
};
697
// Display names for the table-driven encodings, returned by
// __CFStringEncodingGetName(). Entry [i] is the name for
// __CFKnownEncodingList[i]; the two tables must have the same number of
// entries in the same order. A NULL entry means no display name is provided
// for that encoding.
static const char *__CFOtherNameList[] = {
    // Mac OS encodings
    "Western (Mac OS Roman)",
    "Japanese (Mac OS)",
    "Traditional Chinese (Mac OS)",
    "Korean (Mac OS)",
    "Arabic (Mac OS)",
    "Hebrew (Mac OS)",
    "Greek (Mac OS)",
    "Cyrillic (Mac OS)",
    "Devanagari (Mac OS)",
    "Gurmukhi (Mac OS)",
    "Gujarati (Mac OS)",
    "Oriya (Mac OS)",
    "Bengali (Mac OS)",
    "Tamil (Mac OS)",
    "Telugu (Mac OS)",
    "Kannada (Mac OS)",
    "Malayalam (Mac OS)",
    "Sinhalese (Mac OS)",
    "Burmese (Mac OS)",
    "Khmer (Mac OS)",
    "Thai (Mac OS)",
    "Laotian (Mac OS)",
    "Georgian (Mac OS)",
    "Armenian (Mac OS)",
    "Simplified Chinese (Mac OS)",
    "Tibetan (Mac OS)",
    "Mongolian (Mac OS)",
    "Ethiopic (Mac OS)",
    "Central European (Mac OS)",
    "Vietnamese (Mac OS)",
    "Symbol (Mac OS)",
    "Dingbats (Mac OS)",
    "Turkish (Mac OS)",
    "Croatian (Mac OS)",
    "Icelandic (Mac OS)",
    "Romanian (Mac OS)",
    "Celtic (Mac OS)",
    "Gaelic (Mac OS)",
    "Farsi (Mac OS)",
    "Cyrillic (Mac OS Ukrainian)",
    "Inuit (Mac OS)",
    // DOS code pages
    "Latin-US (DOS)",
    "Greek (DOS)",
    "Baltic (DOS)",
    "Western (DOS Latin 1)",
    "Greek (DOS Greek 1)",
    "Central European (DOS Latin 2)",
    "Cyrillic (DOS)",
    "Turkish (DOS)",
    "Portuguese (DOS)",
    "Icelandic (DOS)",
    "Hebrew (DOS)",
    "Canadian French (DOS)",
    "Arabic (DOS)",
    "Nordic (DOS)",
    "Russian (DOS)",
    "Greek (DOS Greek 2)",
    "Thai (Windows, DOS)",
    "Japanese (Windows, DOS)",
    "Simplified Chinese (Windows, DOS)",
    "Korean (Windows, DOS)",
    "Traditional Chinese (Windows, DOS)",
    // Windows code pages
    "Western (Windows Latin 1)",
    "Central European (Windows Latin 2)",
    "Cyrillic (Windows)",
    "Greek (Windows)",
    "Turkish (Windows Latin 5)",
    "Hebrew (Windows)",
    "Arabic (Windows)",
    "Baltic (Windows)",
    "Vietnamese (Windows)",
    "Korean (Windows Johab)",
    // Remaining encodings, in __CFKnownEncodingList order
    "Western (ASCII)",
    "Japanese (Shift JIS X0213)",
    "Chinese (GB 18030)",
    "Japanese (ISO 2022-JP)",
    "Japanese (ISO 2022-JP-2)",
    "Japanese (ISO 2022-JP-1)",
    "Japanese (ISO 2022-JP-3)",
    "Chinese (ISO 2022-CN)",
    "Chinese (ISO 2022-CN-EXT)",
    "Korean (ISO 2022-KR)",
    "Japanese (EUC)",
    "Simplified Chinese (GB 2312)",
    "Traditional Chinese (EUC)",
    "Korean (EUC)",
    "Japanese (Shift JIS)",
    "Cyrillic (KOI8-R)",
    "Traditional Chinese (Big 5)",
    "Western (Mac Mail)",
    "Simplified Chinese (HZ GB 2312)",
    "Traditional Chinese (Big 5 HKSCS)",
    NULL,
    "Ukrainian (KOI8-U)",
    "Traditional Chinese (Big 5-E)",
    NULL,
    "Western (NextStep)",
    "Western (EBCDIC Latin 1)",
};
798#endif /* DEPLOYMENT_TARGET_MACOSX */
799
800CF_PRIVATE CFStringEncoding __CFStringEncodingGetMostCompatibleMacScript(CFStringEncoding encoding) {
801#if DEPLOYMENT_TARGET_MACOSX
802    switch (encoding & 0x0F00) {
803        case 0: return encoding & 0xFF; break; // Mac scripts
804
805        case 0x0100: return kCFStringEncodingUnicode; break; // Unicode
806
807        case 0x200: // ISO 8859
808            return (((encoding & 0xFF) <= (sizeof(__CFISO8859SimilarScriptList) / sizeof(*__CFISO8859SimilarScriptList))) ? __CFISO8859SimilarScriptList[(encoding & 0xFF) - 1] : kCFStringEncodingInvalidId);
809            break;
810
811        default: {
812            CFIndex index = __CFGetEncodingIndex(encoding);
813
814            if (kCFNotFound != index) {
815                index -= __CFGetEncodingIndex(kCFStringEncodingDOSLatinUS);
816                return __CFOtherSimilarScriptList[index];
817            }
818        }
819    }
820#endif /* DEPLOYMENT_TARGET_MACOSX */
821
822    return kCFStringEncodingInvalidId;
823}
824
825CF_PRIVATE const char *__CFStringEncodingGetName(CFStringEncoding encoding) {
826    switch (encoding) {
827        case kCFStringEncodingUTF8: return "Unicode (UTF-8)"; break;
828        case kCFStringEncodingUTF16: return "Unicode (UTF-16)"; break;
829        case kCFStringEncodingUTF16BE: return "Unicode (UTF-16BE)"; break;
830        case kCFStringEncodingUTF16LE: return "Unicode (UTF-16LE)"; break;
831        case kCFStringEncodingUTF32: return "Unicode (UTF-32)"; break;
832        case kCFStringEncodingUTF32BE: return "Unicode (UTF-32BE)"; break;
833        case kCFStringEncodingUTF32LE: return "Unicode (UTF-32LE)"; break;
834        case kCFStringEncodingNonLossyASCII: return "Non-lossy ASCII"; break;
835        case kCFStringEncodingUTF7: return "Unicode (UTF-7)"; break;
836    }
837
838#if DEPLOYMENT_TARGET_MACOSX
839    if (0x0200 == (encoding & 0x0F00)) {
840        encoding &= 0x00FF;
841
842        if (encoding <= (sizeof(__CFISONameList) / sizeof(*__CFISONameList))) return __CFISONameList[encoding - 1];
843    } else {
844        CFIndex index = __CFGetEncodingIndex(encoding);
845
846        if (kCFNotFound != index) return __CFOtherNameList[index];
847    }
848#endif /* DEPLOYMENT_TARGET_MACOSX */
849
850    return NULL;
851}
852