1/* 2 * Copyright (c) 2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. Please obtain a copy of the License at 10 * http://www.opensource.apple.com/apsl/ and read it before using this 11 * file. 12 * 13 * The Original Code and all software distributed under the License are 14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 18 * Please see the License for the specific language governing rights and 19 * limitations under the License. 20 * 21 * @APPLE_LICENSE_HEADER_END@ 22 */ 23 24/* CFStringEncodingDatabase.c 25 Copyright (c) 2005-2013, Apple Inc. All rights reserved. 26 Responsibility: Aki Inoue 27*/ 28 29#include "CFInternal.h" 30#include <CoreFoundation/CFStringEncodingExt.h> 31#include "CFStringEncodingConverterPriv.h" 32#include "CFStringEncodingDatabase.h" 33#include <stdio.h> 34 35#define ISO8859CODEPAGE_BASE (28590) 36 37static const uint16_t __CFKnownEncodingList[] = { 38 kCFStringEncodingMacRoman, 39 kCFStringEncodingMacJapanese, 40 kCFStringEncodingMacChineseTrad, 41 kCFStringEncodingMacKorean, 42 kCFStringEncodingMacArabic, 43 kCFStringEncodingMacHebrew, 44 kCFStringEncodingMacGreek, 45 kCFStringEncodingMacCyrillic, 46 kCFStringEncodingMacDevanagari, 47 kCFStringEncodingMacGurmukhi, 48 kCFStringEncodingMacGujarati, 49 kCFStringEncodingMacOriya, 50 kCFStringEncodingMacBengali, 51 kCFStringEncodingMacTamil, 52 kCFStringEncodingMacTelugu, 53 kCFStringEncodingMacKannada, 54 kCFStringEncodingMacMalayalam, 55 kCFStringEncodingMacSinhalese, 56 kCFStringEncodingMacBurmese, 57 kCFStringEncodingMacKhmer, 58 kCFStringEncodingMacThai, 59 kCFStringEncodingMacLaotian, 60 kCFStringEncodingMacGeorgian, 61 kCFStringEncodingMacArmenian, 62 kCFStringEncodingMacChineseSimp, 63 kCFStringEncodingMacTibetan, 64 kCFStringEncodingMacMongolian, 65 kCFStringEncodingMacEthiopic, 66 kCFStringEncodingMacCentralEurRoman, 67 kCFStringEncodingMacVietnamese, 68 kCFStringEncodingMacSymbol, 69 kCFStringEncodingMacDingbats, 70 kCFStringEncodingMacTurkish, 71 kCFStringEncodingMacCroatian, 72 kCFStringEncodingMacIcelandic, 73 kCFStringEncodingMacRomanian, 74 kCFStringEncodingMacCeltic, 75 kCFStringEncodingMacGaelic, 76 kCFStringEncodingMacFarsi, 77 kCFStringEncodingMacUkrainian, 78 kCFStringEncodingMacInuit, 79 80 kCFStringEncodingDOSLatinUS, 81 kCFStringEncodingDOSGreek, 82 kCFStringEncodingDOSBalticRim, 83 kCFStringEncodingDOSLatin1, 84 kCFStringEncodingDOSGreek1, 85 kCFStringEncodingDOSLatin2, 86 kCFStringEncodingDOSCyrillic, 87 kCFStringEncodingDOSTurkish, 88 kCFStringEncodingDOSPortuguese, 89 kCFStringEncodingDOSIcelandic, 90 kCFStringEncodingDOSHebrew, 91 kCFStringEncodingDOSCanadianFrench, 92 kCFStringEncodingDOSArabic, 93 kCFStringEncodingDOSNordic, 94 kCFStringEncodingDOSRussian, 95 kCFStringEncodingDOSGreek2, 96 kCFStringEncodingDOSThai, 97 kCFStringEncodingDOSJapanese, 98 kCFStringEncodingDOSChineseSimplif, 99 kCFStringEncodingDOSKorean, 100 kCFStringEncodingDOSChineseTrad, 101 102 kCFStringEncodingWindowsLatin1, 103 kCFStringEncodingWindowsLatin2, 104 kCFStringEncodingWindowsCyrillic, 105 kCFStringEncodingWindowsGreek, 106 kCFStringEncodingWindowsLatin5, 107 kCFStringEncodingWindowsHebrew, 108 kCFStringEncodingWindowsArabic, 109 kCFStringEncodingWindowsBalticRim, 110 kCFStringEncodingWindowsVietnamese, 111 kCFStringEncodingWindowsKoreanJohab, 112 kCFStringEncodingASCII, 113 114 kCFStringEncodingShiftJIS_X0213, 115 kCFStringEncodingGB_18030_2000, 116 117 kCFStringEncodingISO_2022_JP, 118 kCFStringEncodingISO_2022_JP_2, 119 kCFStringEncodingISO_2022_JP_1, 120 kCFStringEncodingISO_2022_JP_3, 121 kCFStringEncodingISO_2022_CN, 122 kCFStringEncodingISO_2022_CN_EXT, 123 kCFStringEncodingISO_2022_KR, 124 kCFStringEncodingEUC_JP, 125 kCFStringEncodingEUC_CN, 126 kCFStringEncodingEUC_TW, 127 kCFStringEncodingEUC_KR, 128 129 kCFStringEncodingShiftJIS, 130 131 kCFStringEncodingKOI8_R, 132 133 kCFStringEncodingBig5, 134 135 kCFStringEncodingMacRomanLatin1, 136 kCFStringEncodingHZ_GB_2312, 137 kCFStringEncodingBig5_HKSCS_1999, 138 kCFStringEncodingVISCII, 139 kCFStringEncodingKOI8_U, 140 kCFStringEncodingBig5_E, 141 kCFStringEncodingUTF7_IMAP, 142 143 kCFStringEncodingNextStepLatin, 144 145 kCFStringEncodingEBCDIC_CP037 146}; 147 148// Windows codepage mapping 149static const uint16_t __CFWindowsCPList[] = { 150 10000, 151 10001, 152 10002, 153 10003, 154 10004, 155 10005, 156 10006, 157 10007, 158 0, 159 0, 160 0, 161 0, 162 0, 163 0, 164 0, 165 0, 166 0, 167 0, 168 0, 169 0, 170 10021, 171 0, 172 0, 173 0, 174 10008, 175 0, 176 0, 177 0, 178 10029, 179 0, 180 0, 181 0, 182 10081, 183 10082, 184 10079, 185 10010, 186 0, 187 0, 188 0, 189 10017, 190 0, 191 192 437, 193 737, 194 775, 195 850, 196 851, 197 852, 198 855, 199 857, 200 860, 201 861, 202 862, 203 863, 204 864, 205 865, 206 866, 207 869, 208 874, 209 932, 210 936, 211 949, 212 950, 213 214 1252, 215 1250, 216 1251, 217 1253, 218 1254, 219 1255, 220 1256, 221 1257, 222 1258, 223 1361, 224 225 20127, 226 227 0, 228 54936, 229 230 50221, // we prefere this over 50220/50221 since that's what CF coverter generates 231 0, 232 0, 233 0, 234 50227, 235 0, 236 50225, 237 238 51932, 239 51936, 240 51950, 241 51949, 242 243 0, 244 245 20866, 246 247 0, 248 249 0, 250 52936, 251 0, 252 0, 253 21866, 254 0, 255 0, 256 257 0, 258 259 37 260}; 261 262// Canonical name 263static const char *__CFCanonicalNameList[] = { 264 "macintosh", 265 "japanese", 266 "trad-chinese", 267 "korean", 268 "arabic", 269 "hebrew", 270 "greek", 271 "cyrillic", 272 "devanagari", 273 "gurmukhi", 274 "gujarati", 275 "oriya", 276 "bengali", 277 "tamil", 278 "telugu", 279 "kannada", 280 "malayalam", 281 "sinhalese", 282 "burmese", 283 "khmer", 284 "thai", 285 "laotian", 286 "georgian", 287 "armenian", 288 "simp-chinese", 289 "tibetan", 290 "mongolian", 291 "ethiopic", 292 "centraleurroman", 293 "vietnamese", 294 "symbol", 295 "dingbats", 296 "turkish", 297 "croatian", 298 "icelandic", 299 "romanian", 300 "celtic", 301 "gaelic", 302 "farsi", 303 "ukrainian", 304 "inuit", 305 306 NULL, 307 NULL, 308 NULL, 309 NULL, 310 NULL, 311 NULL, 312 NULL, 313 NULL, 314 NULL, 315 NULL, 316 NULL, 317 NULL, 318 NULL, 319 NULL, 320 NULL, 321 NULL, 322 NULL, 323 NULL, 324 NULL, 325 NULL, 326 NULL, 327 328 NULL, 329 NULL, 330 NULL, 331 NULL, 332 NULL, 333 NULL, 334 NULL, 335 NULL, 336 NULL, 337 NULL, 338 339 "us-ascii", 340 341 NULL, 342 "gb18030", 343 344 "iso-2022-jp", 345 "iso-2022-jp-2", 346 "iso-2022-jp-1", 347 "iso-2022-jp-3", 348 "iso-2022-cn", 349 "iso-2022-cn-ext", 350 "iso-2022-kr", 351 "euc-jp", 352 "gb2312", 353 "euc-tw", 354 "euc-kr", 355 356 "shift_jis", 357 358 "koi8-r", 359 360 "big5", 361 362 "roman-latin1", 363 "hz-gb-2312", 364 "big5-hkscs", 365 "viscii", 366 "koi8-u", 367 NULL, 368 "utf7-imap", 369 370 "x-nextstep", 371 372 "ibm037", 373}; 374 375static inline CFIndex __CFGetEncodingIndex(CFStringEncoding encoding) { 376 const uint16_t *head = __CFKnownEncodingList; 377 const uint16_t *tail = head + ((sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList)) - 1); 378 const uint16_t *middle; 379 380 encoding &= 0x0FFF; 381 while (head <= tail) { 382 middle = head + ((tail - head) >> 1); 383 384 if (encoding == *middle) { 385 return middle - __CFKnownEncodingList; 386 } else if (encoding < *middle) { 387 tail = middle - 1; 388 } else { 389 head = middle + 1; 390 } 391 } 392 393 return kCFNotFound; 394} 395 396CF_PRIVATE uint16_t __CFStringEncodingGetWindowsCodePage(CFStringEncoding encoding) { 397 CFStringEncoding encodingBase = encoding & 0x0F00; 398 399 if (0x0100 == encodingBase) { // UTF 400 switch (encoding) { 401 case kCFStringEncodingUTF7: return 65000; 402 case kCFStringEncodingUTF8: return 65001; 403 case kCFStringEncodingUTF16: return 1200; 404 case kCFStringEncodingUTF16BE: return 1201; 405 case kCFStringEncodingUTF32: return 65005; 406 case kCFStringEncodingUTF32BE: return 65006; 407 } 408 } else if (0x0200 == encodingBase) { // ISO 8859 range 409 return ISO8859CODEPAGE_BASE + (encoding & 0xFF); 410 } else { // others 411 CFIndex index = __CFGetEncodingIndex(encoding); 412 413 if (kCFNotFound != index) return __CFWindowsCPList[index]; 414 } 415 416 return 0; 417} 418 419CF_PRIVATE CFStringEncoding __CFStringEncodingGetFromWindowsCodePage(uint16_t codepage) { 420 switch (codepage) { 421 case 65001: return kCFStringEncodingUTF8; 422 case 1200: return kCFStringEncodingUTF16; 423 case 0: return kCFStringEncodingInvalidId; 424 case 1201: return kCFStringEncodingUTF16BE; 425 case 65005: return kCFStringEncodingUTF32; 426 case 65006: return kCFStringEncodingUTF32BE; 427 case 65000: return kCFStringEncodingUTF7; 428 } 429 430 if ((codepage > ISO8859CODEPAGE_BASE) && (codepage <= (ISO8859CODEPAGE_BASE + 16))) { 431 return (codepage - ISO8859CODEPAGE_BASE) + 0x0200; 432 } else { 433 static CFMutableDictionaryRef mappingTable = NULL; 434 static CFSpinLock_t lock = CFSpinLockInit; 435 uintptr_t value; 436 437 __CFSpinLock(&lock); 438 if (NULL == mappingTable) { 439 CFIndex index, count = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList); 440 441 mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, NULL); 442 443 for (index = 0;index < count;index++) { 444 if (0 != __CFWindowsCPList[index]) CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)__CFWindowsCPList[index], (const void *)(uintptr_t)__CFKnownEncodingList[index]); 445 } 446 } 447 __CFSpinUnlock(&lock); 448 449 if (CFDictionaryGetValueIfPresent(mappingTable, (const void *)(uintptr_t)codepage, (const void **)&value)) return (CFStringEncoding)value; 450 } 451 452 453 return kCFStringEncodingInvalidId; 454} 455 456CF_PRIVATE bool __CFStringEncodingGetCanonicalName(CFStringEncoding encoding, char *buffer, CFIndex bufferSize) { 457 const char *format = "%s"; 458 const char *name = NULL; 459 uint32_t value = 0; 460 CFIndex index; 461 462 switch (encoding & 0x0F00) { 463 case 0x0100: // UTF range 464 switch (encoding) { 465 case kCFStringEncodingUTF7: name = "utf-7"; break; 466 case kCFStringEncodingUTF8: name = "utf-8"; break; 467 case kCFStringEncodingUTF16: name = "utf-16"; break; 468 case kCFStringEncodingUTF16BE: name = "utf-16be"; break; 469 case kCFStringEncodingUTF16LE: name = "utf-16le"; break; 470 case kCFStringEncodingUTF32: name = "utf-32"; break; 471 case kCFStringEncodingUTF32BE: name = "utf-32be"; break; 472 case kCFStringEncodingUTF32LE: name = "utf-32le"; break; 473 } 474 break; 475 476 case 0x0200: // ISO 8859 range 477 format = "iso-8859-%d"; 478 value = (encoding & 0xFF); 479 break; 480 481 case 0x0400: // DOS code page range 482 case 0x0500: // Windows code page range 483 index = __CFGetEncodingIndex(encoding); 484 485 if (kCFNotFound != index) { 486 value = __CFWindowsCPList[index]; 487 if (0 != value) format = ((0x0400 == (encoding & 0x0F00)) ? "cp%d" : "windows-%d"); 488 } 489 break; 490 491 default: // others 492 index = __CFGetEncodingIndex(encoding); 493 494 if (kCFNotFound != index) { 495 if (((0 == (encoding & 0x0F00)) && (kCFStringEncodingMacRoman != encoding)) || (kCFStringEncodingMacRomanLatin1 == encoding)) format = "x-mac-%s"; 496 name = (const char *)__CFCanonicalNameList[index]; 497 } 498 break; 499 } 500 501 if ((0 == value) && (NULL == name)) { 502 return false; 503 } else if (0 != value) { 504 return ((snprintf(buffer, bufferSize, format, value) < bufferSize) ? true : false); 505 } else { 506 return ((snprintf(buffer, bufferSize, format, name) < bufferSize) ? true : false); 507 } 508} 509 510#define LENGTH_LIMIT (256) 511static Boolean __CFCanonicalNameCompare(const void *value1, const void *value2) { return ((0 == strncasecmp_l((const char *)value1, (const char *)value2, LENGTH_LIMIT, NULL)) ? true : false); } 512 513static CFHashCode __CFCanonicalNameHash(const void *value) { 514 const char *name = (const char *)value; 515 CFHashCode code = 0; 516 517 while ((0 != *name) && ((name - (const char *)value) < LENGTH_LIMIT)) { 518 char character = *(name++); 519 520 code += (character + (((character >= 'A') && (character <= 'Z')) ? 'a' - 'A' : 0)); 521 } 522 523 return code * (name - (const char *)value); 524} 525 526CF_PRIVATE CFStringEncoding __CFStringEncodingGetFromCanonicalName(const char *canonicalName) { 527 CFStringEncoding encoding; 528 CFIndex prefixLength; 529 static CFMutableDictionaryRef mappingTable = NULL; 530 static CFSpinLock_t lock = CFSpinLockInit; 531 532 prefixLength = strlen("iso-8859-"); 533 if (0 == strncasecmp_l(canonicalName, "iso-8859-", prefixLength, NULL)) {// do ISO 534 encoding = strtol(canonicalName + prefixLength, NULL, 10); 535 536 return (((0 == encoding) || (encoding > 16)) ? kCFStringEncodingInvalidId : encoding + 0x0200); 537 } 538 539 prefixLength = strlen("cp"); 540 if (0 == strncasecmp_l(canonicalName, "cp", prefixLength, NULL)) {// do DOS 541 encoding = strtol(canonicalName + prefixLength, NULL, 10); 542 543 return __CFStringEncodingGetFromWindowsCodePage(encoding); 544 } 545 546 prefixLength = strlen("windows-"); 547 if (0 == strncasecmp_l(canonicalName, "windows-", prefixLength, NULL)) {// do DOS 548 encoding = strtol(canonicalName + prefixLength, NULL, 10); 549 550 return __CFStringEncodingGetFromWindowsCodePage(encoding); 551 } 552 553 __CFSpinLock(&lock); 554 if (NULL == mappingTable) { 555 CFIndex index, count = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList); 556 557 CFDictionaryKeyCallBacks keys = { 558 0, NULL, NULL, NULL, &__CFCanonicalNameCompare, &__CFCanonicalNameHash 559 }; 560 561 mappingTable = CFDictionaryCreateMutable(NULL, 0, &keys, NULL); 562 563 // Add UTFs 564 CFDictionarySetValue(mappingTable, "utf-7", (const void *)kCFStringEncodingUTF7); 565 CFDictionarySetValue(mappingTable, "utf-8", (const void *)kCFStringEncodingUTF8); 566 CFDictionarySetValue(mappingTable, "utf-16", (const void *)kCFStringEncodingUTF16); 567 CFDictionarySetValue(mappingTable, "utf-16be", (const void *)kCFStringEncodingUTF16BE); 568 CFDictionarySetValue(mappingTable, "utf-16le", (const void *)kCFStringEncodingUTF16LE); 569 CFDictionarySetValue(mappingTable, "utf-32", (const void *)kCFStringEncodingUTF32); 570 CFDictionarySetValue(mappingTable, "utf-32be", (const void *)kCFStringEncodingUTF32BE); 571 CFDictionarySetValue(mappingTable, "utf-32le", (const void *)kCFStringEncodingUTF32LE); 572 573 for (index = 0;index < count;index++) { 574 if (NULL != __CFCanonicalNameList[index]) CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)__CFCanonicalNameList[index], (const void *)(uintptr_t)__CFKnownEncodingList[index]); 575 } 576 } 577 __CFSpinUnlock(&lock); 578 579 if (0 == strncasecmp_l(canonicalName, "macintosh", sizeof("macintosh") - 1, NULL)) return kCFStringEncodingMacRoman; 580 581 582 prefixLength = strlen("x-mac-"); 583 encoding = (CFStringEncoding)(CFIndex)CFDictionaryGetValue(mappingTable, canonicalName + ((0 == strncasecmp_l(canonicalName, "x-mac-", prefixLength, NULL)) ? prefixLength : 0)); 584 585 return ((0 == encoding) ? kCFStringEncodingInvalidId : encoding); 586} 587#undef LENGTH_LIMIT 588 589#if DEPLOYMENT_TARGET_MACOSX 590// This list indexes from DOS range 591static uint16_t __CFISO8859SimilarScriptList[] = { 592 kCFStringEncodingMacRoman, 593 kCFStringEncodingMacCentralEurRoman, 594 kCFStringEncodingMacRoman, 595 kCFStringEncodingMacCentralEurRoman, 596 kCFStringEncodingMacCyrillic, 597 kCFStringEncodingMacArabic, 598 kCFStringEncodingMacGreek, 599 kCFStringEncodingMacHebrew, 600 kCFStringEncodingMacTurkish, 601 kCFStringEncodingMacInuit, 602 kCFStringEncodingMacThai, 603 kCFStringEncodingMacRoman, 604 kCFStringEncodingMacCentralEurRoman, 605 kCFStringEncodingMacCeltic, 606 kCFStringEncodingMacRoman, 607 kCFStringEncodingMacRomanian}; 608 609static uint16_t __CFOtherSimilarScriptList[] = { 610 kCFStringEncodingMacRoman, 611 kCFStringEncodingMacGreek, 612 kCFStringEncodingMacCentralEurRoman, 613 kCFStringEncodingMacRoman, 614 kCFStringEncodingMacGreek, 615 kCFStringEncodingMacCentralEurRoman, 616 kCFStringEncodingMacCyrillic, 617 kCFStringEncodingMacTurkish, 618 kCFStringEncodingMacRoman, 619 kCFStringEncodingMacIcelandic, 620 kCFStringEncodingMacHebrew, 621 kCFStringEncodingMacRoman, 622 kCFStringEncodingMacArabic, 623 kCFStringEncodingMacInuit, 624 kCFStringEncodingMacCyrillic, 625 kCFStringEncodingMacGreek, 626 kCFStringEncodingMacThai, 627 kCFStringEncodingMacJapanese, 628 kCFStringEncodingMacChineseSimp, 629 kCFStringEncodingMacKorean, 630 kCFStringEncodingMacChineseTrad, 631 632 kCFStringEncodingMacRoman, 633 kCFStringEncodingMacCentralEurRoman, 634 kCFStringEncodingMacCyrillic, 635 kCFStringEncodingMacGreek, 636 kCFStringEncodingMacTurkish, 637 kCFStringEncodingMacHebrew, 638 kCFStringEncodingMacArabic, 639 kCFStringEncodingMacCentralEurRoman, 640 kCFStringEncodingMacVietnamese, 641 kCFStringEncodingMacKorean, 642 643 kCFStringEncodingMacRoman, 644 645 kCFStringEncodingMacJapanese, 646 kCFStringEncodingMacChineseSimp, 647 648 kCFStringEncodingMacJapanese, 649 kCFStringEncodingMacJapanese, 650 kCFStringEncodingMacJapanese, 651 kCFStringEncodingMacJapanese, 652 kCFStringEncodingMacChineseSimp, 653 kCFStringEncodingMacChineseSimp, 654 kCFStringEncodingMacKorean, 655 kCFStringEncodingMacJapanese, 656 kCFStringEncodingMacChineseSimp, 657 kCFStringEncodingMacChineseTrad, 658 kCFStringEncodingMacKorean, 659 660 kCFStringEncodingMacJapanese, 661 662 kCFStringEncodingMacCyrillic, 663 664 kCFStringEncodingMacChineseTrad, 665 666 kCFStringEncodingMacRoman, 667 kCFStringEncodingMacChineseSimp, 668 kCFStringEncodingMacChineseTrad, 669 kCFStringEncodingMacVietnamese, 670 kCFStringEncodingMacUkrainian, 671 kCFStringEncodingMacChineseTrad, 672 kCFStringEncodingMacRoman, 673 674 kCFStringEncodingMacRoman, 675 676 kCFStringEncodingMacRoman 677}; 678 679static const char *__CFISONameList[] = { 680 "Western (ISO Latin 1)", 681 "Central European (ISO Latin 2)", 682 "Western (ISO Latin 3)", 683 "Central European (ISO Latin 4)", 684 "Cyrillic (ISO 8859-5)", 685 "Arabic (ISO 8859-6)", 686 "Greek (ISO 8859-7)", 687 "Hebrew (ISO 8859-8)", 688 "Turkish (ISO Latin 5)", 689 "Nordic (ISO Latin 6)", 690 "Thai (ISO 8859-11)", 691 NULL, 692 "Baltic (ISO Latin 7)", 693 "Celtic (ISO Latin 8)", 694 "Western (ISO Latin 9)", 695 "Romanian (ISO Latin 10)", 696}; 697 698static const char *__CFOtherNameList[] = { 699 "Western (Mac OS Roman)", 700 "Japanese (Mac OS)", 701 "Traditional Chinese (Mac OS)", 702 "Korean (Mac OS)", 703 "Arabic (Mac OS)", 704 "Hebrew (Mac OS)", 705 "Greek (Mac OS)", 706 "Cyrillic (Mac OS)", 707 "Devanagari (Mac OS)", 708 "Gurmukhi (Mac OS)", 709 "Gujarati (Mac OS)", 710 "Oriya (Mac OS)", 711 "Bengali (Mac OS)", 712 "Tamil (Mac OS)", 713 "Telugu (Mac OS)", 714 "Kannada (Mac OS)", 715 "Malayalam (Mac OS)", 716 "Sinhalese (Mac OS)", 717 "Burmese (Mac OS)", 718 "Khmer (Mac OS)", 719 "Thai (Mac OS)", 720 "Laotian (Mac OS)", 721 "Georgian (Mac OS)", 722 "Armenian (Mac OS)", 723 "Simplified Chinese (Mac OS)", 724 "Tibetan (Mac OS)", 725 "Mongolian (Mac OS)", 726 "Ethiopic (Mac OS)", 727 "Central European (Mac OS)", 728 "Vietnamese (Mac OS)", 729 "Symbol (Mac OS)", 730 "Dingbats (Mac OS)", 731 "Turkish (Mac OS)", 732 "Croatian (Mac OS)", 733 "Icelandic (Mac OS)", 734 "Romanian (Mac OS)", 735 "Celtic (Mac OS)", 736 "Gaelic (Mac OS)", 737 "Farsi (Mac OS)", 738 "Cyrillic (Mac OS Ukrainian)", 739 "Inuit (Mac OS)", 740 "Latin-US (DOS)", 741 "Greek (DOS)", 742 "Baltic (DOS)", 743 "Western (DOS Latin 1)", 744 "Greek (DOS Greek 1)", 745 "Central European (DOS Latin 2)", 746 "Cyrillic (DOS)", 747 "Turkish (DOS)", 748 "Portuguese (DOS)", 749 "Icelandic (DOS)", 750 "Hebrew (DOS)", 751 "Canadian French (DOS)", 752 "Arabic (DOS)", 753 "Nordic (DOS)", 754 "Russian (DOS)", 755 "Greek (DOS Greek 2)", 756 "Thai (Windows, DOS)", 757 "Japanese (Windows, DOS)", 758 "Simplified Chinese (Windows, DOS)", 759 "Korean (Windows, DOS)", 760 "Traditional Chinese (Windows, DOS)", 761 "Western (Windows Latin 1)", 762 "Central European (Windows Latin 2)", 763 "Cyrillic (Windows)", 764 "Greek (Windows)", 765 "Turkish (Windows Latin 5)", 766 "Hebrew (Windows)", 767 "Arabic (Windows)", 768 "Baltic (Windows)", 769 "Vietnamese (Windows)", 770 "Korean (Windows Johab)", 771 "Western (ASCII)", 772 "Japanese (Shift JIS X0213)", 773 "Chinese (GB 18030)", 774 "Japanese (ISO 2022-JP)", 775 "Japanese (ISO 2022-JP-2)", 776 "Japanese (ISO 2022-JP-1)", 777 "Japanese (ISO 2022-JP-3)", 778 "Chinese (ISO 2022-CN)", 779 "Chinese (ISO 2022-CN-EXT)", 780 "Korean (ISO 2022-KR)", 781 "Japanese (EUC)", 782 "Simplified Chinese (GB 2312)", 783 "Traditional Chinese (EUC)", 784 "Korean (EUC)", 785 "Japanese (Shift JIS)", 786 "Cyrillic (KOI8-R)", 787 "Traditional Chinese (Big 5)", 788 "Western (Mac Mail)", 789 "Simplified Chinese (HZ GB 2312)", 790 "Traditional Chinese (Big 5 HKSCS)", 791 NULL, 792 "Ukrainian (KOI8-U)", 793 "Traditional Chinese (Big 5-E)", 794 NULL, 795 "Western (NextStep)", 796 "Western (EBCDIC Latin 1)", 797}; 798#endif /* DEPLOYMENT_TARGET_MACOSX */ 799 800CF_PRIVATE CFStringEncoding __CFStringEncodingGetMostCompatibleMacScript(CFStringEncoding encoding) { 801#if DEPLOYMENT_TARGET_MACOSX 802 switch (encoding & 0x0F00) { 803 case 0: return encoding & 0xFF; break; // Mac scripts 804 805 case 0x0100: return kCFStringEncodingUnicode; break; // Unicode 806 807 case 0x200: // ISO 8859 808 return (((encoding & 0xFF) <= (sizeof(__CFISO8859SimilarScriptList) / sizeof(*__CFISO8859SimilarScriptList))) ? __CFISO8859SimilarScriptList[(encoding & 0xFF) - 1] : kCFStringEncodingInvalidId); 809 break; 810 811 default: { 812 CFIndex index = __CFGetEncodingIndex(encoding); 813 814 if (kCFNotFound != index) { 815 index -= __CFGetEncodingIndex(kCFStringEncodingDOSLatinUS); 816 return __CFOtherSimilarScriptList[index]; 817 } 818 } 819 } 820#endif /* DEPLOYMENT_TARGET_MACOSX */ 821 822 return kCFStringEncodingInvalidId; 823} 824 825CF_PRIVATE const char *__CFStringEncodingGetName(CFStringEncoding encoding) { 826 switch (encoding) { 827 case kCFStringEncodingUTF8: return "Unicode (UTF-8)"; break; 828 case kCFStringEncodingUTF16: return "Unicode (UTF-16)"; break; 829 case kCFStringEncodingUTF16BE: return "Unicode (UTF-16BE)"; break; 830 case kCFStringEncodingUTF16LE: return "Unicode (UTF-16LE)"; break; 831 case kCFStringEncodingUTF32: return "Unicode (UTF-32)"; break; 832 case kCFStringEncodingUTF32BE: return "Unicode (UTF-32BE)"; break; 833 case kCFStringEncodingUTF32LE: return "Unicode (UTF-32LE)"; break; 834 case kCFStringEncodingNonLossyASCII: return "Non-lossy ASCII"; break; 835 case kCFStringEncodingUTF7: return "Unicode (UTF-7)"; break; 836 } 837 838#if DEPLOYMENT_TARGET_MACOSX 839 if (0x0200 == (encoding & 0x0F00)) { 840 encoding &= 0x00FF; 841 842 if (encoding <= (sizeof(__CFISONameList) / sizeof(*__CFISONameList))) return __CFISONameList[encoding - 1]; 843 } else { 844 CFIndex index = __CFGetEncodingIndex(encoding); 845 846 if (kCFNotFound != index) return __CFOtherNameList[index]; 847 } 848#endif /* DEPLOYMENT_TARGET_MACOSX */ 849 850 return NULL; 851} 852