/* * Copyright (c) 2014 Apple Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this * file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_LICENSE_HEADER_END@ */ /* CFUniChar.c Copyright (c) 2001-2013, Apple Inc. All rights reserved. 
Responsibility: Aki Inoue */ #include #include "CFInternal.h" #include "CFUniChar.h" #include "CFStringEncodingConverterExt.h" #include "CFUnicodeDecomposition.h" #include "CFUniCharPriv.h" #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD #include #include #include #include #include #include #include #endif #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED #include #endif #if DEPLOYMENT_TARGET_WINDOWS extern void _CFGetFrameworkPath(wchar_t *path, int maxLength); #endif #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED #define __kCFCharacterSetDir "/System/Library/CoreServices" #elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD || DEPLOYMENT_TARGET_EMBEDDED_MINI #define __kCFCharacterSetDir "/usr/local/share/CoreFoundation" #elif DEPLOYMENT_TARGET_WINDOWS #define __kCFCharacterSetDir "\\Windows\\CoreFoundation" #endif #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED #define USE_MACHO_SEGMENT 1 #endif enum { kCFUniCharLastExternalSet = kCFUniCharNewlineCharacterSet, kCFUniCharFirstInternalSet = kCFUniCharCompatibilityDecomposableCharacterSet, kCFUniCharLastInternalSet = kCFUniCharGraphemeExtendCharacterSet, kCFUniCharFirstBitmapSet = kCFUniCharDecimalDigitCharacterSet }; CF_INLINE uint32_t __CFUniCharMapExternalSetToInternalIndex(uint32_t cset) { return ((kCFUniCharFirstInternalSet <= cset) ? ((cset - kCFUniCharFirstInternalSet) + kCFUniCharLastExternalSet) : cset) - kCFUniCharFirstBitmapSet; } CF_INLINE uint32_t __CFUniCharMapCompatibilitySetID(uint32_t cset) { return ((cset == kCFUniCharControlCharacterSet) ? kCFUniCharControlAndFormatterCharacterSet : (((cset > kCFUniCharLastExternalSet) && (cset < kCFUniCharFirstInternalSet)) ? 
((cset - kCFUniCharLastExternalSet) + kCFUniCharFirstInternalSet) : cset)); } #if (DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED) && USE_MACHO_SEGMENT #include #include #include extern const void* unicode_csbitmaps_section_start __asm("section$start$__UNICODE$__csbitmaps"); extern const void* unicode_csbitmaps_section_end __asm("section$end$__UNICODE$__csbitmaps"); extern const void* unicode_properties_section_start __asm("section$start$__UNICODE$__properties"); extern const void* unicode_properties_section_end __asm("section$end$__UNICODE$__properties"); extern const void* unicode_data_section_start __asm("section$start$__UNICODE$__data"); extern const void* unicode_data_section_end __asm("section$end$__UNICODE$__data"); static const void *__CFGetSectDataPtr(const char *segname, const char *sectname, uint64_t *sizep) { // special case three common sections to have fast access if ( strcmp(segname, "__UNICODE") == 0 ) { if ( strcmp(sectname, "__csbitmaps") == 0) { if (sizep) *sizep = &unicode_csbitmaps_section_end - &unicode_csbitmaps_section_start; return &unicode_csbitmaps_section_start; } else if ( strcmp(sectname, "__properties") == 0 ) { if (sizep) *sizep = &unicode_properties_section_end - &unicode_properties_section_start; return &unicode_properties_section_start; } else if ( strcmp(sectname, "__data") == 0 ) { if (sizep) *sizep = &unicode_data_section_end - &unicode_data_section_start; return &unicode_data_section_start; } } uint32_t idx, cnt = _dyld_image_count(); for (idx = 0; idx < cnt; idx++) { void *mh = (void *)_dyld_get_image_header(idx); if (mh != &_mh_dylib_header) continue; #if __LP64__ const struct section_64 *sect = getsectbynamefromheader_64((struct mach_header_64 *)mh, segname, sectname); #else const struct section *sect = getsectbynamefromheader((struct mach_header *)mh, segname, sectname); #endif if (!sect) break; if (sizep) *sizep = (uint64_t)sect->size; return (char *)sect->addr + _dyld_get_image_vmaddr_slide(idx); } if (sizep) 
*sizep = 0ULL; return NULL; } #endif #if !USE_MACHO_SEGMENT // Memory map the file #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX CF_INLINE void __CFUniCharCharacterSetPath(char *cpath) { #elif DEPLOYMENT_TARGET_WINDOWS CF_INLINE void __CFUniCharCharacterSetPath(wchar_t *wpath) { #else #error Unknown or unspecified DEPLOYMENT_TARGET #endif #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN); #elif DEPLOYMENT_TARGET_LINUX strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN); #elif DEPLOYMENT_TARGET_WINDOWS wchar_t frameworkPath[MAXPATHLEN]; _CFGetFrameworkPath(frameworkPath, MAXPATHLEN); wcsncpy(wpath, frameworkPath, MAXPATHLEN); wcsncat(wpath, L"\\CoreFoundation.resources\\", MAXPATHLEN - wcslen(wpath)); #else strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN); strlcat(cpath, "/CharacterSets/", MAXPATHLEN); #endif } #if DEPLOYMENT_TARGET_WINDOWS #define MAX_BITMAP_STATE 512 // // If a string is placed into this array, then it has been previously // determined that the bitmap-file cannot be found. Thus, we make // the assumption it won't be there in future calls and we avoid // hitting the disk un-necessarily. This assumption isn't 100% // correct, as bitmap-files can be added. We would have to re-start // the application in order to pick-up the new bitmap info. // // We should probably re-visit this. 
// static wchar_t *mappedBitmapState[MAX_BITMAP_STATE]; static int __nNumStateEntries = -1; CRITICAL_SECTION __bitmapStateLock = {0}; bool __GetBitmapStateForName(const wchar_t *bitmapName) { if (NULL == __bitmapStateLock.DebugInfo) InitializeCriticalSection(&__bitmapStateLock); EnterCriticalSection(&__bitmapStateLock); if (__nNumStateEntries >= 0) { for (int i = 0; i < __nNumStateEntries; i++) { if (wcscmp(mappedBitmapState[i], bitmapName) == 0) { LeaveCriticalSection(&__bitmapStateLock); return true; } } } LeaveCriticalSection(&__bitmapStateLock); return false; } void __AddBitmapStateForName(const wchar_t *bitmapName) { if (NULL == __bitmapStateLock.DebugInfo) InitializeCriticalSection(&__bitmapStateLock); EnterCriticalSection(&__bitmapStateLock); __nNumStateEntries++; mappedBitmapState[__nNumStateEntries] = (wchar_t *)malloc((lstrlenW(bitmapName)+1) * sizeof(wchar_t)); lstrcpyW(mappedBitmapState[__nNumStateEntries], bitmapName); LeaveCriticalSection(&__bitmapStateLock); } #endif #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX static bool __CFUniCharLoadBytesFromFile(const char *fileName, const void **bytes, int64_t *fileSize) { #elif DEPLOYMENT_TARGET_WINDOWS static bool __CFUniCharLoadBytesFromFile(const wchar_t *fileName, const void **bytes, int64_t *fileSize) { #else #error Unknown or unspecified DEPLOYMENT_TARGET #endif #if DEPLOYMENT_TARGET_WINDOWS HANDLE bitmapFileHandle = NULL; HANDLE mappingHandle = NULL; if (__GetBitmapStateForName(fileName)) { // The fileName has been tried in the past, so just return false // and move on. *bytes = NULL; return false; } mappingHandle = OpenFileMappingW(FILE_MAP_READ, TRUE, fileName); if (NULL == mappingHandle) { if ((bitmapFileHandle = CreateFileW(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) { // We tried to get the bitmap file for mapping, but it's not there. 
Add to list of non-existant bitmap-files so // we don't have to try this again in the future. __AddBitmapStateForName(fileName); return false; } mappingHandle = CreateFileMapping(bitmapFileHandle, NULL, PAGE_READONLY, 0, 0, NULL); CloseHandle(bitmapFileHandle); if (!mappingHandle) return false; } *bytes = MapViewOfFileEx(mappingHandle, FILE_MAP_READ, 0, 0, 0, 0); if (NULL != fileSize) { MEMORY_BASIC_INFORMATION memoryInfo; if (0 == VirtualQueryEx(mappingHandle, *bytes, &memoryInfo, sizeof(memoryInfo))) { *fileSize = 0; // This indicates no checking. Is it right ? } else { *fileSize = memoryInfo.RegionSize; } } CloseHandle(mappingHandle); return (*bytes ? true : false); #else struct stat statBuf; int fd = -1; if ((fd = open(fileName, O_RDONLY, 0)) < 0) { return false; } if (fstat(fd, &statBuf) < 0 || (*bytes = mmap(0, statBuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0)) == (void *)-1) { close(fd); return false; } close(fd); if (NULL != fileSize) *fileSize = statBuf.st_size; return true; #endif } #endif // USE_MACHO_SEGMENT #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX static bool __CFUniCharLoadFile(const char *bitmapName, const void **bytes, int64_t *fileSize) { #elif DEPLOYMENT_TARGET_WINDOWS static bool __CFUniCharLoadFile(const wchar_t *bitmapName, const void **bytes, int64_t *fileSize) { #else #error Unknown or unspecified DEPLOYMENT_TARGET #endif #if USE_MACHO_SEGMENT *bytes = __CFGetSectDataPtr("__UNICODE", bitmapName, NULL); if (NULL != fileSize) *fileSize = 0; return *bytes ? 
true : false; #else #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX char cpath[MAXPATHLEN]; __CFUniCharCharacterSetPath(cpath); strlcat(cpath, bitmapName, MAXPATHLEN); Boolean needToFree = false; const char *possiblyFrameworkRootedCPath = CFPathRelativeToAppleFrameworksRoot(cpath, &needToFree); bool result = __CFUniCharLoadBytesFromFile(possiblyFrameworkRootedCPath, bytes, fileSize); if (needToFree) free((void *)possiblyFrameworkRootedCPath); return result; #elif DEPLOYMENT_TARGET_WINDOWS wchar_t wpath[MAXPATHLEN]; __CFUniCharCharacterSetPath(wpath); wcsncat(wpath, bitmapName, MAXPATHLEN); return __CFUniCharLoadBytesFromFile(wpath, bytes, fileSize); #else #error Unknown or unspecified DEPLOYMENT_TARGET #endif #endif } // Bitmap functions CF_INLINE bool isControl(UTF32Char theChar, uint16_t charset, const void *data) { // ISO Control return (((theChar <= 0x001F) || (theChar >= 0x007F && theChar <= 0x009F)) ? true : false); } CF_INLINE bool isWhitespace(UTF32Char theChar, uint16_t charset, const void *data) { // Space return (((theChar == 0x0020) || (theChar == 0x0009) || (theChar == 0x00A0) || (theChar == 0x1680) || (theChar >= 0x2000 && theChar <= 0x200B) || (theChar == 0x202F) || (theChar == 0x205F) || (theChar == 0x3000)) ? true : false); } CF_INLINE bool isNewline(UTF32Char theChar, uint16_t charset, const void *data) { // White space return (((theChar >= 0x000A && theChar <= 0x000D) || (theChar == 0x0085) || (theChar == 0x2028) || (theChar == 0x2029)) ? true : false); } CF_INLINE bool isWhitespaceAndNewline(UTF32Char theChar, uint16_t charset, const void *data) { // White space return ((isWhitespace(theChar, charset, data) || isNewline(theChar, charset, data)) ? 
true : false); } #if USE_MACHO_SEGMENT CF_INLINE bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) { return true; } #elif 1 // __CFSimpleFileSizeVerification is broken static bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) { return true; } #else static bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) { bool result = true; if (fileSize > 0) { if ((sizeof(uint32_t) * 2) > fileSize) { result = false; } else { uint32_t headerSize = CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes + 4))); if ((headerSize < (sizeof(uint32_t) * 4)) || (headerSize > fileSize)) { result = false; } else { const uint32_t *lastElement = (uint32_t *)(((uint8_t *)bytes) + headerSize) - 2; if ((headerSize + CFSwapInt32BigToHost(lastElement[0]) + CFSwapInt32BigToHost(lastElement[1])) > headerSize) result = false; } } } if (!result) CFLog(kCFLogLevelCritical, CFSTR("File size verification for Unicode database file failed.")); return result; } #endif // USE_MACHO_SEGMENT typedef struct { uint32_t _numPlanes; const uint8_t **_planes; } __CFUniCharBitmapData; static char __CFUniCharUnicodeVersionString[8] = {0, 0, 0, 0, 0, 0, 0, 0}; static uint32_t __CFUniCharNumberOfBitmaps = 0; static __CFUniCharBitmapData *__CFUniCharBitmapDataArray = NULL; static CFSpinLock_t __CFUniCharBitmapLock = CFSpinLockInit; #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX #if !defined(CF_UNICHAR_BITMAP_FILE) #if USE_MACHO_SEGMENT #define CF_UNICHAR_BITMAP_FILE "__csbitmaps" #else #define CF_UNICHAR_BITMAP_FILE "/CFCharacterSetBitmaps.bitmap" #endif #endif #elif DEPLOYMENT_TARGET_WINDOWS #if !defined(CF_UNICHAR_BITMAP_FILE) #define CF_UNICHAR_BITMAP_FILE L"CFCharacterSetBitmaps.bitmap" #endif #else #error Unknown or unspecified DEPLOYMENT_TARGET #endif static bool __CFUniCharLoadBitmapData(void) { __CFUniCharBitmapData *array; uint32_t headerSize; uint32_t bitmapSize; int 
numPlanes; uint8_t currentPlane; const void *bytes; const void *bitmapBase; const void *bitmap; int idx, bitmapIndex; int64_t fileSize; __CFSpinLock(&__CFUniCharBitmapLock); if (__CFUniCharBitmapDataArray || !__CFUniCharLoadFile(CF_UNICHAR_BITMAP_FILE, &bytes, &fileSize) || !__CFSimpleFileSizeVerification(bytes, fileSize)) { __CFSpinUnlock(&__CFUniCharBitmapLock); return false; } for (idx = 0;idx < 4 && ((const uint8_t *)bytes)[idx];idx++) { __CFUniCharUnicodeVersionString[idx * 2] = ((const uint8_t *)bytes)[idx]; __CFUniCharUnicodeVersionString[idx * 2 + 1] = '.'; } __CFUniCharUnicodeVersionString[(idx < 4 ? idx * 2 - 1 : 7)] = '\0'; headerSize = CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes + 4))); bitmapBase = (uint8_t *)bytes + headerSize; bytes = (uint8_t *)bytes + (sizeof(uint32_t) * 2); headerSize -= (sizeof(uint32_t) * 2); __CFUniCharNumberOfBitmaps = headerSize / (sizeof(uint32_t) * 2); array = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * __CFUniCharNumberOfBitmaps, 0); for (idx = 0;idx < (int)__CFUniCharNumberOfBitmaps;idx++) { bitmap = (uint8_t *)bitmapBase + CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t); bitmapSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t); numPlanes = bitmapSize / (8 * 1024); numPlanes = *(const uint8_t *)((char *)bitmap + (((numPlanes - 1) * ((8 * 1024) + 1)) - 1)) + 1; array[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * numPlanes, 0); array[idx]._numPlanes = numPlanes; currentPlane = 0; for (bitmapIndex = 0;bitmapIndex < numPlanes;bitmapIndex++) { if (bitmapIndex == currentPlane) { array[idx]._planes[bitmapIndex] = (const uint8_t *)bitmap; bitmap = (uint8_t *)bitmap + (8 * 1024); #if defined (__cplusplus) currentPlane = *(((const uint8_t*&)bitmap)++); #else currentPlane = *((const uint8_t *)bitmap++); #endif } else { 
array[idx]._planes[bitmapIndex] = NULL; } } } __CFUniCharBitmapDataArray = array; __CFSpinUnlock(&__CFUniCharBitmapLock); return true; } CF_PRIVATE const char *__CFUniCharGetUnicodeVersionString(void) { if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData(); return __CFUniCharUnicodeVersionString; } bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset) { charset = __CFUniCharMapCompatibilitySetID(charset); switch (charset) { case kCFUniCharWhitespaceCharacterSet: return isWhitespace(theChar, charset, NULL); case kCFUniCharWhitespaceAndNewlineCharacterSet: return isWhitespaceAndNewline(theChar, charset, NULL); case kCFUniCharNewlineCharacterSet: return isNewline(theChar, charset, NULL); default: { uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(charset); if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData(); if (tableIndex < __CFUniCharNumberOfBitmaps) { __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex; uint8_t planeNo = (theChar >> 16) & 0xFF; // The bitmap data for kCFUniCharIllegalCharacterSet is actually LEGAL set less Plane 14 ~ 16 if (charset == kCFUniCharIllegalCharacterSet) { if (planeNo == 0x0E) { // Plane 14 theChar &= 0xFF; return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? false : true); } else if (planeNo == 0x0F || planeNo == 0x10) { // Plane 15 & 16 return ((theChar & 0xFF) > 0xFFFD ? true : false); } else { return (planeNo < data->_numPlanes && data->_planes[planeNo] ? !CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : true); } } else if (charset == kCFUniCharControlAndFormatterCharacterSet) { if (planeNo == 0x0E) { // Plane 14 theChar &= 0xFF; return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? true : false); } else { return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false); } } else { return (planeNo < data->_numPlanes && data->_planes[planeNo] ? 
CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false); } } return false; } } } const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane) { if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData(); charset = __CFUniCharMapCompatibilitySetID(charset); if ((charset > kCFUniCharWhitespaceAndNewlineCharacterSet) && (charset != kCFUniCharIllegalCharacterSet) && (charset != kCFUniCharNewlineCharacterSet)) { uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(charset); if (tableIndex < __CFUniCharNumberOfBitmaps) { __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex; return (plane < data->_numPlanes ? data->_planes[plane] : NULL); } } return NULL; } CF_PRIVATE uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted) { const uint8_t *src = CFUniCharGetBitmapPtrForPlane(charset, plane); int numBytes = (8 * 1024); if (src) { if (isInverted) { #if defined (__cplusplus) while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = ~(*(src++)); #else while (numBytes-- > 0) *((uint8_t *)bitmap++) = ~(*(src++)); #endif } else { #if defined (__cplusplus) while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = *(src++); #else while (numBytes-- > 0) *((uint8_t *)bitmap++) = *(src++); #endif } return kCFUniCharBitmapFilled; } else if (charset == kCFUniCharIllegalCharacterSet) { __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset)); if (plane < data->_numPlanes && (src = data->_planes[plane])) { if (isInverted) { #if defined (__cplusplus) while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = *(src++); #else while (numBytes-- > 0) *((uint8_t *)bitmap++) = *(src++); #endif } else { #if defined (__cplusplus) while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = ~(*(src++)); #else while (numBytes-- > 0) *((uint8_t *)bitmap++) = ~(*(src++)); #endif } return kCFUniCharBitmapFilled; } else if (plane == 0x0E) { 
// Plane 14 int idx; uint8_t asciiRange = (isInverted ? (uint8_t)0xFF : (uint8_t)0); uint8_t otherRange = (isInverted ? (uint8_t)0 : (uint8_t)0xFF); #if defined (__cplusplus) *(((uint8_t *&)bitmap)++) = 0x02; // UE0001 LANGUAGE TAG #else *((uint8_t *)bitmap++) = 0x02; // UE0001 LANGUAGE TAG #endif for (idx = 1;idx < numBytes;idx++) { #if defined (__cplusplus) *(((uint8_t *&)bitmap)++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange); #else *((uint8_t *)bitmap++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange); #endif } return kCFUniCharBitmapFilled; } else if (plane == 0x0F || plane == 0x10) { // Plane 15 & 16 uint32_t value = (isInverted ? ~0 : 0); numBytes /= 4; // for 32bit while (numBytes-- > 0) { *((uint32_t *)bitmap) = value; #if defined (__cplusplus) bitmap = (uint8_t *)bitmap + sizeof(uint32_t); #else bitmap += sizeof(uint32_t); #endif } *(((uint8_t *)bitmap) - 5) = (isInverted ? 0x3F : 0xC0); // 0xFFFE & 0xFFFF return kCFUniCharBitmapFilled; } return (isInverted ? kCFUniCharBitmapEmpty : kCFUniCharBitmapAll); } else if ((charset < kCFUniCharDecimalDigitCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) { if (plane) return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty); uint8_t *bitmapBase = (uint8_t *)bitmap; CFIndex idx; uint8_t nonFillValue = (isInverted ? 
(uint8_t)0xFF : (uint8_t)0); #if defined (__cplusplus) while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = nonFillValue; #else while (numBytes-- > 0) *((uint8_t *)bitmap++) = nonFillValue; #endif if ((charset == kCFUniCharWhitespaceAndNewlineCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) { const UniChar newlines[] = {0x000A, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029}; for (idx = 0;idx < (int)(sizeof(newlines) / sizeof(*newlines)); idx++) { if (isInverted) { CFUniCharRemoveCharacterFromBitmap(newlines[idx], bitmapBase); } else { CFUniCharAddCharacterToBitmap(newlines[idx], bitmapBase); } } if (charset == kCFUniCharNewlineCharacterSet) return kCFUniCharBitmapFilled; } if (isInverted) { CFUniCharRemoveCharacterFromBitmap(0x0009, bitmapBase); CFUniCharRemoveCharacterFromBitmap(0x0020, bitmapBase); CFUniCharRemoveCharacterFromBitmap(0x00A0, bitmapBase); CFUniCharRemoveCharacterFromBitmap(0x1680, bitmapBase); CFUniCharRemoveCharacterFromBitmap(0x202F, bitmapBase); CFUniCharRemoveCharacterFromBitmap(0x205F, bitmapBase); CFUniCharRemoveCharacterFromBitmap(0x3000, bitmapBase); } else { CFUniCharAddCharacterToBitmap(0x0009, bitmapBase); CFUniCharAddCharacterToBitmap(0x0020, bitmapBase); CFUniCharAddCharacterToBitmap(0x00A0, bitmapBase); CFUniCharAddCharacterToBitmap(0x1680, bitmapBase); CFUniCharAddCharacterToBitmap(0x202F, bitmapBase); CFUniCharAddCharacterToBitmap(0x205F, bitmapBase); CFUniCharAddCharacterToBitmap(0x3000, bitmapBase); } for (idx = 0x2000;idx <= 0x200B;idx++) { if (isInverted) { CFUniCharRemoveCharacterFromBitmap(idx, bitmapBase); } else { CFUniCharAddCharacterToBitmap(idx, bitmapBase); } } return kCFUniCharBitmapFilled; } return (isInverted ? 
kCFUniCharBitmapAll : kCFUniCharBitmapEmpty); } CF_PRIVATE uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset) { if ((charset == kCFUniCharControlCharacterSet) || (charset == kCFUniCharControlAndFormatterCharacterSet)) { return 15; // 0 to 14 } else if (charset < kCFUniCharDecimalDigitCharacterSet) { return 1; } else if (charset == kCFUniCharIllegalCharacterSet) { return 17; } else { uint32_t numPlanes; if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData(); numPlanes = __CFUniCharBitmapDataArray[__CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset))]._numPlanes; return numPlanes; } } // Mapping data loading static const void **__CFUniCharMappingTables = NULL; static CFSpinLock_t __CFUniCharMappingTableLock = CFSpinLockInit; #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX #if __CF_BIG_ENDIAN__ #if USE_MACHO_SEGMENT #define MAPPING_TABLE_FILE "__data" #else #define MAPPING_TABLE_FILE "/CFUnicodeData-B.mapping" #endif #else #if USE_MACHO_SEGMENT #define MAPPING_TABLE_FILE "__data" #else #define MAPPING_TABLE_FILE "/CFUnicodeData-L.mapping" #endif #endif #elif DEPLOYMENT_TARGET_WINDOWS #if __CF_BIG_ENDIAN__ #if USE_MACHO_SEGMENT #define MAPPING_TABLE_FILE "__data" #else #define MAPPING_TABLE_FILE L"CFUnicodeData-B.mapping" #endif #else #if USE_MACHO_SEGMENT #define MAPPING_TABLE_FILE "__data" #else #define MAPPING_TABLE_FILE L"CFUnicodeData-L.mapping" #endif #endif #else #error Unknown or unspecified DEPLOYMENT_TARGET #endif CF_PRIVATE const void *CFUniCharGetMappingData(uint32_t type) { __CFSpinLock(&__CFUniCharMappingTableLock); if (NULL == __CFUniCharMappingTables) { const void *bytes; const void *bodyBase; int headerSize; int idx, count; int64_t fileSize; if (!__CFUniCharLoadFile(MAPPING_TABLE_FILE, &bytes, &fileSize) || !__CFSimpleFileSizeVerification(bytes, fileSize)) { __CFSpinUnlock(&__CFUniCharMappingTableLock); return NULL; } #if defined 
(__cplusplus) bytes = (uint8_t *)bytes + 4; // Skip Unicode version headerSize = *((uint8_t *)bytes); bytes = (uint8_t *)bytes + sizeof(uint32_t); #else bytes += 4; // Skip Unicode version headerSize = *((uint32_t *)bytes); bytes += sizeof(uint32_t); #endif headerSize -= (sizeof(uint32_t) * 2); bodyBase = (char *)bytes + headerSize; count = headerSize / sizeof(uint32_t); __CFUniCharMappingTables = (const void **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * count, 0); for (idx = 0;idx < count;idx++) { #if defined (__cplusplus) __CFUniCharMappingTables[idx] = (char *)bodyBase + *((uint32_t *)bytes); bytes = (uint8_t *)bytes + sizeof(uint32_t); #else __CFUniCharMappingTables[idx] = (char *)bodyBase + *((uint32_t *)bytes); bytes += sizeof(uint32_t); #endif } } __CFSpinUnlock(&__CFUniCharMappingTableLock); return __CFUniCharMappingTables[type]; } // Case mapping functions #define DO_SPECIAL_CASE_MAPPING 1 static uint32_t *__CFUniCharCaseMappingTableCounts = NULL; static uint32_t **__CFUniCharCaseMappingTable = NULL; static const uint32_t **__CFUniCharCaseMappingExtraTable = NULL; typedef struct { uint32_t _key; uint32_t _value; } __CFUniCharCaseMappings; /* Binary searches CFStringEncodingUnicodeTo8BitCharMap */ static uint32_t __CFUniCharGetMappedCase(const __CFUniCharCaseMappings *theTable, uint32_t numElem, UTF32Char character) { const __CFUniCharCaseMappings *p, *q, *divider; if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) { return 0; } p = theTable; q = p + (numElem-1); while (p <= q) { divider = p + ((q - p) >> 1); /* divide by 2 */ if (character < divider->_key) { q = divider - 1; } else if (character > divider->_key) { p = divider + 1; } else { return divider->_value; } } return 0; } #define NUM_CASE_MAP_DATA (kCFUniCharCaseFold + 1) static bool __CFUniCharLoadCaseMappingTable(void) { uint32_t *countArray; int idx; if (NULL == __CFUniCharMappingTables) (void)CFUniCharGetMappingData(kCFUniCharToLowercase); if 
(NULL == __CFUniCharMappingTables) return false; __CFSpinLock(&__CFUniCharMappingTableLock); if (__CFUniCharCaseMappingTableCounts) { __CFSpinUnlock(&__CFUniCharMappingTableLock); return true; } countArray = (uint32_t *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(uint32_t) * NUM_CASE_MAP_DATA + sizeof(uint32_t *) * NUM_CASE_MAP_DATA * 2, 0); __CFUniCharCaseMappingTable = (uint32_t **)((char *)countArray + sizeof(uint32_t) * NUM_CASE_MAP_DATA); __CFUniCharCaseMappingExtraTable = (const uint32_t **)__CFUniCharCaseMappingTable + NUM_CASE_MAP_DATA; for (idx = 0;idx < NUM_CASE_MAP_DATA;idx++) { countArray[idx] = *((uint32_t *)__CFUniCharMappingTables[idx]) / (sizeof(uint32_t) * 2); __CFUniCharCaseMappingTable[idx] = ((uint32_t *)__CFUniCharMappingTables[idx]) + 1; __CFUniCharCaseMappingExtraTable[idx] = (const uint32_t *)((char *)__CFUniCharCaseMappingTable[idx] + *((uint32_t *)__CFUniCharMappingTables[idx])); } __CFUniCharCaseMappingTableCounts = countArray; __CFSpinUnlock(&__CFUniCharMappingTableLock); return true; } #if __CF_BIG_ENDIAN__ #define TURKISH_LANG_CODE (0x7472) // tr #define LITHUANIAN_LANG_CODE (0x6C74) // lt #define AZERI_LANG_CODE (0x617A) // az #define DUTCH_LANG_CODE (0x6E6C) // nl #define GREEK_LANG_CODE (0x656C) // el #else #define TURKISH_LANG_CODE (0x7274) // tr #define LITHUANIAN_LANG_CODE (0x746C) // lt #define AZERI_LANG_CODE (0x7A61) // az #define DUTCH_LANG_CODE (0x6C6E) // nl #define GREEK_LANG_CODE (0x6C65) // el #endif CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode) { __CFUniCharBitmapData *data; uint8_t planeNo = (theChar >> 16) & 0xFF; caseFoldRetry: #if DO_SPECIAL_CASE_MAPPING if (flags & kCFUniCharCaseMapFinalSigma) { if (theChar == 0x03A3) { // Final sigma *convertedChar = (ctype == kCFUniCharToLowercase ? 
0x03C2 : 0x03A3); return 1; } } if (langCode) { if (flags & kCFUniCharCaseMapGreekTonos) { // localized Greek uppercasing if (theChar == 0x0301) { // GREEK TONOS return 0; } else if (theChar == 0x0344) {// COMBINING GREEK DIALYTIKA TONOS *convertedChar = 0x0308; // COMBINING GREEK DIALYTIKA return 1; } else if (CFUniCharIsMemberOf(theChar, kCFUniCharDecomposableCharacterSet)) { UTF32Char buffer[MAX_DECOMPOSED_LENGTH]; CFIndex length = CFUniCharDecomposeCharacter(theChar, buffer, MAX_DECOMPOSED_LENGTH); if (length > 1) { UTF32Char *characters = buffer + 1; UTF32Char *tail = buffer + length; while (characters < tail) { if (*characters == 0x0301) break; ++characters; } if (characters < tail) { // found a tonos CFIndex convertedLength = CFUniCharMapCaseTo(*buffer, convertedChar, maxLength, ctype, 0, langCode); if (convertedLength == 0) { *convertedChar = (UTF16Char)*buffer; convertedLength = 1; } characters = buffer + 1; while (characters < tail) { if (*characters != 0x0301) { // not tonos if (*characters < 0x10000) { // BMP convertedChar[convertedLength] = (UTF16Char)*characters; ++convertedLength; } else { UTF32Char character = *characters - 0x10000; convertedChar[convertedLength++] = (UTF16Char)((character >> 10) + 0xD800UL); convertedChar[convertedLength++] = (UTF16Char)((character & 0x3FF) + 0xDC00UL); } } ++characters; } return convertedLength; } } } } switch (*(uint16_t *)langCode) { case LITHUANIAN_LANG_CODE: if (theChar == 0x0307 && (flags & kCFUniCharCaseMapAfter_i)) { return 0; } else if (ctype == kCFUniCharToLowercase) { if (flags & kCFUniCharCaseMapMoreAbove) { switch (theChar) { case 0x0049: // LATIN CAPITAL LETTER I *(convertedChar++) = 0x0069; *(convertedChar++) = 0x0307; return 2; case 0x004A: // LATIN CAPITAL LETTER J *(convertedChar++) = 0x006A; *(convertedChar++) = 0x0307; return 2; case 0x012E: // LATIN CAPITAL LETTER I WITH OGONEK *(convertedChar++) = 0x012F; *(convertedChar++) = 0x0307; return 2; default: break; } } switch (theChar) { case 
0x00CC: // LATIN CAPITAL LETTER I WITH GRAVE *(convertedChar++) = 0x0069; *(convertedChar++) = 0x0307; *(convertedChar++) = 0x0300; return 3; case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE *(convertedChar++) = 0x0069; *(convertedChar++) = 0x0307; *(convertedChar++) = 0x0301; return 3; case 0x0128: // LATIN CAPITAL LETTER I WITH TILDE *(convertedChar++) = 0x0069; *(convertedChar++) = 0x0307; *(convertedChar++) = 0x0303; return 3; default: break; } } break; case TURKISH_LANG_CODE: case AZERI_LANG_CODE: if ((theChar == 0x0049) || (theChar == 0x0131)) { // LATIN CAPITAL LETTER I & LATIN SMALL LETTER DOTLESS I *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? ((kCFUniCharCaseMapMoreAbove & flags) ? 0x0069 : 0x0131) : 0x0049); return 1; } else if ((theChar == 0x0069) || (theChar == 0x0130)) { // LATIN SMALL LETTER I & LATIN CAPITAL LETTER I WITH DOT ABOVE *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? 0x0069 : 0x0130); return 1; } else if (theChar == 0x0307 && (kCFUniCharCaseMapAfter_i & flags)) { // COMBINING DOT ABOVE AFTER_i if (ctype == kCFUniCharToLowercase) { return 0; } else { *convertedChar = 0x0307; return 1; } } break; case DUTCH_LANG_CODE: if ((theChar == 0x004A) || (theChar == 0x006A)) { *convertedChar = (((ctype == kCFUniCharToUppercase) || (ctype == kCFUniCharToTitlecase) || (kCFUniCharCaseMapDutchDigraph & flags)) ? 
0x004A : 0x006A); return 1; } break; default: break; } } #endif // DO_SPECIAL_CASE_MAPPING if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData(); data = __CFUniCharBitmapDataArray + __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(ctype + kCFUniCharHasNonSelfLowercaseCharacterSet)); if (planeNo < data->_numPlanes && data->_planes[planeNo] && CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) && (__CFUniCharCaseMappingTableCounts || __CFUniCharLoadCaseMappingTable())) { uint32_t value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[ctype], __CFUniCharCaseMappingTableCounts[ctype], theChar); if (!value && ctype == kCFUniCharToTitlecase) { value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[kCFUniCharToUppercase], __CFUniCharCaseMappingTableCounts[kCFUniCharToUppercase], theChar); if (value) ctype = kCFUniCharToUppercase; } if (value) { CFIndex count = CFUniCharConvertFlagToCount(value); if (count == 1) { if (value & kCFUniCharNonBmpFlag) { if (maxLength > 1) { value = (value & 0xFFFFFF) - 0x10000; *(convertedChar++) = (UTF16Char)(value >> 10) + 0xD800UL; *(convertedChar++) = (UTF16Char)(value & 0x3FF) + 0xDC00UL; return 2; } } else { *convertedChar = (UTF16Char)value; return 1; } } else if (count < maxLength) { const uint32_t *extraMapping = __CFUniCharCaseMappingExtraTable[ctype] + (value & 0xFFFFFF); if (value & kCFUniCharNonBmpFlag) { CFIndex copiedLen = 0; while (count-- > 0) { value = *(extraMapping++); if (value > 0xFFFF) { if (copiedLen + 2 >= maxLength) break; value = (value & 0xFFFFFF) - 0x10000; convertedChar[copiedLen++] = (UTF16Char)(value >> 10) + 0xD800UL; convertedChar[copiedLen++] = (UTF16Char)(value & 0x3FF) + 0xDC00UL; } else { if (copiedLen + 1 >= maxLength) break; convertedChar[copiedLen++] = value; } } if (!count) return copiedLen; } else { CFIndex idx; for (idx = 0;idx < count;idx++) *(convertedChar++) = 
(UTF16Char)*(extraMapping++); return count; } } } } else if (ctype == kCFUniCharCaseFold) { ctype = kCFUniCharToLowercase; goto caseFoldRetry; } if (theChar > 0xFFFF) { // non-BMP theChar = (theChar & 0xFFFFFF) - 0x10000; *(convertedChar++) = (UTF16Char)(theChar >> 10) + 0xD800UL; *(convertedChar++) = (UTF16Char)(theChar & 0x3FF) + 0xDC00UL; return 2; } else { *convertedChar = theChar; return 1; } } CFIndex CFUniCharMapTo(UniChar theChar, UniChar *convertedChar, CFIndex maxLength, uint16_t ctype, uint32_t flags) { if (ctype == kCFUniCharCaseFold + 1) { // kCFUniCharDecompose if (CFUniCharIsDecomposableCharacter(theChar, false)) { UTF32Char buffer[MAX_DECOMPOSED_LENGTH]; CFIndex usedLength = CFUniCharDecomposeCharacter(theChar, buffer, MAX_DECOMPOSED_LENGTH); CFIndex idx; for (idx = 0;idx < usedLength;idx++) *(convertedChar++) = buffer[idx]; return usedLength; } else { *convertedChar = theChar; return 1; } } else { return CFUniCharMapCaseTo(theChar, convertedChar, maxLength, ctype, flags, NULL); } } CF_INLINE bool __CFUniCharIsMoreAbove(UTF16Char *buffer, CFIndex length) { UTF32Char currentChar; uint32_t property; while (length-- > 0) { currentChar = *(buffer)++; if (CFUniCharIsSurrogateHighCharacter(currentChar) && (length > 0) && CFUniCharIsSurrogateLowCharacter(*(buffer + 1))) { currentChar = CFUniCharGetLongCharacterForSurrogatePair(currentChar, *(buffer++)); --length; } if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break; property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF)); if (property == 230) return true; // Above priority } return false; } CF_INLINE bool __CFUniCharIsAfter_i(UTF16Char *buffer, CFIndex length) { UTF32Char currentChar = 0; uint32_t property; UTF32Char decomposed[MAX_DECOMPOSED_LENGTH]; CFIndex decompLength; CFIndex idx; if (length < 1) return 0; buffer += length; while (length-- > 1) { 
currentChar = *(--buffer); if (CFUniCharIsSurrogateLowCharacter(currentChar)) { if ((length > 1) && CFUniCharIsSurrogateHighCharacter(*(buffer - 1))) { currentChar = CFUniCharGetLongCharacterForSurrogatePair(*(--buffer), currentChar); --length; } else { break; } } if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break; property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF)); if (property == 230) return false; // Above priority } if (length == 0) { currentChar = *(--buffer); } else if (CFUniCharIsSurrogateLowCharacter(currentChar) && CFUniCharIsSurrogateHighCharacter(*(--buffer))) { currentChar = CFUniCharGetLongCharacterForSurrogatePair(*buffer, currentChar); } decompLength = CFUniCharDecomposeCharacter(currentChar, decomposed, MAX_DECOMPOSED_LENGTH); currentChar = *decomposed; for (idx = 1;idx < decompLength;idx++) { currentChar = decomposed[idx]; property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF)); if (property == 230) return false; // Above priority } return true; } CF_PRIVATE uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags) { if (theChar == 0x03A3) { // GREEK CAPITAL LETTER SIGMA if ((type == kCFUniCharToLowercase) && (currentIndex > 0)) { UTF16Char *start = buffer; UTF16Char *end = buffer + length; UTF32Char otherChar; // First check if we're after a cased character buffer += (currentIndex - 1); while (start <= buffer) { otherChar = *(buffer--); if (CFUniCharIsSurrogateLowCharacter(otherChar) && (start <= buffer) && CFUniCharIsSurrogateHighCharacter(*buffer)) { otherChar = CFUniCharGetLongCharacterForSurrogatePair(*(buffer--), otherChar); } if 
(!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) { if (!CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) && !CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase break; } } // Next check if we're before a cased character buffer = start + currentIndex + 1; while (buffer < end) { otherChar = *(buffer++); if (CFUniCharIsSurrogateHighCharacter(otherChar) && (buffer < end) && CFUniCharIsSurrogateLowCharacter(*buffer)) { otherChar = CFUniCharGetLongCharacterForSurrogatePair(otherChar, *(buffer++)); } if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) { if (CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase break; } } return kCFUniCharCaseMapFinalSigma; } } else if (langCode) { if (*((const uint16_t *)langCode) == LITHUANIAN_LANG_CODE) { if ((theChar == 0x0307) && ((kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) & lastFlags) == (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove)) { return (__CFUniCharIsAfter_i(buffer, currentIndex) ? kCFUniCharCaseMapAfter_i : 0); } else if (type == kCFUniCharToLowercase) { if ((theChar == 0x0049) || (theChar == 0x004A) || (theChar == 0x012E)) { ++currentIndex; return (__CFUniCharIsMoreAbove(buffer + currentIndex, length - currentIndex) ? kCFUniCharCaseMapMoreAbove : 0); } } else if ((theChar == 'i') || (theChar == 'j')) { ++currentIndex; return (__CFUniCharIsMoreAbove(buffer + currentIndex, length - currentIndex) ? (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) : 0); } } else if ((*((const uint16_t *)langCode) == TURKISH_LANG_CODE) || (*((const uint16_t *)langCode) == AZERI_LANG_CODE)) { if (type == kCFUniCharToLowercase) { if (theChar == 0x0307) { return (kCFUniCharCaseMapMoreAbove & lastFlags ? 
kCFUniCharCaseMapAfter_i : 0); } else if (theChar == 0x0049) { return (((++currentIndex < length) && (buffer[currentIndex] == 0x0307)) ? kCFUniCharCaseMapMoreAbove : 0); } } } else if (*((const uint16_t *)langCode) == DUTCH_LANG_CODE) { if (kCFUniCharCaseMapDutchDigraph & lastFlags) { return (((theChar == 0x006A) || (theChar == 0x004A)) ? kCFUniCharCaseMapDutchDigraph : 0); } else { if ((type == kCFUniCharToTitlecase) && ((theChar == 0x0069) || (theChar == 0x0049))) { return (((++currentIndex < length) && ((buffer[currentIndex] == 0x006A) || (buffer[currentIndex] == 0x004A))) ? kCFUniCharCaseMapDutchDigraph : 0); } } } if (kCFUniCharCaseMapGreekTonos & lastFlags) { // still searching for tonos if (CFUniCharIsMemberOf(theChar, kCFUniCharNonBaseCharacterSet)) { return kCFUniCharCaseMapGreekTonos; } } if (((theChar >= 0x0370) && (theChar < 0x0400)) || ((theChar >= 0x1F00) && (theChar < 0x2000))) { // Greek/Coptic & Greek extended ranges if ((type == kCFUniCharToUppercase) && (CFUniCharIsMemberOf(theChar, kCFUniCharLetterCharacterSet))) return kCFUniCharCaseMapGreekTonos; } } return 0; } // Unicode property database static __CFUniCharBitmapData *__CFUniCharUnicodePropertyTable = NULL; static int __CFUniCharUnicodePropertyTableCount = 0; static CFSpinLock_t __CFUniCharPropTableLock = CFSpinLockInit; #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX #if USE_MACHO_SEGMENT #define PROP_DB_FILE "__properties" #else #define PROP_DB_FILE "/CFUniCharPropertyDatabase.data" #endif #elif DEPLOYMENT_TARGET_WINDOWS #if USE_MACHO_SEGMENT #define PROP_DB_FILE "__properties" #else #define PROP_DB_FILE L"CFUniCharPropertyDatabase.data" #endif #else #error Unknown or unspecified DEPLOYMENT_TARGET #endif const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane) { __CFSpinLock(&__CFUniCharPropTableLock); if (NULL == __CFUniCharUnicodePropertyTable) { __CFUniCharBitmapData *table; const 
void *bytes; const void *bodyBase; const void *planeBase; int headerSize; int idx, count; int planeIndex, planeCount; int planeSize; int64_t fileSize; if (!__CFUniCharLoadFile(PROP_DB_FILE, &bytes, &fileSize) || !__CFSimpleFileSizeVerification(bytes, fileSize)) { __CFSpinUnlock(&__CFUniCharPropTableLock); return NULL; } #if defined (__cplusplus) bytes = (uint8_t*)bytes + 4; // Skip Unicode version headerSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t); #else bytes += 4; // Skip Unicode version headerSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes += sizeof(uint32_t); #endif headerSize -= (sizeof(uint32_t) * 2); bodyBase = (char *)bytes + headerSize; count = headerSize / sizeof(uint32_t); __CFUniCharUnicodePropertyTableCount = count; table = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * count, 0); for (idx = 0;idx < count;idx++) { planeCount = *((const uint8_t *)bodyBase); planeBase = (char *)bodyBase + planeCount + (planeCount % 4 ? 4 - (planeCount % 4) : 0); table[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * planeCount, 0); for (planeIndex = 0;planeIndex < planeCount;planeIndex++) { if ((planeSize = ((const uint8_t *)bodyBase)[planeIndex + 1])) { table[idx]._planes[planeIndex] = (const uint8_t *)planeBase; #if defined (__cplusplus) planeBase = (char*)planeBase + (planeSize * 256); #else planeBase += (planeSize * 256); #endif } else { table[idx]._planes[planeIndex] = NULL; } } table[idx]._numPlanes = planeCount; #if defined (__cplusplus) bodyBase = (const uint8_t *)bodyBase + (CFSwapInt32BigToHost(*(uint32_t *)bytes)); ((uint32_t *&)bytes) ++; #else bodyBase += (CFSwapInt32BigToHost(*((uint32_t *)bytes++))); #endif } __CFUniCharUnicodePropertyTable = table; } __CFSpinUnlock(&__CFUniCharPropTableLock); return (plane < __CFUniCharUnicodePropertyTable[propertyType]._numPlanes ? 
__CFUniCharUnicodePropertyTable[propertyType]._planes[plane] : NULL); } CF_PRIVATE uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType) { (void)CFUniCharGetUnicodePropertyDataForPlane(propertyType, 0); return __CFUniCharUnicodePropertyTable[propertyType]._numPlanes; } CF_PRIVATE uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType) { if (propertyType == kCFUniCharCombiningProperty) { return CFUniCharGetCombiningPropertyForCharacter(character, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF)); } else if (propertyType == kCFUniCharBidiProperty) { return CFUniCharGetBidiPropertyForCharacter(character, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF)); } else { return 0; } } /* The UTF8 conversion in the following function is derived from ConvertUTF.c */ /* * Copyright 2001 Unicode, Inc. * * Disclaimer * * This source code is provided as is by Unicode, Inc. No claims are * made as to fitness for any particular purpose. No warranties of any * kind are expressed or implied. The recipient agrees to determine * applicability of information provided. If this file has been * purchased on magnetic or optical media from Unicode, Inc., the * sole remedy for any claim will be exchange of defective media * within 90 days of receipt. * * Limitations on Rights to Redistribute This Code * * Unicode, Inc. hereby grants the right to freely use the information * supplied in this file in the creation of products supporting the * Unicode Standard, and to make copies of this file in any form * for internal or external distribution as long as this notice * remains attached. 
*/
#define UNI_REPLACEMENT_CHAR (0x0000FFFDUL)

/*
    Converts srcLength UTF-32 characters from src into the encoding selected by
    dstFormat (kCFUniCharUTF16Format, kCFUniCharUTF8Format, anything else is copied
    as UTF-32) and appends them at *dst.

    src           characters to convert
    srcLength     number of characters in src
    dst           in: where to start writing; out: one past the last unit written
    dstLength     capacity limit, in destination units, that the running total is
                  checked against; 0 means "measure only", nothing is written
    filledLength  in: units already used; out: units used after this call
    dstFormat     destination encoding

    Returns true on success. Returns false as soon as the running total would
    exceed a non-zero dstLength; in that case *dst and *filledLength are left
    unchanged, although earlier characters may already have been written.

    In the UTF-8 path, values of 0x200000 and above are replaced by U+FFFD.
*/
bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat) {
    UTF32Char currentChar;
    CFIndex usedLength = *filledLength;

    if (dstFormat == kCFUniCharUTF16Format) {
        UTF16Char *dstBuffer = (UTF16Char *)*dst;

        while (srcLength-- > 0) {
            currentChar = *(src++);

            if (currentChar > 0xFFFF) { // Non-BMP
                usedLength += 2;
                if (dstLength) {
                    if (usedLength > dstLength) return false;
                    currentChar -= 0x10000;
                    *(dstBuffer++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
                    *(dstBuffer++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
                }
            } else {
                ++usedLength;
                if (dstLength) {
                    if (usedLength > dstLength) return false;
                    *(dstBuffer++) = (UTF16Char)currentChar;
                }
            }
        }

        *dst = dstBuffer;
    } else if (dstFormat == kCFUniCharUTF8Format) {
        uint8_t *dstBuffer = (uint8_t *)*dst;
        uint16_t bytesToWrite = 0;
        const UTF32Char byteMask = 0xBF;
        const UTF32Char byteMark = 0x80;
        static const uint8_t firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

        while (srcLength-- > 0) {
            currentChar = *(src++);

            /* Figure out how many bytes the result will require */
            if (currentChar < (UTF32Char)0x80) {
                bytesToWrite = 1;
            } else if (currentChar < (UTF32Char)0x800) {
                bytesToWrite = 2;
            } else if (currentChar < (UTF32Char)0x10000) {
                bytesToWrite = 3;
            } else if (currentChar < (UTF32Char)0x200000) {
                bytesToWrite = 4;
            } else {
                /* U+FFFD lies in the 0x800..0xFFFF range and therefore takes three
                   bytes (EF BF BD); two bytes would emit an invalid sequence. */
                bytesToWrite = 3;
                currentChar = UNI_REPLACEMENT_CHAR;
            }

            usedLength += bytesToWrite;

            if (dstLength) {
                if (usedLength > dstLength) return false;

                /* The bytes are stored back to front: jump past the sequence, fill it
                   in reverse, then jump past it again. */
                dstBuffer += bytesToWrite;
                switch (bytesToWrite) { /* note: everything falls through. */
                    case 4: *--dstBuffer = (uint8_t)((currentChar | byteMark) & byteMask); currentChar >>= 6; /* fallthrough */
                    case 3: *--dstBuffer = (uint8_t)((currentChar | byteMark) & byteMask); currentChar >>= 6; /* fallthrough */
                    case 2: *--dstBuffer = (uint8_t)((currentChar | byteMark) & byteMask); currentChar >>= 6; /* fallthrough */
                    case 1: *--dstBuffer = (uint8_t)(currentChar | firstByteMark[bytesToWrite]);
                }
                dstBuffer += bytesToWrite;
            }
        }

        *dst = dstBuffer;
    } else {
        UTF32Char *dstBuffer = (UTF32Char *)*dst;

        while (srcLength-- > 0) {
            currentChar = *(src++);

            ++usedLength;
            if (dstLength) {
                if (usedLength > dstLength) return false;
                *(dstBuffer++) = currentChar;
            }
        }

        *dst = dstBuffer;
    }

    *filledLength = usedLength;

    return true;
}

#if DEPLOYMENT_TARGET_WINDOWS
/*
    Releases the tables this file allocates on demand and resets the corresponding
    globals, taking the lock that guards each group while doing so.
*/
void __CFUniCharCleanup(void)
{
    int idx;

    // cleanup memory allocated by __CFUniCharLoadBitmapData()
    __CFSpinLock(&__CFUniCharBitmapLock);

    if (__CFUniCharBitmapDataArray != NULL) {
        for (idx = 0; idx < (int)__CFUniCharNumberOfBitmaps; idx++) {
            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray[idx]._planes);
            __CFUniCharBitmapDataArray[idx]._planes = NULL;
        }

        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray);
        __CFUniCharBitmapDataArray = NULL;
        __CFUniCharNumberOfBitmaps = 0;
    }

    __CFSpinUnlock(&__CFUniCharBitmapLock);

    // cleanup memory allocated by CFUniCharGetMappingData()
    __CFSpinLock(&__CFUniCharMappingTableLock);

    if (__CFUniCharMappingTables != NULL) {
        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharMappingTables);
        __CFUniCharMappingTables = NULL;
    }

    // cleanup memory allocated by __CFUniCharLoadCaseMappingTable()
    if (__CFUniCharCaseMappingTableCounts != NULL) {
        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharCaseMappingTableCounts);
        __CFUniCharCaseMappingTableCounts = NULL;

        // Only the counts pointer is deallocated here; the other two table pointers are just reset.
        __CFUniCharCaseMappingTable = NULL;
        __CFUniCharCaseMappingExtraTable = NULL;
    }

    __CFSpinUnlock(&__CFUniCharMappingTableLock);

    // cleanup memory allocated by CFUniCharGetUnicodePropertyDataForPlane()
    __CFSpinLock(&__CFUniCharPropTableLock);

    if (__CFUniCharUnicodePropertyTable != NULL) {
        for (idx = 0; idx < __CFUniCharUnicodePropertyTableCount; idx++) {
            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable[idx]._planes);
            __CFUniCharUnicodePropertyTable[idx]._planes = NULL;
        }

        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable);
        __CFUniCharUnicodePropertyTable = NULL;
        __CFUniCharUnicodePropertyTableCount = 0;
    }

    __CFSpinUnlock(&__CFUniCharPropTableLock);
}
#endif

#undef USE_MACHO_SEGMENT