1/*
2 * Copyright (c) 2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*	CFUniChar.c
25	Copyright (c) 2001-2013, Apple Inc. All rights reserved.
26	Responsibility: Aki Inoue
27*/
28
29#include <CoreFoundation/CFByteOrder.h>
30#include "CFInternal.h"
31#include "CFUniChar.h"
32#include "CFStringEncodingConverterExt.h"
33#include "CFUnicodeDecomposition.h"
34#include "CFUniCharPriv.h"
35#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
36#include <fcntl.h>
37#include <sys/types.h>
38#include <sys/stat.h>
39#include <sys/param.h>
40#include <sys/mman.h>
41#include <unistd.h>
42#include <stdlib.h>
43#endif
44#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
45#include <mach/mach.h>
46#endif
47
48#if DEPLOYMENT_TARGET_WINDOWS
49extern void _CFGetFrameworkPath(wchar_t *path, int maxLength);
50#endif
51
52#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
53#define __kCFCharacterSetDir "/System/Library/CoreServices"
54#elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD || DEPLOYMENT_TARGET_EMBEDDED_MINI
55#define __kCFCharacterSetDir "/usr/local/share/CoreFoundation"
56#elif DEPLOYMENT_TARGET_WINDOWS
57#define __kCFCharacterSetDir "\\Windows\\CoreFoundation"
58#endif
59
60#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
61#define USE_MACHO_SEGMENT 1
62#endif
63
// Boundaries of the character set ID ranges used by the ID/index mapping helpers in this file.
enum {
    kCFUniCharLastExternalSet = kCFUniCharNewlineCharacterSet, // highest ID of the lower ("external") range
    kCFUniCharFirstInternalSet = kCFUniCharCompatibilityDecomposableCharacterSet, // lowest ID of the upper ("internal") range
    kCFUniCharLastInternalSet = kCFUniCharGraphemeExtendCharacterSet, // highest ID of the upper ("internal") range
    kCFUniCharFirstBitmapSet = kCFUniCharDecimalDigitCharacterSet // ID that maps to bitmap table index 0
};
70
71CF_INLINE uint32_t __CFUniCharMapExternalSetToInternalIndex(uint32_t cset) { return ((kCFUniCharFirstInternalSet <= cset) ? ((cset - kCFUniCharFirstInternalSet) + kCFUniCharLastExternalSet) : cset) - kCFUniCharFirstBitmapSet; }
72CF_INLINE uint32_t __CFUniCharMapCompatibilitySetID(uint32_t cset) { return ((cset == kCFUniCharControlCharacterSet) ? kCFUniCharControlAndFormatterCharacterSet : (((cset > kCFUniCharLastExternalSet) && (cset < kCFUniCharFirstInternalSet)) ? ((cset - kCFUniCharLastExternalSet) + kCFUniCharFirstInternalSet) : cset)); }
73
74#if (DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED) && USE_MACHO_SEGMENT
75#include <mach-o/getsect.h>
76#include <mach-o/dyld.h>
77#include <mach-o/ldsyms.h>
78
79extern const void* unicode_csbitmaps_section_start      __asm("section$start$__UNICODE$__csbitmaps");
80extern const void* unicode_csbitmaps_section_end        __asm("section$end$__UNICODE$__csbitmaps");
81extern const void* unicode_properties_section_start     __asm("section$start$__UNICODE$__properties");
82extern const void* unicode_properties_section_end       __asm("section$end$__UNICODE$__properties");
83extern const void* unicode_data_section_start           __asm("section$start$__UNICODE$__data");
84extern const void* unicode_data_section_end             __asm("section$end$__UNICODE$__data");
85
86static const void *__CFGetSectDataPtr(const char *segname, const char *sectname, uint64_t *sizep) {
87    // special case three common sections to have fast access
88    if ( strcmp(segname, "__UNICODE") == 0 ) {
89        if ( strcmp(sectname, "__csbitmaps") == 0)  {
90            if (sizep) *sizep = &unicode_csbitmaps_section_end - &unicode_csbitmaps_section_start;
91            return &unicode_csbitmaps_section_start;
92        }
93        else if ( strcmp(sectname, "__properties") == 0 ) {
94            if (sizep) *sizep = &unicode_properties_section_end - &unicode_properties_section_start;
95            return &unicode_properties_section_start;
96        }
97        else if ( strcmp(sectname, "__data") == 0 ) {
98            if (sizep) *sizep = &unicode_data_section_end - &unicode_data_section_start;
99            return &unicode_data_section_start;
100        }
101    }
102
103    uint32_t idx, cnt = _dyld_image_count();
104    for (idx = 0; idx < cnt; idx++) {
105       void *mh = (void *)_dyld_get_image_header(idx);
106       if (mh != &_mh_dylib_header) continue;
107#if __LP64__
108       const struct section_64 *sect = getsectbynamefromheader_64((struct mach_header_64 *)mh, segname, sectname);
109#else
110       const struct section *sect = getsectbynamefromheader((struct mach_header *)mh, segname, sectname);
111#endif
112       if (!sect) break;
113       if (sizep) *sizep = (uint64_t)sect->size;
114       return (char *)sect->addr + _dyld_get_image_vmaddr_slide(idx);
115    }
116    if (sizep) *sizep = 0ULL;
117    return NULL;
118}
119#endif
120
121#if !USE_MACHO_SEGMENT
122
123// Memory map the file
124
125#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
126CF_INLINE void __CFUniCharCharacterSetPath(char *cpath) {
127#elif DEPLOYMENT_TARGET_WINDOWS
128CF_INLINE void __CFUniCharCharacterSetPath(wchar_t *wpath) {
129#else
130#error Unknown or unspecified DEPLOYMENT_TARGET
131#endif
132#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
133    strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
134#elif DEPLOYMENT_TARGET_LINUX
135    strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
136#elif DEPLOYMENT_TARGET_WINDOWS
137    wchar_t frameworkPath[MAXPATHLEN];
138    _CFGetFrameworkPath(frameworkPath, MAXPATHLEN);
139    wcsncpy(wpath, frameworkPath, MAXPATHLEN);
140    wcsncat(wpath, L"\\CoreFoundation.resources\\", MAXPATHLEN - wcslen(wpath));
141#else
142    strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
143    strlcat(cpath, "/CharacterSets/", MAXPATHLEN);
144#endif
145}
146
147#if DEPLOYMENT_TARGET_WINDOWS
// Capacity of mappedBitmapState.
#define MAX_BITMAP_STATE 512
//
//  If a string is placed into this array, then it has been previously
//  determined that the bitmap file cannot be found.  Thus, we make
//  the assumption that it won't be there in future calls and we avoid
//  hitting the disk unnecessarily.  This assumption isn't 100%
//  correct, as bitmap files can be added.  We would have to restart
//  the application in order to pick up the new bitmap info.
//
//  We should probably revisit this.
//
static wchar_t *mappedBitmapState[MAX_BITMAP_STATE];
// Index of the most recently added entry (pre-incremented before each store); -1 while empty.
static int __nNumStateEntries = -1;
// Guards the two variables above; initialized lazily by the accessor functions.
CRITICAL_SECTION __bitmapStateLock = {0};
162
163bool __GetBitmapStateForName(const wchar_t *bitmapName) {
164    if (NULL == __bitmapStateLock.DebugInfo)
165        InitializeCriticalSection(&__bitmapStateLock);
166    EnterCriticalSection(&__bitmapStateLock);
167    if (__nNumStateEntries >= 0) {
168        for (int i = 0; i < __nNumStateEntries; i++) {
169            if (wcscmp(mappedBitmapState[i], bitmapName) == 0) {
170                LeaveCriticalSection(&__bitmapStateLock);
171                return true;
172            }
173        }
174    }
175    LeaveCriticalSection(&__bitmapStateLock);
176    return false;
177}
178void __AddBitmapStateForName(const wchar_t *bitmapName) {
179    if (NULL == __bitmapStateLock.DebugInfo)
180        InitializeCriticalSection(&__bitmapStateLock);
181    EnterCriticalSection(&__bitmapStateLock);
182    __nNumStateEntries++;
183    mappedBitmapState[__nNumStateEntries] = (wchar_t *)malloc((lstrlenW(bitmapName)+1) * sizeof(wchar_t));
184    lstrcpyW(mappedBitmapState[__nNumStateEntries], bitmapName);
185    LeaveCriticalSection(&__bitmapStateLock);
186}
187#endif
188
189#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
190static bool __CFUniCharLoadBytesFromFile(const char *fileName, const void **bytes, int64_t *fileSize) {
191#elif DEPLOYMENT_TARGET_WINDOWS
192static bool __CFUniCharLoadBytesFromFile(const wchar_t *fileName, const void **bytes, int64_t *fileSize) {
193#else
194#error Unknown or unspecified DEPLOYMENT_TARGET
195#endif
196#if DEPLOYMENT_TARGET_WINDOWS
197    HANDLE bitmapFileHandle = NULL;
198    HANDLE mappingHandle = NULL;
199
200    if (__GetBitmapStateForName(fileName)) {
201        // The fileName has been tried in the past, so just return false
202        // and move on.
203        *bytes = NULL;
204        return false;
205    }
206    mappingHandle = OpenFileMappingW(FILE_MAP_READ, TRUE, fileName);
207    if (NULL == mappingHandle) {
208        if ((bitmapFileHandle = CreateFileW(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) {
209            // We tried to get the bitmap file for mapping, but it's not there.  Add to list of non-existant bitmap-files so
210            // we don't have to try this again in the future.
211            __AddBitmapStateForName(fileName);
212            return false;
213        }
214        mappingHandle = CreateFileMapping(bitmapFileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
215        CloseHandle(bitmapFileHandle);
216        if (!mappingHandle) return false;
217    }
218
219    *bytes = MapViewOfFileEx(mappingHandle, FILE_MAP_READ, 0, 0, 0, 0);
220
221    if (NULL != fileSize) {
222	MEMORY_BASIC_INFORMATION memoryInfo;
223
224	if (0 == VirtualQueryEx(mappingHandle, *bytes, &memoryInfo, sizeof(memoryInfo))) {
225	    *fileSize = 0; // This indicates no checking. Is it right ?
226	} else {
227	    *fileSize = memoryInfo.RegionSize;
228	}
229    }
230
231    CloseHandle(mappingHandle);
232
233    return (*bytes ? true : false);
234#else
235    struct stat statBuf;
236    int fd = -1;
237
238    if ((fd = open(fileName, O_RDONLY, 0)) < 0) {
239	return false;
240    }
241    if (fstat(fd, &statBuf) < 0 || (*bytes = mmap(0, statBuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0)) == (void *)-1) {
242        close(fd);
243        return false;
244    }
245    close(fd);
246
247    if (NULL != fileSize) *fileSize = statBuf.st_size;
248
249    return true;
250#endif
251}
252
253#endif // USE_MACHO_SEGMENT
254
255#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
256static bool __CFUniCharLoadFile(const char *bitmapName, const void **bytes, int64_t *fileSize) {
257#elif DEPLOYMENT_TARGET_WINDOWS
258static bool __CFUniCharLoadFile(const wchar_t *bitmapName, const void **bytes, int64_t *fileSize) {
259#else
260#error Unknown or unspecified DEPLOYMENT_TARGET
261#endif
262#if USE_MACHO_SEGMENT
263	*bytes = __CFGetSectDataPtr("__UNICODE", bitmapName, NULL);
264
265    if (NULL != fileSize) *fileSize = 0;
266
267    return *bytes ? true : false;
268#else
269#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
270    char cpath[MAXPATHLEN];
271    __CFUniCharCharacterSetPath(cpath);
272    strlcat(cpath, bitmapName, MAXPATHLEN);
273    Boolean needToFree = false;
274    const char *possiblyFrameworkRootedCPath = CFPathRelativeToAppleFrameworksRoot(cpath, &needToFree);
275    bool result = __CFUniCharLoadBytesFromFile(possiblyFrameworkRootedCPath, bytes, fileSize);
276    if (needToFree) free((void *)possiblyFrameworkRootedCPath);
277    return result;
278#elif DEPLOYMENT_TARGET_WINDOWS
279    wchar_t wpath[MAXPATHLEN];
280    __CFUniCharCharacterSetPath(wpath);
281    wcsncat(wpath, bitmapName, MAXPATHLEN);
282    return __CFUniCharLoadBytesFromFile(wpath, bytes, fileSize);
283#else
284#error Unknown or unspecified DEPLOYMENT_TARGET
285#endif
286#endif
287}
288
289// Bitmap functions
290CF_INLINE bool isControl(UTF32Char theChar, uint16_t charset, const void *data) { // ISO Control
291    return (((theChar <= 0x001F) || (theChar >= 0x007F && theChar <= 0x009F)) ? true : false);
292}
293
294CF_INLINE bool isWhitespace(UTF32Char theChar, uint16_t charset, const void *data) { // Space
295    return (((theChar == 0x0020) || (theChar == 0x0009) || (theChar == 0x00A0) || (theChar == 0x1680) || (theChar >= 0x2000 && theChar <= 0x200B) || (theChar == 0x202F) || (theChar == 0x205F) || (theChar == 0x3000)) ? true : false);
296}
297
298CF_INLINE bool isNewline(UTF32Char theChar, uint16_t charset, const void *data) { // White space
299    return (((theChar >= 0x000A && theChar <= 0x000D) || (theChar == 0x0085) || (theChar == 0x2028) || (theChar == 0x2029)) ? true : false);
300}
301
302CF_INLINE bool isWhitespaceAndNewline(UTF32Char theChar, uint16_t charset, const void *data) { // White space
303    return ((isWhitespace(theChar, charset, data) || isNewline(theChar, charset, data)) ? true : false);
304}
305
// Sanity check of loaded Unicode data against its size. Currently always reports success:
// section data has no file size, and the file-based check is disabled (see below).
#if USE_MACHO_SEGMENT
CF_INLINE bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) { return true; }
#elif 1
// <rdar://problem/8961744> __CFSimpleFileSizeVerification is broken
// The reference implementation below stays disabled: it assumes the bitmap file header
// (big-endian header size followed by offset/size pairs), while CFUniCharGetMappingData()
// passes mapping data whose header size is stored in host byte order and is followed by
// single offsets.
static bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) { return true; }
#else
static bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) {
    bool result = true;

    // A fileSize of 0 (or less) means the size is unknown; nothing to verify.
    if (fileSize > 0) {
        if (fileSize < (int64_t)(sizeof(uint32_t) * 2)) {
            // Too small to even hold the version and header size fields.
            result = false;
        } else {
            uint32_t headerSize = CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes + 4)));

            if ((headerSize < (sizeof(uint32_t) * 4)) || (headerSize > fileSize)) {
                result = false;
            } else {
                // Last offset/size pair in the header.
                const uint32_t *lastElement = (uint32_t *)(((uint8_t *)bytes) + headerSize) - 2;
                uint64_t lastEnd = (uint64_t)headerSize + CFSwapInt32BigToHost(lastElement[0]) + CFSwapInt32BigToHost(lastElement[1]);

                // The end of the last entry must lie inside the file. (This used to be
                // compared against headerSize, which rejected every non-empty file.)
                if (lastEnd > (uint64_t)fileSize) result = false;
            }
        }
    }

    if (!result) CFLog(kCFLogLevelCritical, CFSTR("File size verification for Unicode database file failed."));

    return result;
}
#endif // USE_MACHO_SEGMENT
336
// Table entry for one character set: an array of per-plane bitmap pointers.
typedef struct {
    uint32_t _numPlanes; // number of entries in _planes
    const uint8_t **_planes; // indexed by plane number; NULL for planes without a bitmap
} __CFUniCharBitmapData;

// Filled in by __CFUniCharLoadBitmapData() from the first bytes of the bitmap data, with '.' between them.
static char __CFUniCharUnicodeVersionString[8] = {0, 0, 0, 0, 0, 0, 0, 0};

// Number of entries in __CFUniCharBitmapDataArray.
static uint32_t __CFUniCharNumberOfBitmaps = 0;
// NULL until __CFUniCharLoadBitmapData() has succeeded.
static __CFUniCharBitmapData *__CFUniCharBitmapDataArray = NULL;

// Held by __CFUniCharLoadBitmapData() while it builds the table.
static CFSpinLock_t __CFUniCharBitmapLock = CFSpinLockInit;
348
349#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
350#if !defined(CF_UNICHAR_BITMAP_FILE)
351#if USE_MACHO_SEGMENT
352#define CF_UNICHAR_BITMAP_FILE "__csbitmaps"
353#else
354#define CF_UNICHAR_BITMAP_FILE "/CFCharacterSetBitmaps.bitmap"
355#endif
356#endif
357#elif DEPLOYMENT_TARGET_WINDOWS
358#if !defined(CF_UNICHAR_BITMAP_FILE)
359#define CF_UNICHAR_BITMAP_FILE L"CFCharacterSetBitmaps.bitmap"
360#endif
361#else
362#error Unknown or unspecified DEPLOYMENT_TARGET
363#endif
364
365static bool __CFUniCharLoadBitmapData(void) {
366    __CFUniCharBitmapData *array;
367    uint32_t headerSize;
368    uint32_t bitmapSize;
369    int numPlanes;
370    uint8_t currentPlane;
371    const void *bytes;
372    const void *bitmapBase;
373    const void *bitmap;
374    int idx, bitmapIndex;
375    int64_t fileSize;
376
377    __CFSpinLock(&__CFUniCharBitmapLock);
378
379    if (__CFUniCharBitmapDataArray || !__CFUniCharLoadFile(CF_UNICHAR_BITMAP_FILE, &bytes, &fileSize) || !__CFSimpleFileSizeVerification(bytes, fileSize)) {
380        __CFSpinUnlock(&__CFUniCharBitmapLock);
381        return false;
382    }
383
384    for (idx = 0;idx < 4 && ((const uint8_t *)bytes)[idx];idx++) {
385        __CFUniCharUnicodeVersionString[idx * 2] = ((const uint8_t *)bytes)[idx];
386        __CFUniCharUnicodeVersionString[idx * 2 + 1] = '.';
387    }
388    __CFUniCharUnicodeVersionString[(idx < 4 ? idx * 2 - 1 : 7)] = '\0';
389
390    headerSize = CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes + 4)));
391
392    bitmapBase = (uint8_t *)bytes + headerSize;
393    bytes = (uint8_t *)bytes + (sizeof(uint32_t) * 2);
394    headerSize -= (sizeof(uint32_t) * 2);
395
396    __CFUniCharNumberOfBitmaps = headerSize / (sizeof(uint32_t) * 2);
397
398    array = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * __CFUniCharNumberOfBitmaps, 0);
399
400    for (idx = 0;idx < (int)__CFUniCharNumberOfBitmaps;idx++) {
401        bitmap = (uint8_t *)bitmapBase + CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
402        bitmapSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
403
404        numPlanes = bitmapSize / (8 * 1024);
405        numPlanes = *(const uint8_t *)((char *)bitmap + (((numPlanes - 1) * ((8 * 1024) + 1)) - 1)) + 1;
406        array[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * numPlanes, 0);
407        array[idx]._numPlanes = numPlanes;
408
409        currentPlane = 0;
410        for (bitmapIndex = 0;bitmapIndex < numPlanes;bitmapIndex++) {
411            if (bitmapIndex == currentPlane) {
412                array[idx]._planes[bitmapIndex] = (const uint8_t *)bitmap;
413                bitmap = (uint8_t *)bitmap + (8 * 1024);
414#if defined (__cplusplus)
415				currentPlane = *(((const uint8_t*&)bitmap)++);
416#else
417				currentPlane = *((const uint8_t *)bitmap++);
418#endif
419
420            } else {
421                array[idx]._planes[bitmapIndex] = NULL;
422            }
423        }
424    }
425
426    __CFUniCharBitmapDataArray = array;
427
428    __CFSpinUnlock(&__CFUniCharBitmapLock);
429
430    return true;
431}
432
433CF_PRIVATE const char *__CFUniCharGetUnicodeVersionString(void) {
434    if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
435    return __CFUniCharUnicodeVersionString;
436}
437
438bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset) {
439    charset = __CFUniCharMapCompatibilitySetID(charset);
440
441    switch (charset) {
442        case kCFUniCharWhitespaceCharacterSet:
443            return isWhitespace(theChar, charset, NULL);
444
445        case kCFUniCharWhitespaceAndNewlineCharacterSet:
446            return isWhitespaceAndNewline(theChar, charset, NULL);
447
448        case kCFUniCharNewlineCharacterSet:
449            return isNewline(theChar, charset, NULL);
450
451        default: {
452            uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(charset);
453
454            if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
455
456            if (tableIndex < __CFUniCharNumberOfBitmaps) {
457                __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex;
458                uint8_t planeNo = (theChar >> 16) & 0xFF;
459
460                // The bitmap data for kCFUniCharIllegalCharacterSet is actually LEGAL set less Plane 14 ~ 16
461                if (charset == kCFUniCharIllegalCharacterSet) {
462                    if (planeNo == 0x0E) { // Plane 14
463                        theChar &= 0xFF;
464                        return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? false : true);
465                    } else if (planeNo == 0x0F || planeNo == 0x10) { // Plane 15 & 16
466                        return ((theChar & 0xFF) > 0xFFFD ? true : false);
467                    } else {
468                        return (planeNo < data->_numPlanes && data->_planes[planeNo] ? !CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : true);
469                    }
470                } else if (charset == kCFUniCharControlAndFormatterCharacterSet) {
471                    if (planeNo == 0x0E) { // Plane 14
472                        theChar &= 0xFF;
473                        return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? true : false);
474                    } else {
475                        return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
476                    }
477                } else {
478                    return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
479                }
480            }
481            return false;
482        }
483    }
484}
485
486const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane) {
487    if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
488
489    charset = __CFUniCharMapCompatibilitySetID(charset);
490
491    if ((charset > kCFUniCharWhitespaceAndNewlineCharacterSet) && (charset != kCFUniCharIllegalCharacterSet) && (charset != kCFUniCharNewlineCharacterSet)) {
492        uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(charset);
493
494        if (tableIndex < __CFUniCharNumberOfBitmaps) {
495            __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex;
496
497            return (plane < data->_numPlanes ? data->_planes[plane] : NULL);
498        }
499    }
500    return NULL;
501}
502
503CF_PRIVATE uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted) {
504    const uint8_t *src = CFUniCharGetBitmapPtrForPlane(charset, plane);
505    int numBytes = (8 * 1024);
506
507    if (src) {
508        if (isInverted) {
509#if defined (__cplusplus)
510			while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = ~(*(src++));
511#else
512			while (numBytes-- > 0) *((uint8_t *)bitmap++) = ~(*(src++));
513#endif
514        } else {
515#if defined (__cplusplus)
516            while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = *(src++);
517#else
518			while (numBytes-- > 0) *((uint8_t *)bitmap++) = *(src++);
519#endif
520        }
521        return kCFUniCharBitmapFilled;
522    } else if (charset == kCFUniCharIllegalCharacterSet) {
523        __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset));
524
525        if (plane < data->_numPlanes && (src = data->_planes[plane])) {
526            if (isInverted) {
527#if defined (__cplusplus)
528				while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = *(src++);
529#else
530				while (numBytes-- > 0) *((uint8_t *)bitmap++) = *(src++);
531#endif
532            } else {
533#if defined (__cplusplus)
534                while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = ~(*(src++));
535#else
536				while (numBytes-- > 0) *((uint8_t *)bitmap++) = ~(*(src++));
537#endif
538            }
539            return kCFUniCharBitmapFilled;
540        } else if (plane == 0x0E) { // Plane 14
541            int idx;
542            uint8_t asciiRange = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
543            uint8_t otherRange = (isInverted ? (uint8_t)0 : (uint8_t)0xFF);
544
545#if defined (__cplusplus)
546			*(((uint8_t *&)bitmap)++) = 0x02; // UE0001 LANGUAGE TAG
547#else
548			*((uint8_t *)bitmap++) = 0x02; // UE0001 LANGUAGE TAG
549#endif
550            for (idx = 1;idx < numBytes;idx++) {
551#if defined (__cplusplus)
552				*(((uint8_t *&)bitmap)++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange);
553#else
554				*((uint8_t *)bitmap++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange);
555#endif
556            }
557            return kCFUniCharBitmapFilled;
558        } else if (plane == 0x0F || plane == 0x10) { // Plane 15 & 16
559            uint32_t value = (isInverted ? ~0 : 0);
560            numBytes /= 4; // for 32bit
561
562            while (numBytes-- > 0) {
563                *((uint32_t *)bitmap) = value;
564#if defined (__cplusplus)
565				bitmap = (uint8_t *)bitmap + sizeof(uint32_t);
566#else
567				bitmap += sizeof(uint32_t);
568#endif
569            }
570            *(((uint8_t *)bitmap) - 5) = (isInverted ? 0x3F : 0xC0); // 0xFFFE & 0xFFFF
571            return kCFUniCharBitmapFilled;
572        }
573        return (isInverted ? kCFUniCharBitmapEmpty : kCFUniCharBitmapAll);
574    } else if ((charset < kCFUniCharDecimalDigitCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) {
575        if (plane) return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);
576
577        uint8_t *bitmapBase = (uint8_t *)bitmap;
578        CFIndex idx;
579        uint8_t nonFillValue = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
580
581#if defined (__cplusplus)
582                    while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = nonFillValue;
583#else
584                    while (numBytes-- > 0) *((uint8_t *)bitmap++) = nonFillValue;
585#endif
586
587        if ((charset == kCFUniCharWhitespaceAndNewlineCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) {
588            const UniChar newlines[] = {0x000A, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029};
589
590            for (idx = 0;idx < (int)(sizeof(newlines) / sizeof(*newlines)); idx++) {
591                if (isInverted) {
592                    CFUniCharRemoveCharacterFromBitmap(newlines[idx], bitmapBase);
593                } else {
594                    CFUniCharAddCharacterToBitmap(newlines[idx], bitmapBase);
595                }
596            }
597
598            if (charset == kCFUniCharNewlineCharacterSet) return kCFUniCharBitmapFilled;
599        }
600
601        if (isInverted) {
602            CFUniCharRemoveCharacterFromBitmap(0x0009, bitmapBase);
603            CFUniCharRemoveCharacterFromBitmap(0x0020, bitmapBase);
604            CFUniCharRemoveCharacterFromBitmap(0x00A0, bitmapBase);
605            CFUniCharRemoveCharacterFromBitmap(0x1680, bitmapBase);
606            CFUniCharRemoveCharacterFromBitmap(0x202F, bitmapBase);
607            CFUniCharRemoveCharacterFromBitmap(0x205F, bitmapBase);
608            CFUniCharRemoveCharacterFromBitmap(0x3000, bitmapBase);
609        } else {
610            CFUniCharAddCharacterToBitmap(0x0009, bitmapBase);
611            CFUniCharAddCharacterToBitmap(0x0020, bitmapBase);
612            CFUniCharAddCharacterToBitmap(0x00A0, bitmapBase);
613            CFUniCharAddCharacterToBitmap(0x1680, bitmapBase);
614            CFUniCharAddCharacterToBitmap(0x202F, bitmapBase);
615            CFUniCharAddCharacterToBitmap(0x205F, bitmapBase);
616            CFUniCharAddCharacterToBitmap(0x3000, bitmapBase);
617        }
618
619        for (idx = 0x2000;idx <= 0x200B;idx++) {
620            if (isInverted) {
621                CFUniCharRemoveCharacterFromBitmap(idx, bitmapBase);
622            } else {
623                CFUniCharAddCharacterToBitmap(idx, bitmapBase);
624            }
625        }
626        return kCFUniCharBitmapFilled;
627    }
628    return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);
629}
630
631CF_PRIVATE uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset) {
632    if ((charset == kCFUniCharControlCharacterSet) || (charset == kCFUniCharControlAndFormatterCharacterSet)) {
633        return 15; // 0 to 14
634    } else if (charset < kCFUniCharDecimalDigitCharacterSet) {
635        return 1;
636    } else if (charset == kCFUniCharIllegalCharacterSet) {
637        return 17;
638    } else {
639        uint32_t numPlanes;
640
641        if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
642
643        numPlanes = __CFUniCharBitmapDataArray[__CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset))]._numPlanes;
644
645        return numPlanes;
646    }
647}
648
649// Mapping data loading
// Array of pointers to the individual mapping tables, indexed by mapping type.
// NULL until CFUniCharGetMappingData() has loaded the mapping data.
static const void **__CFUniCharMappingTables = NULL;

// Taken by CFUniCharGetMappingData() and __CFUniCharLoadCaseMappingTable() while they set up their tables.
static CFSpinLock_t __CFUniCharMappingTableLock = CFSpinLockInit;
653
654#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
655#if __CF_BIG_ENDIAN__
656#if USE_MACHO_SEGMENT
657#define MAPPING_TABLE_FILE "__data"
658#else
659#define MAPPING_TABLE_FILE "/CFUnicodeData-B.mapping"
660#endif
661#else
662#if USE_MACHO_SEGMENT
663#define MAPPING_TABLE_FILE "__data"
664#else
665#define MAPPING_TABLE_FILE "/CFUnicodeData-L.mapping"
666#endif
667#endif
668#elif DEPLOYMENT_TARGET_WINDOWS
669#if __CF_BIG_ENDIAN__
670#if USE_MACHO_SEGMENT
671#define MAPPING_TABLE_FILE "__data"
672#else
673#define MAPPING_TABLE_FILE L"CFUnicodeData-B.mapping"
674#endif
675#else
676#if USE_MACHO_SEGMENT
677#define MAPPING_TABLE_FILE "__data"
678#else
679#define MAPPING_TABLE_FILE L"CFUnicodeData-L.mapping"
680#endif
681#endif
682#else
683#error Unknown or unspecified DEPLOYMENT_TARGET
684#endif
685
686CF_PRIVATE const void *CFUniCharGetMappingData(uint32_t type) {
687
688    __CFSpinLock(&__CFUniCharMappingTableLock);
689
690    if (NULL == __CFUniCharMappingTables) {
691        const void *bytes;
692        const void *bodyBase;
693        int headerSize;
694        int idx, count;
695	int64_t fileSize;
696
697        if (!__CFUniCharLoadFile(MAPPING_TABLE_FILE, &bytes, &fileSize) || !__CFSimpleFileSizeVerification(bytes, fileSize)) {
698            __CFSpinUnlock(&__CFUniCharMappingTableLock);
699            return NULL;
700        }
701
702#if defined (__cplusplus)
703		bytes = (uint8_t *)bytes + 4; // Skip Unicode version
704		headerSize = *((uint8_t *)bytes); bytes = (uint8_t *)bytes + sizeof(uint32_t);
705#else
706		bytes += 4; // Skip Unicode version
707		headerSize = *((uint32_t *)bytes); bytes += sizeof(uint32_t);
708#endif
709        headerSize -= (sizeof(uint32_t) * 2);
710        bodyBase = (char *)bytes + headerSize;
711
712        count = headerSize / sizeof(uint32_t);
713
714        __CFUniCharMappingTables = (const void **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * count, 0);
715
716        for (idx = 0;idx < count;idx++) {
717#if defined (__cplusplus)
718			__CFUniCharMappingTables[idx] = (char *)bodyBase + *((uint32_t *)bytes); bytes = (uint8_t *)bytes + sizeof(uint32_t);
719#else
720			__CFUniCharMappingTables[idx] = (char *)bodyBase + *((uint32_t *)bytes); bytes += sizeof(uint32_t);
721#endif
722        }
723    }
724
725    __CFSpinUnlock(&__CFUniCharMappingTableLock);
726
727    return __CFUniCharMappingTables[type];
728}
729
730// Case mapping functions
#define DO_SPECIAL_CASE_MAPPING 1

// The three tables below are set up by __CFUniCharLoadCaseMappingTable(), one entry per case mapping type.
static uint32_t *__CFUniCharCaseMappingTableCounts = NULL; // number of key/value pairs in each table
static uint32_t **__CFUniCharCaseMappingTable = NULL; // first key/value pair of each table
static const uint32_t **__CFUniCharCaseMappingExtraTable = NULL; // data located directly after each table's pairs

// One key/value pair of a case mapping table; searched by _key in __CFUniCharGetMappedCase().
typedef struct {
    uint32_t _key;
    uint32_t _value;
} __CFUniCharCaseMappings;
741
742/* Binary searches CFStringEncodingUnicodeTo8BitCharMap */
743static uint32_t __CFUniCharGetMappedCase(const __CFUniCharCaseMappings *theTable, uint32_t numElem, UTF32Char character) {
744    const __CFUniCharCaseMappings *p, *q, *divider;
745
746    if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) {
747        return 0;
748    }
749    p = theTable;
750    q = p + (numElem-1);
751    while (p <= q) {
752        divider = p + ((q - p) >> 1);	/* divide by 2 */
753        if (character < divider->_key) { q = divider - 1; }
754        else if (character > divider->_key) { p = divider + 1; }
755        else { return divider->_value; }
756    }
757    return 0;
758}
759
760#define NUM_CASE_MAP_DATA (kCFUniCharCaseFold + 1)
761
762static bool __CFUniCharLoadCaseMappingTable(void) {
763    uint32_t *countArray;
764    int idx;
765
766    if (NULL == __CFUniCharMappingTables) (void)CFUniCharGetMappingData(kCFUniCharToLowercase);
767    if (NULL == __CFUniCharMappingTables) return false;
768
769    __CFSpinLock(&__CFUniCharMappingTableLock);
770
771    if (__CFUniCharCaseMappingTableCounts) {
772        __CFSpinUnlock(&__CFUniCharMappingTableLock);
773        return true;
774    }
775
776    countArray = (uint32_t *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(uint32_t) * NUM_CASE_MAP_DATA + sizeof(uint32_t *) * NUM_CASE_MAP_DATA * 2, 0);
777    __CFUniCharCaseMappingTable = (uint32_t **)((char *)countArray + sizeof(uint32_t) * NUM_CASE_MAP_DATA);
778    __CFUniCharCaseMappingExtraTable = (const uint32_t **)__CFUniCharCaseMappingTable + NUM_CASE_MAP_DATA;
779
780    for (idx = 0;idx < NUM_CASE_MAP_DATA;idx++) {
781        countArray[idx] = *((uint32_t *)__CFUniCharMappingTables[idx]) / (sizeof(uint32_t) * 2);
782        __CFUniCharCaseMappingTable[idx] = ((uint32_t *)__CFUniCharMappingTables[idx]) + 1;
783        __CFUniCharCaseMappingExtraTable[idx] = (const uint32_t *)((char *)__CFUniCharCaseMappingTable[idx] + *((uint32_t *)__CFUniCharMappingTables[idx]));
784    }
785
786    __CFUniCharCaseMappingTableCounts = countArray;
787
788    __CFSpinUnlock(&__CFUniCharMappingTableLock);
789    return true;
790}
791
/*
 * Two-letter language codes expressed as the uint16_t value obtained by
 * loading the first two bytes of a langCode buffer in one go, which is how the
 * case mapping functions compare them. The value therefore depends on the
 * byte order of the host.
 */
#if __CF_BIG_ENDIAN__
#define TURKISH_LANG_CODE	(0x7472) // tr
#define LITHUANIAN_LANG_CODE	(0x6C74) // lt
#define AZERI_LANG_CODE		(0x617A) // az
#define DUTCH_LANG_CODE		(0x6E6C) // nl
#define GREEK_LANG_CODE		(0x656C) // el
#else
#define TURKISH_LANG_CODE	(0x7274) // tr
#define LITHUANIAN_LANG_CODE	(0x746C) // lt
#define AZERI_LANG_CODE		(0x7A61) // az
#define DUTCH_LANG_CODE		(0x6C6E) // nl
#define GREEK_LANG_CODE		(0x6C65) // el
#endif
805
806CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode) {
807    __CFUniCharBitmapData *data;
808    uint8_t planeNo = (theChar >> 16) & 0xFF;
809
810caseFoldRetry:
811
812#if DO_SPECIAL_CASE_MAPPING
813    if (flags & kCFUniCharCaseMapFinalSigma) {
814        if (theChar == 0x03A3) { // Final sigma
815            *convertedChar = (ctype == kCFUniCharToLowercase ? 0x03C2 : 0x03A3);
816            return 1;
817        }
818    }
819
820    if (langCode) {
821        if (flags & kCFUniCharCaseMapGreekTonos) { // localized Greek uppercasing
822            if (theChar == 0x0301) { // GREEK TONOS
823                return 0;
824            } else if (theChar == 0x0344) {// COMBINING GREEK DIALYTIKA TONOS
825                *convertedChar = 0x0308; // COMBINING GREEK DIALYTIKA
826                return 1;
827            } else if (CFUniCharIsMemberOf(theChar, kCFUniCharDecomposableCharacterSet)) {
828                UTF32Char buffer[MAX_DECOMPOSED_LENGTH];
829                CFIndex length = CFUniCharDecomposeCharacter(theChar, buffer, MAX_DECOMPOSED_LENGTH);
830
831                if (length > 1) {
832                    UTF32Char *characters = buffer + 1;
833                    UTF32Char *tail = buffer + length;
834
835                    while (characters < tail) {
836                        if (*characters == 0x0301) break;
837                        ++characters;
838                    }
839
840                    if (characters < tail) { // found a tonos
841                        CFIndex convertedLength = CFUniCharMapCaseTo(*buffer, convertedChar, maxLength, ctype, 0, langCode);
842
843                        if (convertedLength == 0) {
844                            *convertedChar = (UTF16Char)*buffer;
845                            convertedLength = 1;
846                        }
847
848                        characters = buffer + 1;
849
850                        while (characters < tail) {
851                            if (*characters != 0x0301) { // not tonos
852                                if (*characters < 0x10000) { // BMP
853                                    convertedChar[convertedLength] = (UTF16Char)*characters;
854                                    ++convertedLength;
855                                } else {
856                                    UTF32Char character = *characters - 0x10000;
857                                    convertedChar[convertedLength++] = (UTF16Char)((character >> 10) + 0xD800UL);
858                                    convertedChar[convertedLength++] = (UTF16Char)((character & 0x3FF) + 0xDC00UL);
859                                }
860                            }
861                            ++characters;
862                        }
863
864                        return convertedLength;
865                    }
866                }
867            }
868        }
869        switch (*(uint16_t *)langCode) {
870            case LITHUANIAN_LANG_CODE:
871                if (theChar == 0x0307 && (flags & kCFUniCharCaseMapAfter_i)) {
872                    return 0;
873                } else if (ctype == kCFUniCharToLowercase) {
874                    if (flags & kCFUniCharCaseMapMoreAbove) {
875                        switch (theChar) {
876                            case 0x0049: // LATIN CAPITAL LETTER I
877                                *(convertedChar++) = 0x0069;
878                                *(convertedChar++) = 0x0307;
879                                return 2;
880
881                            case 0x004A: // LATIN CAPITAL LETTER J
882                                *(convertedChar++) = 0x006A;
883                                *(convertedChar++) = 0x0307;
884                                return 2;
885
886                            case 0x012E: // LATIN CAPITAL LETTER I WITH OGONEK
887                                *(convertedChar++) = 0x012F;
888                                *(convertedChar++) = 0x0307;
889                                return 2;
890
891                            default: break;
892                        }
893                    }
894                    switch (theChar) {
895                        case 0x00CC: // LATIN CAPITAL LETTER I WITH GRAVE
896                            *(convertedChar++) = 0x0069;
897                            *(convertedChar++) = 0x0307;
898                            *(convertedChar++) = 0x0300;
899                            return 3;
900
901                        case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE
902                            *(convertedChar++) = 0x0069;
903                            *(convertedChar++) = 0x0307;
904                            *(convertedChar++) = 0x0301;
905                            return 3;
906
907                        case 0x0128: // LATIN CAPITAL LETTER I WITH TILDE
908                            *(convertedChar++) = 0x0069;
909                            *(convertedChar++) = 0x0307;
910                            *(convertedChar++) = 0x0303;
911                            return 3;
912
913                        default: break;
914                    }
915                }
916            break;
917
918            case TURKISH_LANG_CODE:
919            case AZERI_LANG_CODE:
920                if ((theChar == 0x0049) || (theChar == 0x0131)) { // LATIN CAPITAL LETTER I & LATIN SMALL LETTER DOTLESS I
921                    *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold))  ? ((kCFUniCharCaseMapMoreAbove & flags) ? 0x0069 : 0x0131) : 0x0049);
922                    return 1;
923                } else if ((theChar == 0x0069) || (theChar == 0x0130)) { // LATIN SMALL LETTER I & LATIN CAPITAL LETTER I WITH DOT ABOVE
924                    *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? 0x0069 : 0x0130);
925                    return 1;
926                } else if (theChar == 0x0307 && (kCFUniCharCaseMapAfter_i & flags)) { // COMBINING DOT ABOVE AFTER_i
927                    if (ctype == kCFUniCharToLowercase) {
928                        return 0;
929                    } else {
930                        *convertedChar = 0x0307;
931                        return 1;
932                    }
933                }
934                break;
935
936	    case DUTCH_LANG_CODE:
937		if ((theChar == 0x004A) || (theChar == 0x006A)) {
938                    *convertedChar = (((ctype == kCFUniCharToUppercase) || (ctype == kCFUniCharToTitlecase) || (kCFUniCharCaseMapDutchDigraph & flags)) ? 0x004A  : 0x006A);
939                    return 1;
940		}
941		break;
942
943            default: break;
944        }
945    }
946#endif // DO_SPECIAL_CASE_MAPPING
947
948    if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
949
950    data = __CFUniCharBitmapDataArray + __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(ctype + kCFUniCharHasNonSelfLowercaseCharacterSet));
951
952    if (planeNo < data->_numPlanes && data->_planes[planeNo] && CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) && (__CFUniCharCaseMappingTableCounts || __CFUniCharLoadCaseMappingTable())) {
953        uint32_t value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[ctype], __CFUniCharCaseMappingTableCounts[ctype], theChar);
954
955        if (!value && ctype == kCFUniCharToTitlecase) {
956            value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[kCFUniCharToUppercase], __CFUniCharCaseMappingTableCounts[kCFUniCharToUppercase], theChar);
957            if (value) ctype = kCFUniCharToUppercase;
958        }
959
960        if (value) {
961            CFIndex count = CFUniCharConvertFlagToCount(value);
962
963            if (count == 1) {
964                if (value & kCFUniCharNonBmpFlag) {
965                    if (maxLength > 1) {
966                        value = (value & 0xFFFFFF) - 0x10000;
967                        *(convertedChar++) = (UTF16Char)(value >> 10) + 0xD800UL;
968                        *(convertedChar++) = (UTF16Char)(value & 0x3FF) + 0xDC00UL;
969                        return 2;
970                    }
971                } else {
972                    *convertedChar = (UTF16Char)value;
973                    return 1;
974                }
975            } else if (count < maxLength) {
976                const uint32_t *extraMapping = __CFUniCharCaseMappingExtraTable[ctype] + (value & 0xFFFFFF);
977
978                if (value & kCFUniCharNonBmpFlag) {
979                    CFIndex copiedLen = 0;
980
981                    while (count-- > 0) {
982                        value = *(extraMapping++);
983                        if (value > 0xFFFF) {
984                            if (copiedLen + 2 >= maxLength) break;
985                            value = (value & 0xFFFFFF) - 0x10000;
986                            convertedChar[copiedLen++] = (UTF16Char)(value >> 10) + 0xD800UL;
987                            convertedChar[copiedLen++] = (UTF16Char)(value & 0x3FF) + 0xDC00UL;
988                        } else {
989                            if (copiedLen + 1 >= maxLength) break;
990                            convertedChar[copiedLen++] = value;
991                        }
992                    }
993                    if (!count) return copiedLen;
994                } else {
995                    CFIndex idx;
996
997                    for (idx = 0;idx < count;idx++) *(convertedChar++) = (UTF16Char)*(extraMapping++);
998                    return count;
999                }
1000            }
1001        }
1002    } else if (ctype == kCFUniCharCaseFold) {
1003        ctype = kCFUniCharToLowercase;
1004        goto caseFoldRetry;
1005    }
1006
1007    if (theChar > 0xFFFF) { // non-BMP
1008        theChar = (theChar & 0xFFFFFF) - 0x10000;
1009        *(convertedChar++) = (UTF16Char)(theChar >> 10) + 0xD800UL;
1010        *(convertedChar++) = (UTF16Char)(theChar & 0x3FF) + 0xDC00UL;
1011        return 2;
1012    } else {
1013        *convertedChar = theChar;
1014        return 1;
1015    }
1016}
1017
1018CFIndex CFUniCharMapTo(UniChar theChar, UniChar *convertedChar, CFIndex maxLength, uint16_t ctype, uint32_t flags) {
1019    if (ctype == kCFUniCharCaseFold + 1) { // kCFUniCharDecompose
1020        if (CFUniCharIsDecomposableCharacter(theChar, false)) {
1021            UTF32Char buffer[MAX_DECOMPOSED_LENGTH];
1022            CFIndex usedLength = CFUniCharDecomposeCharacter(theChar, buffer, MAX_DECOMPOSED_LENGTH);
1023            CFIndex idx;
1024
1025            for (idx = 0;idx < usedLength;idx++) *(convertedChar++) = buffer[idx];
1026            return usedLength;
1027        } else {
1028            *convertedChar = theChar;
1029            return 1;
1030        }
1031    } else {
1032        return CFUniCharMapCaseTo(theChar, convertedChar, maxLength, ctype, flags, NULL);
1033    }
1034}
1035
1036CF_INLINE bool __CFUniCharIsMoreAbove(UTF16Char *buffer, CFIndex length) {
1037    UTF32Char currentChar;
1038    uint32_t property;
1039
1040    while (length-- > 0) {
1041        currentChar = *(buffer)++;
1042        if (CFUniCharIsSurrogateHighCharacter(currentChar) && (length > 0) && CFUniCharIsSurrogateLowCharacter(*(buffer + 1))) {
1043            currentChar = CFUniCharGetLongCharacterForSurrogatePair(currentChar, *(buffer++));
1044            --length;
1045        }
1046        if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
1047
1048        property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
1049
1050        if (property == 230) return true; // Above priority
1051    }
1052    return false;
1053}
1054
/*
 * Scans backward from the end of buffer[0..length) and reports whether the
 * preceding character can be reached without passing a combining mark of
 * combining class 230 (above).
 *
 * Returns false when length is less than 1, when such a mark is found among
 * the trailing members of kCFUniCharNonBaseCharacterSet, or when the
 * decomposition of the character the scan stops on contains one. Returns true
 * otherwise.
 *
 * NOTE(review): nothing in this function checks that the character the scan
 * stops on is actually an 'i'; presumably the caller establishes that through
 * its flags -- confirm against the call site.
 */
CF_INLINE bool __CFUniCharIsAfter_i(UTF16Char *buffer, CFIndex length) {
    UTF32Char currentChar = 0;
    uint32_t property;
    UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
    CFIndex decompLength;
    CFIndex idx;

    if (length < 1) return 0;

    // Walk backward from one past the last unit; the loop leaves buffer[0] for the code that follows it
    buffer += length;
    while (length-- > 1) {
        currentChar = *(--buffer);
        if (CFUniCharIsSurrogateLowCharacter(currentChar)) {
            // Combine with the preceding high surrogate only if that unit is not buffer[0]
            if ((length > 1) && CFUniCharIsSurrogateHighCharacter(*(buffer - 1))) {
                currentChar = CFUniCharGetLongCharacterForSurrogatePair(*(--buffer), currentChar);
                --length;
            } else {
                break;
            }
        }
        // Stop at the first character that is not a non-base character
        if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;

        property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));

        if (property == 230) return false; // Above priority
    }
    if (length == 0) {
        // The loop ran out of input with only buffer[0] left unread
        currentChar = *(--buffer);
    } else if (CFUniCharIsSurrogateLowCharacter(currentChar) && CFUniCharIsSurrogateHighCharacter(*(--buffer))) {
        // The loop stopped on a low surrogate it did not combine; pair it with the unit before it if that is a high surrogate
        currentChar = CFUniCharGetLongCharacterForSurrogatePair(*buffer, currentChar);
    }

    // Marks that are part of a precomposed character count as well
    decompLength = CFUniCharDecomposeCharacter(currentChar, decomposed, MAX_DECOMPOSED_LENGTH);
    currentChar = *decomposed;


    // Index 0 is skipped; only the elements after it are tested
    for (idx = 1;idx < decompLength;idx++) {
        currentChar = decomposed[idx];
        property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));

        if (property == 230) return false; // Above priority
    }
    return true;
}
1099
1100CF_PRIVATE uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags) {
1101    if (theChar == 0x03A3) { // GREEK CAPITAL LETTER SIGMA
1102        if ((type == kCFUniCharToLowercase) && (currentIndex > 0)) {
1103            UTF16Char *start = buffer;
1104            UTF16Char *end = buffer + length;
1105            UTF32Char otherChar;
1106
1107            // First check if we're after a cased character
1108            buffer += (currentIndex - 1);
1109            while (start <= buffer) {
1110                otherChar = *(buffer--);
1111                if (CFUniCharIsSurrogateLowCharacter(otherChar) && (start <= buffer) && CFUniCharIsSurrogateHighCharacter(*buffer)) {
1112                    otherChar = CFUniCharGetLongCharacterForSurrogatePair(*(buffer--), otherChar);
1113                }
1114                if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) {
1115                    if (!CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) && !CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase
1116                    break;
1117                }
1118            }
1119
1120            // Next check if we're before a cased character
1121            buffer = start + currentIndex + 1;
1122            while (buffer < end) {
1123                otherChar = *(buffer++);
1124                if (CFUniCharIsSurrogateHighCharacter(otherChar) && (buffer < end) && CFUniCharIsSurrogateLowCharacter(*buffer)) {
1125                    otherChar = CFUniCharGetLongCharacterForSurrogatePair(otherChar, *(buffer++));
1126                }
1127                if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) {
1128                    if (CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase
1129                    break;
1130                }
1131            }
1132            return kCFUniCharCaseMapFinalSigma;
1133        }
1134    } else if (langCode) {
1135        if (*((const uint16_t *)langCode) == LITHUANIAN_LANG_CODE) {
1136            if ((theChar == 0x0307) && ((kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) & lastFlags) == (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove)) {
1137                return (__CFUniCharIsAfter_i(buffer, currentIndex) ? kCFUniCharCaseMapAfter_i : 0);
1138            } else if (type == kCFUniCharToLowercase) {
1139                if ((theChar == 0x0049) || (theChar == 0x004A) || (theChar == 0x012E)) {
1140                    ++currentIndex;
1141                    return (__CFUniCharIsMoreAbove(buffer + currentIndex, length - currentIndex) ? kCFUniCharCaseMapMoreAbove : 0);
1142                }
1143            } else if ((theChar == 'i') || (theChar == 'j')) {
1144                ++currentIndex;
1145                return (__CFUniCharIsMoreAbove(buffer + currentIndex, length - currentIndex) ? (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) : 0);
1146            }
1147        } else if ((*((const uint16_t *)langCode) == TURKISH_LANG_CODE) || (*((const uint16_t *)langCode) == AZERI_LANG_CODE)) {
1148            if (type == kCFUniCharToLowercase) {
1149                if (theChar == 0x0307) {
1150                    return (kCFUniCharCaseMapMoreAbove & lastFlags ? kCFUniCharCaseMapAfter_i : 0);
1151                } else if (theChar == 0x0049) {
1152                    return (((++currentIndex < length) && (buffer[currentIndex] == 0x0307)) ? kCFUniCharCaseMapMoreAbove : 0);
1153                }
1154            }
1155        } else if (*((const uint16_t *)langCode) == DUTCH_LANG_CODE) {
1156	    if (kCFUniCharCaseMapDutchDigraph & lastFlags) {
1157		return (((theChar == 0x006A) || (theChar == 0x004A)) ? kCFUniCharCaseMapDutchDigraph : 0);
1158	    } else {
1159		if ((type == kCFUniCharToTitlecase) && ((theChar == 0x0069) || (theChar == 0x0049))) {
1160		    return (((++currentIndex < length) && ((buffer[currentIndex] == 0x006A) || (buffer[currentIndex] == 0x004A))) ? kCFUniCharCaseMapDutchDigraph : 0);
1161		}
1162	    }
1163	}
1164
1165        if (kCFUniCharCaseMapGreekTonos & lastFlags) { // still searching for tonos
1166            if (CFUniCharIsMemberOf(theChar, kCFUniCharNonBaseCharacterSet)) {
1167                return kCFUniCharCaseMapGreekTonos;
1168            }
1169        }
1170        if (((theChar >= 0x0370) && (theChar < 0x0400)) || ((theChar >= 0x1F00) && (theChar < 0x2000))) { // Greek/Coptic & Greek extended ranges
1171            if ((type == kCFUniCharToUppercase) && (CFUniCharIsMemberOf(theChar, kCFUniCharLetterCharacterSet))) return kCFUniCharCaseMapGreekTonos;
1172        }
1173    }
1174    return 0;
1175}
1176
// Unicode property database
// One entry per property type, built on first use by CFUniCharGetUnicodePropertyDataForPlane()
static __CFUniCharBitmapData *__CFUniCharUnicodePropertyTable = NULL;
// Number of entries in __CFUniCharUnicodePropertyTable
static int __CFUniCharUnicodePropertyTableCount = 0;

// Guards the construction (and, on Windows, the teardown) of the property table
static CFSpinLock_t __CFUniCharPropTableLock = CFSpinLockInit;

// Name handed to __CFUniCharLoadFile() to locate the property database
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
#if USE_MACHO_SEGMENT
#define PROP_DB_FILE "__properties"
#else
#define PROP_DB_FILE "/CFUniCharPropertyDatabase.data"
#endif
#elif DEPLOYMENT_TARGET_WINDOWS
#if USE_MACHO_SEGMENT
#define PROP_DB_FILE "__properties"
#else
#define PROP_DB_FILE L"CFUniCharPropertyDatabase.data"
#endif
#else
#error Unknown or unspecified DEPLOYMENT_TARGET
#endif
1198
1199const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane) {
1200
1201    __CFSpinLock(&__CFUniCharPropTableLock);
1202
1203    if (NULL == __CFUniCharUnicodePropertyTable) {
1204        __CFUniCharBitmapData *table;
1205        const void *bytes;
1206        const void *bodyBase;
1207        const void *planeBase;
1208        int headerSize;
1209        int idx, count;
1210        int planeIndex, planeCount;
1211        int planeSize;
1212	int64_t fileSize;
1213
1214        if (!__CFUniCharLoadFile(PROP_DB_FILE, &bytes, &fileSize) || !__CFSimpleFileSizeVerification(bytes, fileSize)) {
1215            __CFSpinUnlock(&__CFUniCharPropTableLock);
1216            return NULL;
1217        }
1218
1219#if defined (__cplusplus)
1220		bytes = (uint8_t*)bytes + 4; // Skip Unicode version
1221		headerSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
1222#else
1223		bytes += 4; // Skip Unicode version
1224		headerSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes += sizeof(uint32_t);
1225#endif
1226
1227        headerSize -= (sizeof(uint32_t) * 2);
1228        bodyBase = (char *)bytes + headerSize;
1229
1230        count = headerSize / sizeof(uint32_t);
1231        __CFUniCharUnicodePropertyTableCount = count;
1232
1233        table = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * count, 0);
1234
1235        for (idx = 0;idx < count;idx++) {
1236            planeCount = *((const uint8_t *)bodyBase);
1237            planeBase = (char *)bodyBase + planeCount + (planeCount % 4 ? 4 - (planeCount % 4) : 0);
1238            table[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * planeCount, 0);
1239
1240            for (planeIndex = 0;planeIndex < planeCount;planeIndex++) {
1241                if ((planeSize = ((const uint8_t *)bodyBase)[planeIndex + 1])) {
1242                    table[idx]._planes[planeIndex] = (const uint8_t *)planeBase;
1243#if defined (__cplusplus)
1244					planeBase = (char*)planeBase + (planeSize * 256);
1245#else
1246					planeBase += (planeSize * 256);
1247#endif
1248                } else {
1249                    table[idx]._planes[planeIndex] = NULL;
1250                }
1251            }
1252
1253            table[idx]._numPlanes = planeCount;
1254#if defined (__cplusplus)
1255			bodyBase = (const uint8_t *)bodyBase + (CFSwapInt32BigToHost(*(uint32_t *)bytes));
1256			((uint32_t *&)bytes) ++;
1257#else
1258			bodyBase += (CFSwapInt32BigToHost(*((uint32_t *)bytes++)));
1259#endif
1260        }
1261
1262        __CFUniCharUnicodePropertyTable = table;
1263    }
1264
1265    __CFSpinUnlock(&__CFUniCharPropTableLock);
1266
1267    return (plane < __CFUniCharUnicodePropertyTable[propertyType]._numPlanes ? __CFUniCharUnicodePropertyTable[propertyType]._planes[plane] : NULL);
1268}
1269
1270CF_PRIVATE uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType) {
1271    (void)CFUniCharGetUnicodePropertyDataForPlane(propertyType, 0);
1272    return __CFUniCharUnicodePropertyTable[propertyType]._numPlanes;
1273}
1274
1275CF_PRIVATE uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType) {
1276    if (propertyType == kCFUniCharCombiningProperty) {
1277        return CFUniCharGetCombiningPropertyForCharacter(character, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF));
1278    } else if (propertyType == kCFUniCharBidiProperty) {
1279        return CFUniCharGetBidiPropertyForCharacter(character, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF));
1280    } else {
1281        return 0;
1282    }
1283}
1284
1285
1286
1287/*
1288    The UTF8 conversion in the following function is derived from ConvertUTF.c
1289*/
1290/*
1291 * Copyright 2001 Unicode, Inc.
1292 *
1293 * Disclaimer
1294 *
1295 * This source code is provided as is by Unicode, Inc. No claims are
1296 * made as to fitness for any particular purpose. No warranties of any
1297 * kind are expressed or implied. The recipient agrees to determine
1298 * applicability of information provided. If this file has been
1299 * purchased on magnetic or optical media from Unicode, Inc., the
1300 * sole remedy for any claim will be exchange of defective media
1301 * within 90 days of receipt.
1302 *
1303 * Limitations on Rights to Redistribute This Code
1304 *
1305 * Unicode, Inc. hereby grants the right to freely use the information
1306 * supplied in this file in the creation of products supporting the
1307 * Unicode Standard, and to make copies of this file in any form
1308 * for internal or external distribution as long as this notice
1309 * remains attached.
1310 */
1311#define UNI_REPLACEMENT_CHAR (0x0000FFFDUL)
1312
1313bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat) {
1314    UTF32Char currentChar;
1315    CFIndex usedLength = *filledLength;
1316
1317    if (dstFormat == kCFUniCharUTF16Format) {
1318        UTF16Char *dstBuffer = (UTF16Char *)*dst;
1319
1320        while (srcLength-- > 0) {
1321            currentChar = *(src++);
1322
1323            if (currentChar > 0xFFFF) { // Non-BMP
1324                usedLength += 2;
1325                if (dstLength) {
1326                    if (usedLength > dstLength) return false;
1327                    currentChar -= 0x10000;
1328                    *(dstBuffer++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
1329                    *(dstBuffer++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
1330                }
1331            } else {
1332                ++usedLength;
1333                if (dstLength) {
1334                    if (usedLength > dstLength) return false;
1335                    *(dstBuffer++) = (UTF16Char)currentChar;
1336                }
1337            }
1338        }
1339
1340        *dst = dstBuffer;
1341    } else if (dstFormat == kCFUniCharUTF8Format) {
1342        uint8_t *dstBuffer = (uint8_t *)*dst;
1343        uint16_t bytesToWrite = 0;
1344        const UTF32Char byteMask = 0xBF;
1345        const UTF32Char byteMark = 0x80;
1346        static const uint8_t firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
1347
1348        while (srcLength-- > 0) {
1349            currentChar = *(src++);
1350
1351            /* Figure out how many bytes the result will require */
1352            if (currentChar < (UTF32Char)0x80) {
1353                bytesToWrite = 1;
1354            } else if (currentChar < (UTF32Char)0x800) {
1355                bytesToWrite = 2;
1356            } else if (currentChar < (UTF32Char)0x10000) {
1357                bytesToWrite = 3;
1358            } else if (currentChar < (UTF32Char)0x200000) {
1359                bytesToWrite = 4;
1360            } else {
1361                bytesToWrite = 2;
1362                currentChar = UNI_REPLACEMENT_CHAR;
1363            }
1364
1365            usedLength += bytesToWrite;
1366
1367            if (dstLength) {
1368                if (usedLength > dstLength) return false;
1369
1370                dstBuffer += bytesToWrite;
1371                switch (bytesToWrite) {	/* note: everything falls through. */
1372                    case 4:	*--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1373                    case 3:	*--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1374                    case 2:	*--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1375                    case 1:	*--dstBuffer =  currentChar | firstByteMark[bytesToWrite];
1376                }
1377                dstBuffer += bytesToWrite;
1378            }
1379        }
1380
1381        *dst = dstBuffer;
1382    } else {
1383        UTF32Char *dstBuffer = (UTF32Char *)*dst;
1384
1385        while (srcLength-- > 0) {
1386            currentChar = *(src++);
1387
1388            ++usedLength;
1389            if (dstLength) {
1390                if (usedLength > dstLength) return false;
1391                *(dstBuffer++) = currentChar;
1392            }
1393        }
1394
1395        *dst = dstBuffer;
1396    }
1397
1398    *filledLength = usedLength;
1399
1400    return true;
1401}
1402
1403#if DEPLOYMENT_TARGET_WINDOWS
1404void __CFUniCharCleanup(void)
1405{
1406    int	idx;
1407
1408    // cleanup memory allocated by __CFUniCharLoadBitmapData()
1409    __CFSpinLock(&__CFUniCharBitmapLock);
1410
1411    if (__CFUniCharBitmapDataArray != NULL) {
1412        for (idx = 0; idx < (int)__CFUniCharNumberOfBitmaps; idx++) {
1413            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray[idx]._planes);
1414            __CFUniCharBitmapDataArray[idx]._planes = NULL;
1415        }
1416
1417        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray);
1418        __CFUniCharBitmapDataArray = NULL;
1419        __CFUniCharNumberOfBitmaps = 0;
1420    }
1421
1422    __CFSpinUnlock(&__CFUniCharBitmapLock);
1423
1424    // cleanup memory allocated by CFUniCharGetMappingData()
1425    __CFSpinLock(&__CFUniCharMappingTableLock);
1426
1427    if (__CFUniCharMappingTables != NULL) {
1428        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharMappingTables);
1429        __CFUniCharMappingTables = NULL;
1430    }
1431
1432    // cleanup memory allocated by __CFUniCharLoadCaseMappingTable()
1433    if (__CFUniCharCaseMappingTableCounts != NULL) {
1434        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharCaseMappingTableCounts);
1435        __CFUniCharCaseMappingTableCounts = NULL;
1436
1437        __CFUniCharCaseMappingTable = NULL;
1438        __CFUniCharCaseMappingExtraTable = NULL;
1439    }
1440
1441    __CFSpinUnlock(&__CFUniCharMappingTableLock);
1442
1443    // cleanup memory allocated by CFUniCharGetUnicodePropertyDataForPlane()
1444    __CFSpinLock(&__CFUniCharPropTableLock);
1445
1446    if (__CFUniCharUnicodePropertyTable != NULL) {
1447        for (idx = 0; idx < __CFUniCharUnicodePropertyTableCount; idx++) {
1448            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable[idx]._planes);
1449            __CFUniCharUnicodePropertyTable[idx]._planes = NULL;
1450        }
1451
1452        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable);
1453        __CFUniCharUnicodePropertyTable = NULL;
1454        __CFUniCharUnicodePropertyTableCount = 0;
1455    }
1456
1457    __CFSpinUnlock(&__CFUniCharPropTableLock);
1458}
1459#endif
1460
1461#undef USE_MACHO_SEGMENT
1462
1463