1/* 2 * Copyright (c) 2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. Please obtain a copy of the License at 10 * http://www.opensource.apple.com/apsl/ and read it before using this 11 * file. 12 * 13 * The Original Code and all software distributed under the License are 14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 18 * Please see the License for the specific language governing rights and 19 * limitations under the License. 20 * 21 * @APPLE_LICENSE_HEADER_END@ 22 */ 23 24/* CFUnicodePrecomposition.c 25 Copyright (c) 1999-2013, Apple Inc. All rights reserved. 26 Responsibility: Aki Inoue 27*/ 28 29#include <string.h> 30#include <CoreFoundation/CFBase.h> 31#include <CoreFoundation/CFCharacterSet.h> 32#include "CFUniChar.h" 33#include "CFUnicodePrecomposition.h" 34#include "CFInternal.h" 35#include "CFUniCharPriv.h" 36 37// Canonical Precomposition 38static UTF32Char *__CFUniCharPrecompSourceTable = NULL; 39static uint32_t __CFUniCharPrecompositionTableLength = 0; 40static uint16_t *__CFUniCharBMPPrecompDestinationTable = NULL; 41static uint32_t *__CFUniCharNonBMPPrecompDestinationTable = NULL; 42 43static const uint8_t *__CFUniCharNonBaseBitmapForBMP_P = NULL; // Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c 44static const uint8_t *__CFUniCharCombiningClassForBMP = NULL; 45 46static CFSpinLock_t __CFUniCharPrecompositionTableLock = CFSpinLockInit; 47 48static void __CFUniCharLoadPrecompositionTable(void) { 49 50 __CFSpinLock(&__CFUniCharPrecompositionTableLock); 51 52 if (NULL == __CFUniCharPrecompSourceTable) { 53 const uint32_t *bytes = (const uint32_t *)CFUniCharGetMappingData(kCFUniCharCanonicalPrecompMapping); 54 uint32_t bmpMappingLength; 55 56 if (NULL == bytes) { 57 __CFSpinUnlock(&__CFUniCharPrecompositionTableLock); 58 return; 59 } 60 61 __CFUniCharPrecompositionTableLength = *(bytes++); 62 bmpMappingLength = *(bytes++); 63 __CFUniCharPrecompSourceTable = (UTF32Char *)bytes; 64 __CFUniCharBMPPrecompDestinationTable = (uint16_t *)((intptr_t)bytes + (__CFUniCharPrecompositionTableLength * sizeof(UTF32Char) * 2)); 65 __CFUniCharNonBMPPrecompDestinationTable = (uint32_t *)(((intptr_t)__CFUniCharBMPPrecompDestinationTable) + bmpMappingLength); 66 67 __CFUniCharNonBaseBitmapForBMP_P = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0); 68 __CFUniCharCombiningClassForBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0); 69 } 70 71 __CFSpinUnlock(&__CFUniCharPrecompositionTableLock); 72} 73 74 // Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c 75#define __CFUniCharIsNonBaseCharacter __CFUniCharIsNonBaseCharacter_P 76CF_INLINE bool __CFUniCharIsNonBaseCharacter(UTF32Char character) { 77 return CFUniCharIsMemberOfBitmap(character, (character < 0x10000 ? __CFUniCharNonBaseBitmapForBMP_P : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, ((character >> 16) & 0xFF)))); 78} 79 80typedef struct { 81 UTF16Char _key; 82 UTF16Char _value; 83} __CFUniCharPrecomposeBMPMappings; 84 85static UTF16Char __CFUniCharGetMappedBMPValue(const __CFUniCharPrecomposeBMPMappings *theTable, uint32_t numElem, UTF16Char character) { 86 const __CFUniCharPrecomposeBMPMappings *p, *q, *divider; 87 88 if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) { 89 return 0; 90 } 91 p = theTable; 92 q = p + (numElem-1); 93 while (p <= q) { 94 divider = p + ((q - p) >> 1); /* divide by 2 */ 95 if (character < divider->_key) { q = divider - 1; } 96 else if (character > divider->_key) { p = divider + 1; } 97 else { return divider->_value; } 98 } 99 return 0; 100} 101 102typedef struct { 103 UTF32Char _key; 104 uint32_t _value; 105} __CFUniCharPrecomposeMappings; 106 107static uint32_t __CFUniCharGetMappedValue_P(const __CFUniCharPrecomposeMappings *theTable, uint32_t numElem, UTF32Char character) { 108 const __CFUniCharPrecomposeMappings *p, *q, *divider; 109 110 if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) { 111 return 0; 112 } 113 p = theTable; 114 q = p + (numElem-1); 115 while (p <= q) { 116 divider = p + ((q - p) >> 1); /* divide by 2 */ 117 if (character < divider->_key) { q = divider - 1; } 118 else if (character > divider->_key) { p = divider + 1; } 119 else { return divider->_value; } 120 } 121 return 0; 122} 123 124CF_PRIVATE 125UTF32Char CFUniCharPrecomposeCharacter(UTF32Char base, UTF32Char combining) { 126 uint32_t value; 127 128 if (NULL == __CFUniCharPrecompSourceTable) __CFUniCharLoadPrecompositionTable(); 129 130 if (!(value = __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings *)__CFUniCharPrecompSourceTable, __CFUniCharPrecompositionTableLength, combining))) return 0xFFFD; 131 132 // We don't have precomposition in non-BMP 133 if (value & kCFUniCharNonBmpFlag) { 134 value = __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings *)((uint32_t *)__CFUniCharNonBMPPrecompDestinationTable + (value & 0xFFFF)), (value >> 16) & 0x7FFF, base); 135 } else { 136 value = __CFUniCharGetMappedBMPValue((const __CFUniCharPrecomposeBMPMappings *)((uint32_t *)__CFUniCharBMPPrecompDestinationTable + (value & 0xFFFF)), (value >> 16), base); 137 } 138 return (value ? value : 0xFFFD); 139} 140 141#define HANGUL_SBASE 0xAC00 142#define HANGUL_LBASE 0x1100 143#define HANGUL_VBASE 0x1161 144#define HANGUL_TBASE 0x11A7 145#define HANGUL_SCOUNT 11172 146#define HANGUL_LCOUNT 19 147#define HANGUL_VCOUNT 21 148#define HANGUL_TCOUNT 28 149#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT) 150 151CF_INLINE void __CFUniCharMoveBufferFromEnd0(UTF16Char *convertedChars, CFIndex length, CFIndex delta) { 152 const UTF16Char *limit = convertedChars; 153 UTF16Char *dstP; 154 155 convertedChars += length; 156 dstP = convertedChars + delta; 157 158 while (convertedChars > limit) *(--dstP) = *(--convertedChars); 159} 160 161bool CFUniCharPrecompose(const UTF16Char *characters, CFIndex length, CFIndex *consumedLength, UTF16Char *precomposed, CFIndex maxLength, CFIndex *filledLength) { 162 UTF32Char currentChar = 0, lastChar = 0, precomposedChar = 0xFFFD; 163 CFIndex originalLength = length, usedLength = 0; 164 UTF16Char *currentBase = precomposed; 165 uint8_t currentClass, lastClass = 0; 166 bool currentBaseIsBMP = true; 167 bool isPrecomposed; 168 169 if (NULL == __CFUniCharPrecompSourceTable) __CFUniCharLoadPrecompositionTable(); 170 171 while (length > 0) { 172 currentChar = *(characters++); 173 --length; 174 175 if (CFUniCharIsSurrogateHighCharacter(currentChar) && (length > 0) && CFUniCharIsSurrogateLowCharacter(*characters)) { 176 currentChar = CFUniCharGetLongCharacterForSurrogatePair(currentChar, *(characters++)); 177 --length; 178 } 179 180 if (lastChar && __CFUniCharIsNonBaseCharacter(currentChar)) { 181 isPrecomposed = (precomposedChar == 0xFFFD ? false : true); 182 if (isPrecomposed) lastChar = precomposedChar; 183 184 currentClass = (currentChar > 0xFFFF ? CFUniCharGetUnicodeProperty(currentChar, kCFUniCharCombiningProperty) : CFUniCharGetCombiningPropertyForCharacter(currentChar, __CFUniCharCombiningClassForBMP)); 185 186 if ((lastClass == 0) || (currentClass > lastClass)) { 187 if ((precomposedChar = CFUniCharPrecomposeCharacter(lastChar, currentChar)) == 0xFFFD) { 188 if (isPrecomposed) precomposedChar = lastChar; 189 lastClass = currentClass; 190 } else { 191 continue; 192 } 193 } 194 if (currentChar > 0xFFFF) { // Non-BMP 195 usedLength += 2; 196 if (usedLength > maxLength) break; 197 currentChar -= 0x10000; 198 *(precomposed++) = (UTF16Char)((currentChar >> 10) + 0xD800UL); 199 *(precomposed++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL); 200 } else { 201 ++usedLength; 202 if (usedLength > maxLength) break; 203 *(precomposed++) = (UTF16Char)currentChar; 204 } 205 } else { 206 if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo 207 int8_t lIndex = currentChar - HANGUL_LBASE; 208 209 if ((length > 0) && (0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) { 210 int16_t vIndex = *characters - HANGUL_VBASE; 211 212 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) { 213 int16_t tIndex = 0; 214 215 ++characters; --length; 216 217 if (length > 0) { 218 tIndex = *characters - HANGUL_TBASE; 219 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) { 220 tIndex = 0; 221 } else { 222 ++characters; --length; 223 } 224 } 225 currentChar = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE; 226 } 227 } 228 } 229 230 if (precomposedChar != 0xFFFD) { 231 if (currentBaseIsBMP) { // Non-BMP 232 if (lastChar > 0xFFFF) { // Last char was Non-BMP 233 --usedLength; 234 memmove(currentBase + 1, currentBase + 2, (precomposed - (currentBase + 2)) * sizeof(UTF16Char)); 235 } 236 *(currentBase) = (UTF16Char)precomposedChar; 237 } else { 238 if (lastChar < 0x10000) { // Last char was BMP 239 ++usedLength; 240 if (usedLength > maxLength) break; 241 __CFUniCharMoveBufferFromEnd0(currentBase + 1, precomposed - (currentBase + 1), 1); 242 } 243 precomposedChar -= 0x10000; 244 *currentBase = (UTF16Char)((precomposedChar >> 10) + 0xD800UL); 245 *(currentBase + 1) = (UTF16Char)((precomposedChar & 0x3FF) + 0xDC00UL); 246 } 247 precomposedChar = 0xFFFD; 248 } 249 currentBase = precomposed; 250 251 lastChar = currentChar; 252 lastClass = 0; 253 254 if (currentChar > 0xFFFF) { // Non-BMP 255 usedLength += 2; 256 if (usedLength > maxLength) break; 257 currentChar -= 0x10000; 258 *(precomposed++) = (UTF16Char)((currentChar >> 10) + 0xD800UL); 259 *(precomposed++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL); 260 currentBaseIsBMP = false; 261 } else { 262 ++usedLength; 263 if (usedLength > maxLength) break; 264 *(precomposed++) = (UTF16Char)currentChar; 265 currentBaseIsBMP = true; 266 } 267 } 268 } 269 270 if (precomposedChar != 0xFFFD) { 271 if (currentChar > 0xFFFF) { // Non-BMP 272 if (lastChar < 0x10000) { // Last char was BMP 273 ++usedLength; 274 if (usedLength > maxLength) { 275 if (consumedLength) *consumedLength = originalLength - length; 276 if (filledLength) *filledLength = usedLength; 277 return false; 278 } 279 __CFUniCharMoveBufferFromEnd0(currentBase + 1, precomposed - (currentBase + 1), 1); 280 } 281 precomposedChar -= 0x10000; 282 *currentBase = (UTF16Char)((precomposedChar >> 10) + 0xD800UL); 283 *(currentBase + 1) = (UTF16Char)((precomposedChar & 0x3FF) + 0xDC00UL); 284 } else { 285 if (lastChar > 0xFFFF) { // Last char was Non-BMP 286 --usedLength; 287 memmove(currentBase + 1, currentBase + 2, (precomposed - (currentBase + 2)) * sizeof(UTF16Char)); 288 } 289 *(currentBase) = (UTF16Char)precomposedChar; 290 } 291 } 292 293 if (consumedLength) *consumedLength = originalLength - length; 294 if (filledLength) *filledLength = usedLength; 295 296 return true; 297} 298 299#undef __CFUniCharIsNonBaseCharacter 300#undef HANGUL_SBASE 301#undef HANGUL_LBASE 302#undef HANGUL_VBASE 303#undef HANGUL_TBASE 304#undef HANGUL_SCOUNT 305#undef HANGUL_LCOUNT 306#undef HANGUL_VCOUNT 307#undef HANGUL_TCOUNT 308#undef HANGUL_NCOUNT 309 310