1/* 2 * Copyright (c) 2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. Please obtain a copy of the License at 10 * http://www.opensource.apple.com/apsl/ and read it before using this 11 * file. 12 * 13 * The Original Code and all software distributed under the License are 14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 18 * Please see the License for the specific language governing rights and 19 * limitations under the License. 20 * 21 * @APPLE_LICENSE_HEADER_END@ 22 */ 23 24/* CFUniChar.h 25 Copyright (c) 1998-2013, Apple Inc. All rights reserved. 26*/ 27 28#if !defined(__COREFOUNDATION_CFUNICHAR__) 29#define __COREFOUNDATION_CFUNICHAR__ 1 30 31 32#include <CoreFoundation/CFByteOrder.h> 33#include <CoreFoundation/CFBase.h> 34 35CF_EXTERN_C_BEGIN 36 37#define kCFUniCharBitShiftForByte (3) 38#define kCFUniCharBitShiftForMask (7) 39 40CF_INLINE bool CFUniCharIsSurrogateHighCharacter(UniChar character) { 41 return ((character >= 0xD800UL) && (character <= 0xDBFFUL) ? true : false); 42} 43 44CF_INLINE bool CFUniCharIsSurrogateLowCharacter(UniChar character) { 45 return ((character >= 0xDC00UL) && (character <= 0xDFFFUL) ? true : false); 46} 47 48CF_INLINE UTF32Char CFUniCharGetLongCharacterForSurrogatePair(UniChar surrogateHigh, UniChar surrogateLow) { 49 return ((surrogateHigh - 0xD800UL) << 10) + (surrogateLow - 0xDC00UL) + 0x0010000UL; 50} 51 52// The following values coinside TextEncodingFormat format defines in TextCommon.h 53enum { 54 kCFUniCharUTF16Format = 0, 55 kCFUniCharUTF8Format = 2, 56 kCFUniCharUTF32Format = 3 57}; 58 59CF_INLINE bool CFUniCharIsMemberOfBitmap(UTF16Char theChar, const uint8_t *bitmap) { 60 return (bitmap && (bitmap[(theChar) >> kCFUniCharBitShiftForByte] & (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask))) ? true : false); 61} 62 63CF_INLINE void CFUniCharAddCharacterToBitmap(UTF16Char theChar, uint8_t *bitmap) { 64 bitmap[(theChar) >> kCFUniCharBitShiftForByte] |= (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask)); 65} 66 67CF_INLINE void CFUniCharRemoveCharacterFromBitmap(UTF16Char theChar, uint8_t *bitmap) { 68 bitmap[(theChar) >> kCFUniCharBitShiftForByte] &= ~(((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask)); 69} 70 71enum { 72 kCFUniCharControlCharacterSet = 1, 73 kCFUniCharWhitespaceCharacterSet, 74 kCFUniCharWhitespaceAndNewlineCharacterSet, 75 kCFUniCharDecimalDigitCharacterSet, 76 kCFUniCharLetterCharacterSet, 77 kCFUniCharLowercaseLetterCharacterSet, 78 kCFUniCharUppercaseLetterCharacterSet, 79 kCFUniCharNonBaseCharacterSet, 80 kCFUniCharCanonicalDecomposableCharacterSet, 81 kCFUniCharDecomposableCharacterSet = kCFUniCharCanonicalDecomposableCharacterSet, 82 kCFUniCharAlphaNumericCharacterSet, 83 kCFUniCharPunctuationCharacterSet, 84 kCFUniCharIllegalCharacterSet, 85 kCFUniCharTitlecaseLetterCharacterSet, 86 kCFUniCharSymbolAndOperatorCharacterSet, 87 kCFUniCharNewlineCharacterSet, 88 89 kCFUniCharCompatibilityDecomposableCharacterSet = 100, // internal character sets begins here 90 kCFUniCharHFSPlusDecomposableCharacterSet, 91 kCFUniCharStrongRightToLeftCharacterSet, 92 kCFUniCharHasNonSelfLowercaseCharacterSet, 93 kCFUniCharHasNonSelfUppercaseCharacterSet, 94 kCFUniCharHasNonSelfTitlecaseCharacterSet, 95 kCFUniCharHasNonSelfCaseFoldingCharacterSet, 96 kCFUniCharHasNonSelfMirrorMappingCharacterSet, 97 kCFUniCharControlAndFormatterCharacterSet, 98 kCFUniCharCaseIgnorableCharacterSet, 99 kCFUniCharGraphemeExtendCharacterSet 100}; 101 102CF_EXPORT bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset); 103 104// This function returns NULL for kCFUniCharControlCharacterSet, kCFUniCharWhitespaceCharacterSet, kCFUniCharWhitespaceAndNewlineCharacterSet, & kCFUniCharIllegalCharacterSet 105CF_EXPORT const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane); 106 107enum { 108 kCFUniCharBitmapFilled = (uint8_t)0, 109 kCFUniCharBitmapEmpty = (uint8_t)0xFF, 110 kCFUniCharBitmapAll = (uint8_t)1 111}; 112 113CF_EXPORT uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted); 114 115CF_EXPORT uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset); 116 117enum { 118 kCFUniCharToLowercase = 0, 119 kCFUniCharToUppercase, 120 kCFUniCharToTitlecase, 121 kCFUniCharCaseFold 122}; 123 124enum { 125 kCFUniCharCaseMapFinalSigma = (1UL << 0), 126 kCFUniCharCaseMapAfter_i = (1UL << 1), 127 kCFUniCharCaseMapMoreAbove = (1UL << 2), 128 kCFUniCharCaseMapDutchDigraph = (1UL << 3), 129 kCFUniCharCaseMapGreekTonos = (1UL << 4) 130}; 131 132CF_EXPORT CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode); 133 134CF_EXPORT uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags); 135 136enum { 137 kCFUniCharBiDiPropertyON = 0, 138 kCFUniCharBiDiPropertyL, 139 kCFUniCharBiDiPropertyR, 140 kCFUniCharBiDiPropertyAN, 141 kCFUniCharBiDiPropertyEN, 142 kCFUniCharBiDiPropertyAL, 143 kCFUniCharBiDiPropertyNSM, 144 kCFUniCharBiDiPropertyCS, 145 kCFUniCharBiDiPropertyES, 146 kCFUniCharBiDiPropertyET, 147 kCFUniCharBiDiPropertyBN, 148 kCFUniCharBiDiPropertyS, 149 kCFUniCharBiDiPropertyWS, 150 kCFUniCharBiDiPropertyB, 151 kCFUniCharBiDiPropertyRLO, 152 kCFUniCharBiDiPropertyRLE, 153 kCFUniCharBiDiPropertyLRO, 154 kCFUniCharBiDiPropertyLRE, 155 kCFUniCharBiDiPropertyPDF 156}; 157 158enum { 159 kCFUniCharCombiningProperty = 0, 160 kCFUniCharBidiProperty 161}; 162 163// The second arg 'bitmap' has to be the pointer to a specific plane 164CF_INLINE uint8_t CFUniCharGetBidiPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) { 165 if (bitmap) { 166 uint8_t value = bitmap[(character >> 8)]; 167 168 if (value > kCFUniCharBiDiPropertyPDF) { 169 bitmap = bitmap + 256 + ((value - kCFUniCharBiDiPropertyPDF - 1) * 256); 170 return bitmap[character % 256]; 171 } else { 172 return value; 173 } 174 } 175 return kCFUniCharBiDiPropertyL; 176} 177 178CF_INLINE uint8_t CFUniCharGetCombiningPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) { 179 if (bitmap) { 180 uint8_t value = bitmap[(character >> 8)]; 181 182 if (value) { 183 bitmap = bitmap + 256 + ((value - 1) * 256); 184 return bitmap[character % 256]; 185 } 186 } 187 return 0; 188} 189 190CF_EXPORT const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane); 191CF_EXPORT uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType); 192CF_EXPORT uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType); 193 194CF_EXPORT bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat); 195 196// UTF32 support 197 198CF_INLINE bool CFUniCharToUTF32(const UTF16Char *src, CFIndex length, UTF32Char *dst, bool allowLossy, bool isBigEndien) { 199 const UTF16Char *limit = src + length; 200 UTF32Char character; 201 202 while (src < limit) { 203 character = *(src++); 204 205 if (CFUniCharIsSurrogateHighCharacter(character)) { 206 if ((src < limit) && CFUniCharIsSurrogateLowCharacter(*src)) { 207 character = CFUniCharGetLongCharacterForSurrogatePair(character, *(src++)); 208 } else { 209 if (!allowLossy) return false; 210 character = 0xFFFD; // replacement character 211 } 212 } else if (CFUniCharIsSurrogateLowCharacter(character)) { 213 if (!allowLossy) return false; 214 character = 0xFFFD; // replacement character 215 } 216 217 *(dst++) = (isBigEndien ? CFSwapInt32HostToBig(character) : CFSwapInt32HostToLittle(character)); 218 } 219 220 return true; 221} 222 223CF_INLINE bool CFUniCharFromUTF32(const UTF32Char *src, CFIndex length, UTF16Char *dst, bool allowLossy, bool isBigEndien) { 224 const UTF32Char *limit = src + length; 225 UTF32Char character; 226 227 while (src < limit) { 228 character = (isBigEndien ? CFSwapInt32BigToHost(*(src++)) : CFSwapInt32LittleToHost(*(src++))); 229 230 if (character < 0x10000) { // BMP 231 if (allowLossy) { 232 if (CFUniCharIsSurrogateHighCharacter(character)) { 233 UTF32Char otherCharacter = 0xFFFD; // replacement character 234 235 if (src < limit) { 236 otherCharacter = (isBigEndien ? CFSwapInt32BigToHost(*src) : CFSwapInt32LittleToHost(*src)); 237 238 239 if ((otherCharacter < 0x10000) && CFUniCharIsSurrogateLowCharacter(otherCharacter)) { 240 *(dst++) = character; ++src; 241 } else { 242 otherCharacter = 0xFFFD; // replacement character 243 } 244 } 245 246 character = otherCharacter; 247 } else if (CFUniCharIsSurrogateLowCharacter(character)) { 248 character = 0xFFFD; // replacement character 249 } 250 } else { 251 if (CFUniCharIsSurrogateHighCharacter(character) || CFUniCharIsSurrogateLowCharacter(character)) return false; 252 } 253 } else if (character < 0x110000) { // non-BMP 254 character -= 0x10000; 255 *(dst++) = (UTF16Char)((character >> 10) + 0xD800UL); 256 character = (UTF16Char)((character & 0x3FF) + 0xDC00UL); 257 } else { 258 if (!allowLossy) return false; 259 character = 0xFFFD; // replacement character 260 } 261 262 *(dst++) = character; 263 } 264 return true; 265} 266 267CF_EXTERN_C_END 268 269#endif /* ! __COREFOUNDATION_CFUNICHAR__ */ 270 271