1/* 2 * Copyright (c) 2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. Please obtain a copy of the License at 10 * http://www.opensource.apple.com/apsl/ and read it before using this 11 * file. 12 * 13 * The Original Code and all software distributed under the License are 14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 18 * Please see the License for the specific language governing rights and 19 * limitations under the License. 20 * 21 * @APPLE_LICENSE_HEADER_END@ 22 */ 23 24/* CFCharacterSet.c 25 Copyright (c) 1999-2013, Apple Inc. All rights reserved. 26 Responsibility: Aki Inoue 27*/ 28 29#include <CoreFoundation/CFCharacterSet.h> 30#include <CoreFoundation/CFByteOrder.h> 31#include "CFCharacterSetPriv.h" 32#include <CoreFoundation/CFData.h> 33#include <CoreFoundation/CFString.h> 34#include "CFInternal.h" 35#include <CoreFoundation/CFUniChar.h> 36#include "CFUniCharPriv.h" 37#include <stdlib.h> 38#include <string.h> 39 40 41#define BITSPERBYTE 8 /* (CHAR_BIT * sizeof(unsigned char)) */ 42#define LOG_BPB 3 43#define LOG_BPLW 5 44#define NUMCHARACTERS 65536 45 46#define MAX_ANNEX_PLANE (16) 47 48/* Number of things in the array keeping the bits. 49*/ 50#define __kCFBitmapSize (NUMCHARACTERS / BITSPERBYTE) 51 52/* How many elements max can be in an __kCFCharSetClassString CFCharacterSet 53*/ 54#define __kCFStringCharSetMax 64 55 56/* The last builtin set ID number 57*/ 58#define __kCFLastBuiltinSetID kCFCharacterSetNewline 59 60/* How many elements in the "singles" array before we use binary search. 61*/ 62#define __kCFSetBreakeven 10 63 64/* This tells us, within 1k or so, whether a thing is POTENTIALLY in the set (in the bitmap blob of the private structure) before we bother to do specific checking. 65*/ 66#define __CFCSetBitsInRange(n, i) (i[n>>15] & (1L << ((n>>10) % 32))) 67 68/* Compact bitmap params 69*/ 70#define __kCFCompactBitmapNumPages (256) 71 72#define __kCFCompactBitmapMaxPages (128) // the max pages allocated 73 74#define __kCFCompactBitmapPageSize (__kCFBitmapSize / __kCFCompactBitmapNumPages) 75 76typedef struct { 77 CFCharacterSetRef *_nonBMPPlanes; 78 unsigned int _validEntriesBitmap; 79 unsigned char _numOfAllocEntries; 80 unsigned char _isAnnexInverted; 81 uint16_t _padding; 82} CFCharSetAnnexStruct; 83 84struct __CFCharacterSet { 85 CFRuntimeBase _base; 86 CFHashCode _hashValue; 87 union { 88 struct { 89 CFIndex _type; 90 } _builtin; 91 struct { 92 UInt32 _firstChar; 93 CFIndex _length; 94 } _range; 95 struct { 96 UniChar *_buffer; 97 CFIndex _length; 98 } _string; 99 struct { 100 uint8_t *_bits; 101 } _bitmap; 102 struct { 103 uint8_t *_cBits; 104 } _compactBitmap; 105 } _variants; 106 CFCharSetAnnexStruct *_annex; 107}; 108 109/* _base._info values interesting for CFCharacterSet 110*/ 111enum { 112 __kCFCharSetClassTypeMask = 0x0070, 113 __kCFCharSetClassBuiltin = 0x0000, 114 __kCFCharSetClassRange = 0x0010, 115 __kCFCharSetClassString = 0x0020, 116 __kCFCharSetClassBitmap = 0x0030, 117 __kCFCharSetClassSet = 0x0040, 118 __kCFCharSetClassCompactBitmap = 0x0040, 119 120 __kCFCharSetIsInvertedMask = 0x0008, 121 __kCFCharSetIsInverted = 0x0008, 122 123 __kCFCharSetHasHashValueMask = 0x00004, 124 __kCFCharSetHasHashValue = 0x0004, 125 126 /* Generic CFBase values */ 127 __kCFCharSetIsMutableMask = 0x0001, 128 __kCFCharSetIsMutable = 0x0001, 129}; 130 131/* Inline accessor macros for _base._info 132*/ 133CF_INLINE Boolean __CFCSetIsMutable(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetIsMutableMask) == __kCFCharSetIsMutable;} 134CF_INLINE Boolean __CFCSetIsBuiltin(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassBuiltin;} 135CF_INLINE Boolean __CFCSetIsRange(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassRange;} 136CF_INLINE Boolean __CFCSetIsString(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassString;} 137CF_INLINE Boolean __CFCSetIsBitmap(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassBitmap;} 138CF_INLINE Boolean __CFCSetIsCompactBitmap(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassCompactBitmap;} 139CF_INLINE Boolean __CFCSetIsInverted(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetIsInvertedMask) == __kCFCharSetIsInverted;} 140CF_INLINE Boolean __CFCSetHasHashValue(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetHasHashValueMask) == __kCFCharSetHasHashValue;} 141CF_INLINE UInt32 __CFCSetClassType(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask);} 142 143CF_INLINE void __CFCSetPutIsMutable(CFMutableCharacterSetRef cset, Boolean isMutable) {(isMutable ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetIsMutable) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~ __kCFCharSetIsMutable));} 144CF_INLINE void __CFCSetPutIsInverted(CFMutableCharacterSetRef cset, Boolean isInverted) {(isInverted ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetIsInverted) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetIsInverted));} 145CF_INLINE void __CFCSetPutHasHashValue(CFMutableCharacterSetRef cset, Boolean hasHash) {(hasHash ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetHasHashValue) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetHasHashValue));} 146CF_INLINE void __CFCSetPutClassType(CFMutableCharacterSetRef cset, UInt32 classType) {cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetClassTypeMask; cset->_base._cfinfo[CF_INFO_BITS] |= classType;} 147 148CF_PRIVATE Boolean __CFCharacterSetIsMutable(CFCharacterSetRef cset) {return __CFCSetIsMutable(cset);} 149 150/* Inline contents accessor macros 151*/ 152CF_INLINE CFCharacterSetPredefinedSet __CFCSetBuiltinType(CFCharacterSetRef cset) {return cset->_variants._builtin._type;} 153CF_INLINE UInt32 __CFCSetRangeFirstChar(CFCharacterSetRef cset) {return cset->_variants._range._firstChar;} 154CF_INLINE CFIndex __CFCSetRangeLength(CFCharacterSetRef cset) {return cset->_variants._range._length;} 155CF_INLINE UniChar *__CFCSetStringBuffer(CFCharacterSetRef cset) {return (UniChar*)(cset->_variants._string._buffer);} 156CF_INLINE CFIndex __CFCSetStringLength(CFCharacterSetRef cset) {return cset->_variants._string._length;} 157CF_INLINE uint8_t *__CFCSetBitmapBits(CFCharacterSetRef cset) {return cset->_variants._bitmap._bits;} 158CF_INLINE uint8_t *__CFCSetCompactBitmapBits(CFCharacterSetRef cset) {return cset->_variants._compactBitmap._cBits;} 159 160CF_INLINE void __CFCSetPutBuiltinType(CFMutableCharacterSetRef cset, CFCharacterSetPredefinedSet type) {cset->_variants._builtin._type = type;} 161CF_INLINE void __CFCSetPutRangeFirstChar(CFMutableCharacterSetRef cset, UInt32 first) {cset->_variants._range._firstChar = first;} 162CF_INLINE void __CFCSetPutRangeLength(CFMutableCharacterSetRef cset, CFIndex length) {cset->_variants._range._length = length;} 163CF_INLINE void __CFCSetPutStringBuffer(CFMutableCharacterSetRef cset, UniChar *theBuffer) {cset->_variants._string._buffer = theBuffer;} 164CF_INLINE void __CFCSetPutStringLength(CFMutableCharacterSetRef cset, CFIndex length) {cset->_variants._string._length = length;} 165CF_INLINE void __CFCSetPutBitmapBits(CFMutableCharacterSetRef cset, uint8_t *bits) {cset->_variants._bitmap._bits = bits;} 166CF_INLINE void __CFCSetPutCompactBitmapBits(CFMutableCharacterSetRef cset, uint8_t *bits) {cset->_variants._compactBitmap._cBits = bits;} 167 168/* Validation funcs 169*/ 170#if defined(CF_ENABLE_ASSERTIONS) 171CF_INLINE void __CFCSetValidateBuiltinType(CFCharacterSetPredefinedSet type, const char *func) { 172 CFAssert2(type > 0 && type <= __kCFLastBuiltinSetID, __kCFLogAssertion, "%s: Unknowen builtin type %d", func, type); 173} 174CF_INLINE void __CFCSetValidateRange(CFRange theRange, const char *func) { 175 CFAssert3(theRange.location >= 0 && theRange.location + theRange.length <= 0x1FFFFF, __kCFLogAssertion, "%s: Range out of Unicode range (location -> %d length -> %d)", func, theRange.location, theRange.length); 176} 177CF_INLINE void __CFCSetValidateTypeAndMutability(CFCharacterSetRef cset, const char *func) { 178 __CFGenericValidateType(cset, __kCFCharacterSetTypeID); 179 CFAssert1(__CFCSetIsMutable(cset), __kCFLogAssertion, "%s: Immutable character set passed to mutable function", func); 180} 181#else 182#define __CFCSetValidateBuiltinType(t,f) 183#define __CFCSetValidateRange(r,f) 184#define __CFCSetValidateTypeAndMutability(r,f) 185#endif 186 187/* Inline utility funcs 188*/ 189static Boolean __CFCSetIsEqualBitmap(const UInt32 *bits1, const UInt32 *bits2) { 190 CFIndex length = __kCFBitmapSize / sizeof(UInt32); 191 192 if (bits1 == bits2) { 193 return true; 194 } else if (bits1 && bits2) { 195 if (bits1 == (const UInt32 *)-1) { 196 while (length--) if ((UInt32)-1 != *bits2++) return false; 197 } else if (bits2 == (const UInt32 *)-1) { 198 while (length--) if ((UInt32)-1 != *bits1++) return false; 199 } else { 200 while (length--) if (*bits1++ != *bits2++) return false; 201 } 202 return true; 203 } else if (!bits1 && !bits2) { // empty set 204 return true; 205 } else { 206 if (bits2) bits1 = bits2; 207 if (bits1 == (const UInt32 *)-1) return false; 208 while (length--) if (*bits1++) return false; 209 return true; 210 } 211} 212 213CF_INLINE Boolean __CFCSetIsEqualBitmapInverted(const UInt32 *bits1, const UInt32 *bits2) { 214 CFIndex length = __kCFBitmapSize / sizeof(UInt32); 215 216 while (length--) if (*bits1++ != ~(*(bits2++))) return false; 217 return true; 218} 219 220static Boolean __CFCSetIsBitmapEqualToRange(const UInt32 *bits, UniChar firstChar, UniChar lastChar, Boolean isInverted) { 221 CFIndex firstCharIndex = firstChar >> LOG_BPB; 222 CFIndex lastCharIndex = lastChar >> LOG_BPB; 223 CFIndex length; 224 UInt32 value; 225 226 if (firstCharIndex == lastCharIndex) { 227 value = ((((UInt32)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((UInt32)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))))) << (((sizeof(UInt32) - 1) - (firstCharIndex % sizeof(UInt32))) * BITSPERBYTE); 228 value = CFSwapInt32HostToBig(value); 229 firstCharIndex = lastCharIndex = firstChar >> LOG_BPLW; 230 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE; 231 } else { 232 UInt32 firstCharMask; 233 UInt32 lastCharMask; 234 235 length = firstCharIndex % sizeof(UInt32); 236 firstCharMask = (((((UInt32)0xFF) << (firstChar & (BITSPERBYTE - 1))) & 0xFF) << (((sizeof(UInt32) - 1) - length) * BITSPERBYTE)) | (((UInt32)0xFFFFFFFF) >> ((length + 1) * BITSPERBYTE)); 237 238 length = lastCharIndex % sizeof(UInt32); 239 lastCharMask = ((((UInt32)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1)))) << (((sizeof(UInt32) - 1) - length) * BITSPERBYTE)) | (((UInt32)0xFFFFFFFF) << ((sizeof(UInt32) - length) * BITSPERBYTE)); 240 241 firstCharIndex = firstChar >> LOG_BPLW; 242 lastCharIndex = lastChar >> LOG_BPLW; 243 244 if (firstCharIndex == lastCharIndex) { 245 firstCharMask &= lastCharMask; 246 value = CFSwapInt32HostToBig(firstCharMask & lastCharMask); 247 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE; 248 } else { 249 value = CFSwapInt32HostToBig(firstCharMask); 250 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE; 251 252 value = CFSwapInt32HostToBig(lastCharMask); 253 if (*(bits + lastCharIndex) != (isInverted ? ~value : value)) return FALSE; 254 } 255 } 256 257 length = firstCharIndex; 258 value = (isInverted ? ((UInt32)0xFFFFFFFF) : 0); 259 while (length--) { 260 if (*(bits++) != value) return FALSE; 261 } 262 263 ++bits; // Skip firstCharIndex 264 length = (lastCharIndex - (firstCharIndex + 1)); 265 value = (isInverted ? 0 : ((UInt32)0xFFFFFFFF)); 266 while (length-- > 0) { 267 if (*(bits++) != value) return FALSE; 268 } 269 if (firstCharIndex != lastCharIndex) ++bits; 270 271 length = (0xFFFF >> LOG_BPLW) - lastCharIndex; 272 value = (isInverted ? ((UInt32)0xFFFFFFFF) : 0); 273 while (length--) { 274 if (*(bits++) != value) return FALSE; 275 } 276 277 return TRUE; 278} 279 280CF_INLINE Boolean __CFCSetIsBitmapSupersetOfBitmap(const UInt32 *bits1, const UInt32 *bits2, Boolean isInverted1, Boolean isInverted2) { 281 CFIndex length = __kCFBitmapSize / sizeof(UInt32); 282 UInt32 val1, val2; 283 284 while (length--) { 285 val2 = (isInverted2 ? ~(*(bits2++)) : *(bits2++)); 286 val1 = (isInverted1 ? ~(*(bits1++)) : *(bits1++)) & val2; 287 if (val1 != val2) return false; 288 } 289 290 return true; 291} 292 293CF_INLINE Boolean __CFCSetHasNonBMPPlane(CFCharacterSetRef cset) { return ((cset)->_annex && (cset)->_annex->_validEntriesBitmap ? true : false); } 294CF_INLINE Boolean __CFCSetAnnexIsInverted (CFCharacterSetRef cset) { return ((cset)->_annex && (cset)->_annex->_isAnnexInverted ? true : false); } 295CF_INLINE UInt32 __CFCSetAnnexValidEntriesBitmap(CFCharacterSetRef cset) { return ((cset)->_annex ? (cset)->_annex->_validEntriesBitmap : 0); } 296 297CF_INLINE Boolean __CFCSetIsEmpty(CFCharacterSetRef cset) { 298 if (__CFCSetHasNonBMPPlane(cset) || __CFCSetAnnexIsInverted(cset)) return false; 299 300 switch (__CFCSetClassType(cset)) { 301 case __kCFCharSetClassRange: if (!__CFCSetRangeLength(cset)) return true; break; 302 case __kCFCharSetClassString: if (!__CFCSetStringLength(cset)) return true; break; 303 case __kCFCharSetClassBitmap: if (!__CFCSetBitmapBits(cset)) return true; break; 304 case __kCFCharSetClassCompactBitmap: if (!__CFCSetCompactBitmapBits(cset)) return true; break; 305 } 306 return false; 307} 308 309CF_INLINE void __CFCSetBitmapAddCharacter(uint8_t *bitmap, UniChar theChar) { 310 bitmap[(theChar) >> LOG_BPB] |= (((unsigned)1) << (theChar & (BITSPERBYTE - 1))); 311} 312 313CF_INLINE void __CFCSetBitmapRemoveCharacter(uint8_t *bitmap, UniChar theChar) { 314 bitmap[(theChar) >> LOG_BPB] &= ~(((unsigned)1) << (theChar & (BITSPERBYTE - 1))); 315} 316 317CF_INLINE Boolean __CFCSetIsMemberBitmap(const uint8_t *bitmap, UniChar theChar) { 318 return ((bitmap[(theChar) >> LOG_BPB] & (((unsigned)1) << (theChar & (BITSPERBYTE - 1)))) ? true : false); 319} 320 321#define NUM_32BIT_SLOTS (NUMCHARACTERS / 32) 322 323CF_INLINE void __CFCSetBitmapFastFillWithValue(UInt32 *bitmap, uint8_t value) { 324 UInt32 mask = (value << 24) | (value << 16) | (value << 8) | value; 325 UInt32 numSlots = NUMCHARACTERS / 32; 326 327 while (numSlots--) *(bitmap++) = mask; 328} 329 330CF_INLINE void __CFCSetBitmapAddCharactersInRange(uint8_t *bitmap, UniChar firstChar, UniChar lastChar) { 331 if (firstChar == lastChar) { 332 bitmap[firstChar >> LOG_BPB] |= (((unsigned)1) << (firstChar & (BITSPERBYTE - 1))); 333 } else { 334 UInt32 idx = firstChar >> LOG_BPB; 335 UInt32 max = lastChar >> LOG_BPB; 336 337 if (idx == max) { 338 bitmap[idx] |= (((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1)))); 339 } else { 340 bitmap[idx] |= (((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1))); 341 bitmap[max] |= (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1)))); 342 343 ++idx; 344 while (idx < max) bitmap[idx++] = 0xFF; 345 } 346 } 347} 348 349CF_INLINE void __CFCSetBitmapRemoveCharactersInRange(uint8_t *bitmap, UniChar firstChar, UniChar lastChar) { 350 UInt32 idx = firstChar >> LOG_BPB; 351 UInt32 max = lastChar >> LOG_BPB; 352 353 if (idx == max) { 354 bitmap[idx] &= ~((((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))))); 355 } else { 356 bitmap[idx] &= ~(((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1))); 357 bitmap[max] &= ~(((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1)))); 358 359 ++idx; 360 while (idx < max) bitmap[idx++] = 0; 361 } 362} 363 364#define __CFCSetAnnexBitmapSetPlane(bitmap,plane) ((bitmap) |= (1 << (plane))) 365#define __CFCSetAnnexBitmapClearPlane(bitmap,plane) ((bitmap) &= (~(1 << (plane)))) 366#define __CFCSetAnnexBitmapGetPlane(bitmap,plane) ((bitmap) & (1 << (plane))) 367 368CF_INLINE void __CFCSetAllocateAnnexForPlane(CFCharacterSetRef cset, int plane) { 369 if (cset->_annex == NULL) { 370 ((CFMutableCharacterSetRef)cset)->_annex = (CFCharSetAnnexStruct *)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharSetAnnexStruct), 0); 371 cset->_annex->_numOfAllocEntries = plane; 372 cset->_annex->_isAnnexInverted = false; 373 cset->_annex->_validEntriesBitmap = 0; 374 cset->_annex->_nonBMPPlanes = ((plane > 0) ? (CFCharacterSetRef*)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharacterSetRef) * plane, 0) : NULL); 375 } else if (cset->_annex->_numOfAllocEntries < plane) { 376 cset->_annex->_numOfAllocEntries = plane; 377 if (NULL == cset->_annex->_nonBMPPlanes) { 378 cset->_annex->_nonBMPPlanes = (CFCharacterSetRef*)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharacterSetRef) * plane, 0); 379 } else { 380 cset->_annex->_nonBMPPlanes = (CFCharacterSetRef*)CFAllocatorReallocate(CFGetAllocator(cset), (void *)cset->_annex->_nonBMPPlanes, sizeof(CFCharacterSetRef) * plane, 0); 381 } 382 } 383} 384 385CF_INLINE void __CFCSetAnnexSetIsInverted(CFCharacterSetRef cset, Boolean flag) { 386 if (flag) __CFCSetAllocateAnnexForPlane(cset, 0); 387 if (cset->_annex) ((CFMutableCharacterSetRef)cset)->_annex->_isAnnexInverted = flag; 388} 389 390CF_INLINE void __CFCSetPutCharacterSetToAnnexPlane(CFCharacterSetRef cset, CFCharacterSetRef annexCSet, int plane) { 391 __CFCSetAllocateAnnexForPlane(cset, plane); 392 if (__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane)) CFRelease(cset->_annex->_nonBMPPlanes[plane - 1]); 393 if (annexCSet) { 394 cset->_annex->_nonBMPPlanes[plane - 1] = (CFCharacterSetRef)CFRetain(annexCSet); 395 __CFCSetAnnexBitmapSetPlane(cset->_annex->_validEntriesBitmap, plane); 396 } else { 397 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, plane); 398 } 399} 400 401CF_INLINE CFCharacterSetRef __CFCSetGetAnnexPlaneCharacterSet(CFCharacterSetRef cset, int plane) { 402 __CFCSetAllocateAnnexForPlane(cset, plane); 403 if (!__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane)) { 404 cset->_annex->_nonBMPPlanes[plane - 1] = (CFCharacterSetRef)CFCharacterSetCreateMutable(CFGetAllocator(cset)); 405 __CFCSetAnnexBitmapSetPlane(cset->_annex->_validEntriesBitmap, plane); 406 } 407 return cset->_annex->_nonBMPPlanes[plane - 1]; 408} 409 410CF_INLINE CFCharacterSetRef __CFCSetGetAnnexPlaneCharacterSetNoAlloc(CFCharacterSetRef cset, int plane) { 411 return (cset->_annex && __CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane) ? cset->_annex->_nonBMPPlanes[plane - 1] : NULL); 412} 413 414CF_INLINE void __CFCSetDeallocateAnnexPlane(CFCharacterSetRef cset) { 415 if (cset->_annex) { 416 int idx; 417 418 for (idx = 0;idx < MAX_ANNEX_PLANE;idx++) { 419 if (__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, idx + 1)) { 420 CFRelease(cset->_annex->_nonBMPPlanes[idx]); 421 } 422 } 423 CFAllocatorDeallocate(CFGetAllocator(cset), cset->_annex->_nonBMPPlanes); 424 CFAllocatorDeallocate(CFGetAllocator(cset), cset->_annex); 425 ((CFMutableCharacterSetRef)cset)->_annex = NULL; 426 } 427} 428 429CF_INLINE uint8_t __CFCSetGetHeaderValue(const uint8_t *bitmap, int *numPages) { 430 uint8_t value = *bitmap; 431 432 if ((value == 0) || (value == UINT8_MAX)) { 433 int numBytes = __kCFCompactBitmapPageSize - 1; 434 435 while (numBytes > 0) { 436 if (*(++bitmap) != value) break; 437 --numBytes; 438 } 439 if (numBytes == 0) return value; 440 } 441 return (uint8_t)(++(*numPages)); 442} 443 444CF_INLINE bool __CFCSetIsMemberInCompactBitmap(const uint8_t *compactBitmap, UTF16Char character) { 445 uint8_t value = compactBitmap[(character >> 8)]; // Assuming __kCFCompactBitmapNumPages == 256 446 447 if (value == 0) { 448 return false; 449 } else if (value == UINT8_MAX) { 450 return true; 451 } else { 452 compactBitmap += (__kCFCompactBitmapNumPages + (__kCFCompactBitmapPageSize * (value - 1))); 453 character &= 0xFF; // Assuming __kCFCompactBitmapNumPages == 256 454 return ((compactBitmap[(character / BITSPERBYTE)] & (1 << (character % BITSPERBYTE))) ? true : false); 455 } 456} 457 458CF_INLINE uint32_t __CFCSetGetCompactBitmapSize(const uint8_t *compactBitmap) { 459 uint32_t length = __kCFCompactBitmapNumPages; 460 uint32_t size = __kCFCompactBitmapNumPages; 461 uint8_t value; 462 463 while (length-- > 0) { 464 value = *(compactBitmap++); 465 if ((value != 0) && (value != UINT8_MAX)) size += __kCFCompactBitmapPageSize; 466 } 467 return size; 468} 469 470/* Take a private "set" structure and make a bitmap from it. Return the bitmap. THE CALLER MUST RELEASE THE RETURNED MEMORY as necessary. 471*/ 472 473CF_INLINE void __CFCSetBitmapProcessManyCharacters(unsigned char *map, unsigned n, unsigned m, Boolean isInverted) { 474 if (isInverted) { 475 __CFCSetBitmapRemoveCharactersInRange(map, n, m); 476 } else { 477 __CFCSetBitmapAddCharactersInRange(map, n, m); 478 } 479} 480 481CF_INLINE void __CFExpandCompactBitmap(const uint8_t *src, uint8_t *dst) { 482 const uint8_t *srcBody = src + __kCFCompactBitmapNumPages; 483 int i; 484 uint8_t value; 485 486 for (i = 0;i < __kCFCompactBitmapNumPages;i++) { 487 value = *(src++); 488 if ((value == 0) || (value == UINT8_MAX)) { 489 memset(dst, value, __kCFCompactBitmapPageSize); 490 } else { 491 memmove(dst, srcBody, __kCFCompactBitmapPageSize); 492 srcBody += __kCFCompactBitmapPageSize; 493 } 494 dst += __kCFCompactBitmapPageSize; 495 } 496} 497 498 499static void __CFCheckForExpandedSet(CFCharacterSetRef cset) { 500 static int8_t __CFNumberOfPlanesForLogging = -1; 501 static bool warnedOnce = false; 502 503 if (0 > __CFNumberOfPlanesForLogging) { 504 const char *envVar = __CFgetenv("CFCharacterSetCheckForExpandedSet"); 505 long value = (envVar ? strtol_l(envVar, NULL, 0, NULL) : 0); 506 __CFNumberOfPlanesForLogging = (int8_t)(((value > 0) && (value <= 16)) ? value : 0); 507 } 508 509 if (__CFNumberOfPlanesForLogging) { 510 uint32_t entries = __CFCSetAnnexValidEntriesBitmap(cset); 511 int count = 0; 512 513 while (entries) { 514 if ((entries & 1) && (++count >= __CFNumberOfPlanesForLogging)) { 515 if (!warnedOnce) { 516 CFLog(kCFLogLevelWarning, CFSTR("An expanded CFMutableCharacter has been detected. Recommend to compact with CFCharacterSetCreateCopy")); 517 warnedOnce = true; 518 } 519 break; 520 } 521 entries >>= 1; 522 } 523 } 524} 525 526static void __CFCSetGetBitmap(CFCharacterSetRef cset, uint8_t *bits) { 527 uint8_t *bitmap; 528 CFIndex length = __kCFBitmapSize; 529 530 if (__CFCSetIsBitmap(cset) && (bitmap = __CFCSetBitmapBits(cset))) { 531 memmove(bits, bitmap, __kCFBitmapSize); 532 } else { 533 Boolean isInverted = __CFCSetIsInverted(cset); 534 uint8_t value = (isInverted ? (uint8_t)-1 : 0); 535 536 bitmap = bits; 537 while (length--) *bitmap++ = value; // Initialize the buffer 538 539 if (!__CFCSetIsEmpty(cset)) { 540 switch (__CFCSetClassType(cset)) { 541 case __kCFCharSetClassBuiltin: { 542 UInt8 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset), 0, bits, (isInverted != 0)); 543 if (result == kCFUniCharBitmapEmpty && isInverted) { 544 length = __kCFBitmapSize; 545 bitmap = bits; 546 while (length--) *bitmap++ = 0; 547 } else if (result == kCFUniCharBitmapAll && !isInverted) { 548 length = __kCFBitmapSize; 549 bitmap = bits; 550 while (length--) *bitmap++ = (UInt8)0xFF; 551 } 552 } 553 break; 554 555 case __kCFCharSetClassRange: { 556 UInt32 theChar = __CFCSetRangeFirstChar(cset); 557 if (theChar < NUMCHARACTERS) { // the range starts in BMP 558 length = __CFCSetRangeLength(cset); 559 if (theChar + length >= NUMCHARACTERS) length = NUMCHARACTERS - theChar; 560 if (isInverted) { 561 __CFCSetBitmapRemoveCharactersInRange(bits, theChar, (UniChar)(theChar + length) - 1); 562 } else { 563 __CFCSetBitmapAddCharactersInRange(bits, theChar, (UniChar)(theChar + length) - 1); 564 } 565 } 566 } 567 break; 568 569 case __kCFCharSetClassString: { 570 const UniChar *buffer = __CFCSetStringBuffer(cset); 571 length = __CFCSetStringLength(cset); 572 while (length--) (isInverted ? __CFCSetBitmapRemoveCharacter(bits, *buffer++) : __CFCSetBitmapAddCharacter(bits, *buffer++)); 573 } 574 break; 575 576 case __kCFCharSetClassCompactBitmap: 577 __CFExpandCompactBitmap(__CFCSetCompactBitmapBits(cset), bits); 578 break; 579 } 580 } 581 } 582} 583 584static Boolean __CFCharacterSetEqual(CFTypeRef cf1, CFTypeRef cf2); 585 586static Boolean __CFCSetIsEqualAnnex(CFCharacterSetRef cf1, CFCharacterSetRef cf2) { 587 CFCharacterSetRef subSet1; 588 CFCharacterSetRef subSet2; 589 Boolean isAnnexInvertStateIdentical = (__CFCSetAnnexIsInverted(cf1) == __CFCSetAnnexIsInverted(cf2) ? true: false); 590 int idx; 591 592 if (isAnnexInvertStateIdentical) { 593 if (__CFCSetAnnexValidEntriesBitmap(cf1) != __CFCSetAnnexValidEntriesBitmap(cf2)) return false; 594 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) { 595 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1, idx); 596 subSet2 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2, idx); 597 598 if (subSet1 && !__CFCharacterSetEqual(subSet1, subSet2)) return false; 599 } 600 } else { 601 uint8_t bitsBuf[__kCFBitmapSize]; 602 uint8_t bitsBuf2[__kCFBitmapSize]; 603 604 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) { 605 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1, idx); 606 subSet2 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2, idx); 607 608 if (subSet1 == NULL && subSet2 == NULL) { 609 return false; 610 } else if (subSet1 == NULL) { 611 if (__CFCSetIsBitmap(subSet2)) { 612 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(subSet2), (const UInt32 *)-1)) { 613 return false; 614 } 615 } else { 616 __CFCSetGetBitmap(subSet2, bitsBuf); 617 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1)) { 618 return false; 619 } 620 } 621 } else if (subSet2 == NULL) { 622 if (__CFCSetIsBitmap(subSet1)) { 623 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)-1)) { 624 return false; 625 } 626 } else { 627 __CFCSetGetBitmap(subSet1, bitsBuf); 628 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1)) { 629 return false; 630 } 631 } 632 } else { 633 Boolean isBitmap1 = __CFCSetIsBitmap(subSet1); 634 Boolean isBitmap2 = __CFCSetIsBitmap(subSet2); 635 636 if (isBitmap1 && isBitmap2) { 637 if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)__CFCSetBitmapBits(subSet2))) { 638 return false; 639 } 640 } else if (!isBitmap1 && !isBitmap2) { 641 __CFCSetGetBitmap(subSet1, bitsBuf); 642 __CFCSetGetBitmap(subSet2, bitsBuf2); 643 if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)bitsBuf, (const UInt32 *)bitsBuf2)) { 644 return false; 645 } 646 } else { 647 if (isBitmap2) { 648 CFCharacterSetRef tmp = subSet2; 649 subSet2 = subSet1; 650 subSet1 = tmp; 651 } 652 __CFCSetGetBitmap(subSet2, bitsBuf); 653 if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)bitsBuf)) { 654 return false; 655 } 656 } 657 } 658 } 659 } 660 return true; 661} 662 663/* Compact bitmap 664*/ 665static uint8_t *__CFCreateCompactBitmap(CFAllocatorRef allocator, const uint8_t *bitmap) { 666 const uint8_t *src; 667 uint8_t *dst; 668 int i; 669 int numPages = 0; 670 uint8_t header[__kCFCompactBitmapNumPages]; 671 672 src = bitmap; 673 for (i = 0;i < __kCFCompactBitmapNumPages;i++) { 674 header[i] = __CFCSetGetHeaderValue(src, &numPages); 675 676 // Allocating more pages is probably not interesting enough to be compact 677 if (numPages > __kCFCompactBitmapMaxPages) return NULL; 678 src += __kCFCompactBitmapPageSize; 679 } 680 681 dst = (uint8_t *)CFAllocatorAllocate(allocator, __kCFCompactBitmapNumPages + (__kCFCompactBitmapPageSize * numPages), 0); 682 683 if (numPages > 0) { 684 uint8_t *dstBody = dst + __kCFCompactBitmapNumPages; 685 686 src = bitmap; 687 for (i = 0;i < __kCFCompactBitmapNumPages;i++) { 688 dst[i] = header[i]; 689 690 if ((dst[i] != 0) && (dst[i] != UINT8_MAX)) { 691 memmove(dstBody, src, __kCFCompactBitmapPageSize); 692 dstBody += __kCFCompactBitmapPageSize; 693 } 694 src += __kCFCompactBitmapPageSize; 695 } 696 } else { 697 memmove(dst, header, __kCFCompactBitmapNumPages); 698 } 699 700 return dst; 701} 702 703static void __CFCSetMakeCompact(CFMutableCharacterSetRef cset) { 704 if (__CFCSetIsBitmap(cset) && __CFCSetBitmapBits(cset)) { 705 uint8_t *bitmap = __CFCSetBitmapBits(cset); 706 uint8_t *cBitmap = __CFCreateCompactBitmap(CFGetAllocator(cset), bitmap); 707 708 if (cBitmap) { 709 CFAllocatorDeallocate(CFGetAllocator(cset), bitmap); 710 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap); 711 __CFCSetPutCompactBitmapBits(cset, cBitmap); 712 } 713 } 714} 715 716static void __CFCSetAddNonBMPPlanesInRange(CFMutableCharacterSetRef cset, CFRange range) { 717 int firstChar = (range.location & 0xFFFF); 718 int maxChar = range.location + range.length; 719 int idx = range.location >> 16; // first plane 720 int maxPlane = (maxChar - 1) >> 16; // last plane 721 CFRange planeRange; 722 CFMutableCharacterSetRef annexPlane; 723 724 maxChar &= 0xFFFF; 725 726 for (idx = (idx ? idx : 1);idx <= maxPlane;idx++) { 727 planeRange.location = __CFMax(firstChar, 0); 728 planeRange.length = (idx == maxPlane && maxChar ? maxChar : 0x10000) - planeRange.location; 729 if (__CFCSetAnnexIsInverted(cset)) { 730 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, idx))) { 731 CFCharacterSetRemoveCharactersInRange(annexPlane, planeRange); 732 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) { 733 CFRelease(annexPlane); 734 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, idx); 735 } 736 } 737 } else { 738 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx), planeRange); 739 } 740 } 741 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) __CFCSetDeallocateAnnexPlane(cset); 742} 743 744static void __CFCSetRemoveNonBMPPlanesInRange(CFMutableCharacterSetRef cset, CFRange range) { 745 int firstChar = (range.location & 0xFFFF); 746 int maxChar = range.location + range.length; 747 int idx = range.location >> 16; // first plane 748 int maxPlane = (maxChar - 1) >> 16; // last plane 749 CFRange planeRange; 750 CFMutableCharacterSetRef annexPlane; 751 752 maxChar &= 0xFFFF; 753 754 for (idx = (idx ? idx : 1);idx <= maxPlane;idx++) { 755 planeRange.location = __CFMax(firstChar, 0); 756 planeRange.length = (idx == maxPlane && maxChar ? maxChar : 0x10000) - planeRange.location; 757 if (__CFCSetAnnexIsInverted(cset)) { 758 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx), planeRange); 759 } else { 760 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, idx))) { 761 CFCharacterSetRemoveCharactersInRange(annexPlane, planeRange); 762 if(__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) { 763 CFRelease(annexPlane); 764 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, idx); 765 } 766 } 767 } 768 } 769 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) __CFCSetDeallocateAnnexPlane(cset); 770} 771 772static void __CFCSetMakeBitmap(CFMutableCharacterSetRef cset) { 773 if (!__CFCSetIsBitmap(cset) || !__CFCSetBitmapBits(cset)) { 774 CFAllocatorRef allocator = CFGetAllocator(cset); 775 uint8_t *bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0); 776 __CFCSetGetBitmap(cset, bitmap); 777 778 if (__CFCSetIsBuiltin(cset)) { 779 CFIndex numPlanes = CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(cset)); 780 781 if (numPlanes > 1) { 782 CFMutableCharacterSetRef annexSet; 783 uint8_t *annexBitmap = NULL; 784 int idx; 785 UInt8 result; 786 787 __CFCSetAllocateAnnexForPlane(cset, numPlanes - 1); 788 for (idx = 1;idx < numPlanes;idx++) { 789 if (NULL == annexBitmap) { 790 annexBitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0); 791 } 792 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset), idx, annexBitmap, false); 793 if (result == kCFUniCharBitmapEmpty) continue; 794 if (result == kCFUniCharBitmapAll) { 795 CFIndex bitmapLength = __kCFBitmapSize; 796 uint8_t *bytes = annexBitmap; 797 while (bitmapLength-- > 0) *(bytes++) = (uint8_t)0xFF; 798 } 799 annexSet = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx); 800 __CFCSetPutClassType(annexSet, __kCFCharSetClassBitmap); 801 __CFCSetPutBitmapBits(annexSet, annexBitmap); 802 __CFCSetPutIsInverted(annexSet, false); 803 __CFCSetPutHasHashValue(annexSet, false); 804 annexBitmap = NULL; 805 } 806 if (annexBitmap) CFAllocatorDeallocate(allocator, annexBitmap); 807 } 808 } else if (__CFCSetIsCompactBitmap(cset) && __CFCSetCompactBitmapBits(cset)) { 809 CFAllocatorDeallocate(allocator, __CFCSetCompactBitmapBits(cset)); 810 __CFCSetPutCompactBitmapBits(cset, NULL); 811 } else if (__CFCSetIsString(cset) && __CFCSetStringBuffer(cset)) { 812 CFAllocatorDeallocate(allocator, __CFCSetStringBuffer(cset)); 813 __CFCSetPutStringBuffer(cset, NULL); 814 } else if (__CFCSetIsRange(cset)) { // We may have to allocate annex here 815 Boolean needsToInvert = (!__CFCSetHasNonBMPPlane(cset) && __CFCSetIsInverted(cset) ? true : false); 816 __CFCSetAddNonBMPPlanesInRange(cset, CFRangeMake(__CFCSetRangeFirstChar(cset), __CFCSetRangeLength(cset))); 817 if (needsToInvert) __CFCSetAnnexSetIsInverted(cset, true); 818 } 819 __CFCSetPutClassType(cset, __kCFCharSetClassBitmap); 820 __CFCSetPutBitmapBits(cset, bitmap); 821 __CFCSetPutIsInverted(cset, false); 822 } 823} 824 825CF_INLINE CFMutableCharacterSetRef __CFCSetGenericCreate(CFAllocatorRef allocator, UInt32 flags) { 826 CFMutableCharacterSetRef cset; 827 CFIndex size = sizeof(struct __CFCharacterSet) - sizeof(CFRuntimeBase); 828 829 cset = (CFMutableCharacterSetRef)_CFRuntimeCreateInstance(allocator, CFCharacterSetGetTypeID(), size, NULL); 830 if (NULL == cset) return NULL; 831 832 cset->_base._cfinfo[CF_INFO_BITS] |= flags; 833 cset->_hashValue = 0; 834 cset->_annex = NULL; 835 836 return cset; 837} 838 839static void __CFApplySurrogatesInString(CFMutableCharacterSetRef cset, CFStringRef string, void (*applyer)(CFMutableCharacterSetRef, CFRange)) { 840 CFStringInlineBuffer buffer; 841 CFIndex index, length = CFStringGetLength(string); 842 CFRange range = CFRangeMake(0, 0); 843 UTF32Char character; 844 845 CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length)); 846 847 for (index = 0;index < length;index++) { 848 character = __CFStringGetCharacterFromInlineBufferQuick(&buffer, index); 849 850 if (CFStringIsSurrogateHighCharacter(character) && ((index + 1) < length)) { 851 UTF16Char other = __CFStringGetCharacterFromInlineBufferQuick(&buffer, index + 1); 852 853 if (CFStringIsSurrogateLowCharacter(other)) { 854 character = CFStringGetLongCharacterForSurrogatePair(character, other); 855 856 if ((range.length + range.location) == character) { 857 ++range.length; 858 } else { 859 if (range.length > 0) applyer(cset, range); 860 range.location = character; 861 range.length = 1; 862 } 863 } 864 865 ++index; // skip the low surrogate 866 } 867 } 868 869 if (range.length > 0) applyer(cset, range); 870} 871 872 873/* Bsearch theChar for __kCFCharSetClassString 874*/ 875CF_INLINE Boolean __CFCSetBsearchUniChar(const UniChar *theTable, CFIndex length, UniChar theChar) { 876 const UniChar *p, *q, *divider; 877 878 if ((theChar < theTable[0]) || (theChar > theTable[length - 1])) return false; 879 880 p = theTable; 881 q = p + (length - 1); 882 while (p <= q) { 883 divider = p + ((q - p) >> 1); /* divide by 2 */ 884 if (theChar < *divider) q = divider - 1; 885 else if (theChar > *divider) p = divider + 1; 886 else return true; 887 } 888 return false; 889} 890 891/* Array of instantiated builtin set. Note builtin set ID starts with 1 so the array index is ID - 1 892*/ 893static CFCharacterSetRef *__CFBuiltinSets = NULL; 894 895/* Global lock for character set 896*/ 897static CFSpinLock_t __CFCharacterSetLock = CFSpinLockInit; 898 899/* CFBase API functions 900*/ 901static Boolean __CFCharacterSetEqual(CFTypeRef cf1, CFTypeRef cf2) { 902 Boolean isInvertStateIdentical = (__CFCSetIsInverted((CFCharacterSetRef)cf1) == __CFCSetIsInverted((CFCharacterSetRef)cf2) ? true: false); 903 Boolean isAnnexInvertStateIdentical = (__CFCSetAnnexIsInverted((CFCharacterSetRef)cf1) == __CFCSetAnnexIsInverted((CFCharacterSetRef)cf2) ? true: false); 904 CFIndex idx; 905 CFCharacterSetRef subSet1; 906 uint8_t bitsBuf[__kCFBitmapSize]; 907 uint8_t *bits; 908 Boolean isBitmap1; 909 Boolean isBitmap2; 910 911 if (__CFCSetHasHashValue((CFCharacterSetRef)cf1) && __CFCSetHasHashValue((CFCharacterSetRef)cf2) && ((CFCharacterSetRef)cf1)->_hashValue != ((CFCharacterSetRef)cf2)->_hashValue) return false; 912 if (__CFCSetIsEmpty((CFCharacterSetRef)cf1) && __CFCSetIsEmpty((CFCharacterSetRef)cf2) && !isInvertStateIdentical) return false; 913 914 if ((__CFCSetClassType((CFCharacterSetRef)cf1) == __CFCSetClassType((CFCharacterSetRef)cf2)) && !__CFCSetIsCompactBitmap((CFCharacterSetRef)cf1)) { // Types are identical, we can do it fast 915 switch (__CFCSetClassType((CFCharacterSetRef)cf1)) { 916 case __kCFCharSetClassBuiltin: 917 return (__CFCSetBuiltinType((CFCharacterSetRef)cf1) == __CFCSetBuiltinType((CFCharacterSetRef)cf2) && isInvertStateIdentical ? true : false); 918 919 case __kCFCharSetClassRange: 920 return (__CFCSetRangeFirstChar((CFCharacterSetRef)cf1) == __CFCSetRangeFirstChar((CFCharacterSetRef)cf2) && __CFCSetRangeLength((CFCharacterSetRef)cf1) && __CFCSetRangeLength((CFCharacterSetRef)cf2) && isInvertStateIdentical ? true : false); 921 922 case __kCFCharSetClassString: 923 if (isInvertStateIdentical) { 924 const UniChar *buf1 = __CFCSetStringBuffer((CFCharacterSetRef)cf1); 925 const UniChar *buf1End = buf1 + __CFCSetStringLength((CFCharacterSetRef)cf1); 926 const UniChar *buf2 = __CFCSetStringBuffer((CFCharacterSetRef)cf2); 927 const UniChar *buf2End = buf2 + __CFCSetStringLength((CFCharacterSetRef)cf2); 928 929 while ((buf1 < buf1End) && (buf2 < buf2End)) { 930 UniChar char1 = *buf1; 931 UniChar char2 = *buf2; 932 933 if (char1 != char2) return false; 934 935 do { ++buf1; } while ((buf1 < buf1End) && (char1 == *buf1)); 936 do { ++buf2; } while ((buf2 < buf2End) && (char2 == *buf2)); 937 } 938 } else { 939 return false; 940 } 941 break; 942 943 case __kCFCharSetClassBitmap: 944 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf2))) return false; 945 break; 946 } 947 return __CFCSetIsEqualAnnex((CFCharacterSetRef)cf1, (CFCharacterSetRef)cf2); 948 } 949 950 // Check for easy empty cases 951 if (__CFCSetIsEmpty((CFCharacterSetRef)cf1) || __CFCSetIsEmpty((CFCharacterSetRef)cf2)) { 952 CFCharacterSetRef emptySet = (__CFCSetIsEmpty((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2); 953 CFCharacterSetRef nonEmptySet = (emptySet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1); 954 955 if (__CFCSetIsBuiltin(nonEmptySet)) { 956 return false; 957 } else if (__CFCSetIsRange(nonEmptySet)) { 958 if (isInvertStateIdentical) { 959 return (__CFCSetRangeLength(nonEmptySet) ? false : true); 960 } else { 961 return (__CFCSetRangeLength(nonEmptySet) == 0x110000 ? true : false); 962 } 963 } else { 964 if (__CFCSetAnnexIsInverted(nonEmptySet)) { 965 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet) != 0x1FFFE) return false; 966 } else { 967 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet)) return false; 968 } 969 970 if (__CFCSetIsBitmap(nonEmptySet)) { 971 bits = __CFCSetBitmapBits(nonEmptySet); 972 } else { 973 bits = bitsBuf; 974 __CFCSetGetBitmap(nonEmptySet, bitsBuf); 975 } 976 977 if (__CFCSetIsEqualBitmap(NULL, (const UInt32 *)bits)) { 978 if (!__CFCSetAnnexIsInverted(nonEmptySet)) return true; 979 } else { 980 return false; 981 } 982 983 // Annex set has to be CFRangeMake(0x10000, 0xfffff) 984 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) { 985 if (__CFCSetIsBitmap(nonEmptySet)) { 986 if (!__CFCSetIsEqualBitmap((__CFCSetAnnexIsInverted(nonEmptySet) ? NULL : (const UInt32 *)-1), (const UInt32 *)bitsBuf)) return false; 987 } else { 988 __CFCSetGetBitmap(__CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonEmptySet, idx), bitsBuf); 989 if (!__CFCSetIsEqualBitmap((const UInt32 *)-1, (const UInt32 *)bitsBuf)) return false; 990 } 991 } 992 return true; 993 } 994 } 995 996 if (__CFCSetIsBuiltin((CFCharacterSetRef)cf1) || __CFCSetIsBuiltin((CFCharacterSetRef)cf2)) { 997 CFCharacterSetRef builtinSet = (__CFCSetIsBuiltin((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2); 998 CFCharacterSetRef nonBuiltinSet = (builtinSet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1); 999 1000 1001 if (__CFCSetIsRange(nonBuiltinSet)) { 1002 UTF32Char firstChar = __CFCSetRangeFirstChar(nonBuiltinSet); 1003 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(nonBuiltinSet) - 1); 1004 uint8_t firstPlane = (firstChar >> 16) & 0xFF; 1005 uint8_t lastPlane = (lastChar >> 16) & 0xFF; 1006 uint8_t result; 1007 1008 for (idx = 0;idx < MAX_ANNEX_PLANE;idx++) { 1009 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), idx, bitsBuf, (isInvertStateIdentical != 0)); 1010 1011 if (idx < firstPlane || idx > lastPlane) { 1012 if (result == kCFUniCharBitmapAll) { 1013 return false; 1014 } else if (result == kCFUniCharBitmapFilled) { 1015 if (!__CFCSetIsEqualBitmap(NULL, (const UInt32 *)bitsBuf)) return false; 1016 } 1017 } else if (idx > firstPlane && idx < lastPlane) { 1018 if (result == kCFUniCharBitmapEmpty) { 1019 return false; 1020 } else if (result == kCFUniCharBitmapFilled) { 1021 if (!__CFCSetIsEqualBitmap((const UInt32 *)-1, (const UInt32 *)bitsBuf)) return false; 1022 } 1023 } else { 1024 if (result == kCFUniCharBitmapEmpty) { 1025 return false; 1026 } else if (result == kCFUniCharBitmapAll) { 1027 if (idx == firstPlane) { 1028 if (((firstChar & 0xFFFF) != 0) || (firstPlane == lastPlane && ((lastChar & 0xFFFF) != 0xFFFF))) return false; 1029 } else { 1030 if (((lastChar & 0xFFFF) != 0xFFFF) || (firstPlane == lastPlane && ((firstChar & 0xFFFF) != 0))) return false; 1031 } 1032 } else { 1033 if (idx == firstPlane) { 1034 if (!__CFCSetIsBitmapEqualToRange((const UInt32 *)bitsBuf, firstChar & 0xFFFF, (firstPlane == lastPlane ? lastChar & 0xFFFF : 0xFFFF), false)) return false; 1035 } else { 1036 if (!__CFCSetIsBitmapEqualToRange((const UInt32 *)bitsBuf, (firstPlane == lastPlane ? firstChar & 0xFFFF : 0), lastChar & 0xFFFF, false)) return false; 1037 } 1038 } 1039 } 1040 } 1041 return true; 1042 } else { 1043 uint8_t bitsBuf2[__kCFBitmapSize]; 1044 uint8_t result; 1045 1046 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), 0, bitsBuf, (__CFCSetIsInverted(builtinSet) != 0)); 1047 if (result == kCFUniCharBitmapFilled) { 1048 if (__CFCSetIsBitmap(nonBuiltinSet)) { 1049 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)__CFCSetBitmapBits(nonBuiltinSet))) return false; 1050 } else { 1051 1052 __CFCSetGetBitmap(nonBuiltinSet, bitsBuf2); 1053 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)bitsBuf2)) { 1054 return false; 1055 } 1056 } 1057 } else { 1058 if (__CFCSetIsBitmap(nonBuiltinSet)) { 1059 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1 : NULL), (const UInt32 *)__CFCSetBitmapBits(nonBuiltinSet))) return false; 1060 } else { 1061 __CFCSetGetBitmap(nonBuiltinSet, bitsBuf); 1062 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32 *)bitsBuf)) return false; 1063 } 1064 } 1065 1066 isInvertStateIdentical = (__CFCSetIsInverted(builtinSet) == __CFCSetAnnexIsInverted(nonBuiltinSet) ? true : false); 1067 1068 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) { 1069 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), idx, bitsBuf, !isInvertStateIdentical); 1070 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonBuiltinSet, idx); 1071 1072 if (result == kCFUniCharBitmapFilled) { 1073 if (NULL == subSet1) { 1074 return false; 1075 } else if (__CFCSetIsBitmap(subSet1)) { 1076 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)__CFCSetBitmapBits(subSet1))) { 1077 return false; 1078 } 1079 } else { 1080 1081 __CFCSetGetBitmap(subSet1, bitsBuf2); 1082 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)bitsBuf2)) { 1083 return false; 1084 } 1085 } 1086 } else { 1087 if (NULL == subSet1) { 1088 if (result == kCFUniCharBitmapAll) { 1089 return false; 1090 } 1091 } else if (__CFCSetIsBitmap(subSet1)) { 1092 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32*)__CFCSetBitmapBits(subSet1))) { 1093 return false; 1094 } 1095 } else { 1096 __CFCSetGetBitmap(subSet1, bitsBuf); 1097 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32*)bitsBuf)) { 1098 return false; 1099 } 1100 } 1101 } 1102 } 1103 return true; 1104 } 1105 } 1106 1107 if (__CFCSetIsRange((CFCharacterSetRef)cf1) || __CFCSetIsRange((CFCharacterSetRef)cf2)) { 1108 CFCharacterSetRef rangeSet = (__CFCSetIsRange((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2); 1109 CFCharacterSetRef nonRangeSet = (rangeSet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1); 1110 UTF32Char firstChar = __CFCSetRangeFirstChar(rangeSet); 1111 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(rangeSet) - 1); 1112 uint8_t firstPlane = (firstChar >> 16) & 0xFF; 1113 uint8_t lastPlane = (lastChar >> 16) & 0xFF; 1114 Boolean isRangeSetInverted = __CFCSetIsInverted(rangeSet); 1115 1116 if (__CFCSetIsBitmap(nonRangeSet)) { 1117 bits = __CFCSetBitmapBits(nonRangeSet); 1118 } else { 1119 bits = bitsBuf; 1120 __CFCSetGetBitmap(nonRangeSet, bitsBuf); 1121 } 1122 if (firstPlane == 0) { 1123 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, firstChar, (lastPlane == 0 ? lastChar : 0xFFFF), isRangeSetInverted)) return false; 1124 firstPlane = 1; 1125 firstChar = 0; 1126 } else { 1127 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isRangeSetInverted ? (const UInt32 *)-1 : NULL))) return false; 1128 firstChar &= 0xFFFF; 1129 } 1130 1131 lastChar &= 0xFFFF; 1132 1133 isAnnexInvertStateIdentical = (isRangeSetInverted == __CFCSetAnnexIsInverted(nonRangeSet) ? true : false); 1134 1135 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) { 1136 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonRangeSet, idx); 1137 if (NULL == subSet1) { 1138 if (idx < firstPlane || idx > lastPlane) { 1139 if (!isAnnexInvertStateIdentical) return false; 1140 } else if (idx > firstPlane && idx < lastPlane) { 1141 if (isAnnexInvertStateIdentical) return false; 1142 } else if (idx == firstPlane) { 1143 if (isAnnexInvertStateIdentical || firstChar || (idx == lastPlane && lastChar != 0xFFFF)) return false; 1144 } else if (idx == lastPlane) { 1145 if (isAnnexInvertStateIdentical || (idx == firstPlane && firstChar) || (lastChar != 0xFFFF)) return false; 1146 } 1147 } else { 1148 if (__CFCSetIsBitmap(subSet1)) { 1149 bits = __CFCSetBitmapBits(subSet1); 1150 } else { 1151 __CFCSetGetBitmap(subSet1, bitsBuf); 1152 bits = bitsBuf; 1153 } 1154 1155 if (idx < firstPlane || idx > lastPlane) { 1156 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isAnnexInvertStateIdentical ? NULL : (const UInt32 *)-1))) return false; 1157 } else if (idx > firstPlane && idx < lastPlane) { 1158 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isAnnexInvertStateIdentical ? (const UInt32 *)-1 : NULL))) return false; 1159 } else if (idx == firstPlane) { 1160 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, firstChar, (idx == lastPlane ? lastChar : 0xFFFF), !isAnnexInvertStateIdentical)) return false; 1161 } else if (idx == lastPlane) { 1162 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, (idx == firstPlane ? firstChar : 0), lastChar, !isAnnexInvertStateIdentical)) return false; 1163 } 1164 } 1165 } 1166 return true; 1167 } 1168 1169 isBitmap1 = __CFCSetIsBitmap((CFCharacterSetRef)cf1); 1170 isBitmap2 = __CFCSetIsBitmap((CFCharacterSetRef)cf2); 1171 1172 if (isBitmap1 && isBitmap2) { 1173 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf2))) return false; 1174 } else if (!isBitmap1 && !isBitmap2) { 1175 uint8_t bitsBuf2[__kCFBitmapSize]; 1176 1177 __CFCSetGetBitmap((CFCharacterSetRef)cf1, bitsBuf); 1178 __CFCSetGetBitmap((CFCharacterSetRef)cf2, bitsBuf2); 1179 1180 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)bitsBuf2)) { 1181 return false; 1182 } 1183 } else { 1184 if (isBitmap2) { 1185 CFCharacterSetRef tmp = (CFCharacterSetRef)cf2; 1186 cf2 = cf1; 1187 cf1 = tmp; 1188 } 1189 1190 __CFCSetGetBitmap((CFCharacterSetRef)cf2, bitsBuf); 1191 1192 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)bitsBuf)) return false; 1193 } 1194 return __CFCSetIsEqualAnnex((CFCharacterSetRef)cf1, (CFCharacterSetRef)cf2); 1195} 1196 1197static CFHashCode __CFCharacterSetHash(CFTypeRef cf) { 1198 if (!__CFCSetHasHashValue((CFCharacterSetRef)cf)) { 1199 if (__CFCSetIsEmpty((CFCharacterSetRef)cf)) { 1200 ((CFMutableCharacterSetRef)cf)->_hashValue = (__CFCSetIsInverted((CFCharacterSetRef)cf) ? ((UInt32)0xFFFFFFFF) : 0); 1201 } else if (__CFCSetIsBitmap( (CFCharacterSetRef) cf )) { 1202 ((CFMutableCharacterSetRef)cf)->_hashValue = CFHashBytes(__CFCSetBitmapBits((CFCharacterSetRef)cf), __kCFBitmapSize); 1203 } else { 1204 uint8_t bitsBuf[__kCFBitmapSize]; 1205 __CFCSetGetBitmap((CFCharacterSetRef)cf, bitsBuf); 1206 ((CFMutableCharacterSetRef)cf)->_hashValue = CFHashBytes(bitsBuf, __kCFBitmapSize); 1207 } 1208 __CFCSetPutHasHashValue((CFMutableCharacterSetRef)cf, true); 1209 } 1210 return ((CFCharacterSetRef)cf)->_hashValue; 1211} 1212 1213static CFStringRef __CFCharacterSetCopyDescription(CFTypeRef cf) { 1214 CFMutableStringRef string; 1215 CFIndex idx; 1216 CFIndex length; 1217 1218 if (__CFCSetIsEmpty((CFCharacterSetRef)cf)) { 1219 return (CFStringRef)(__CFCSetIsInverted((CFCharacterSetRef)cf) ? CFRetain(CFSTR("<CFCharacterSet All>")) : CFRetain(CFSTR("<CFCharacterSet Empty>"))); 1220 } 1221 1222 switch (__CFCSetClassType((CFCharacterSetRef)cf)) { 1223 case __kCFCharSetClassBuiltin: 1224 switch (__CFCSetBuiltinType((CFCharacterSetRef)cf)) { 1225 case kCFCharacterSetControl: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Control Set>")); 1226 case kCFCharacterSetWhitespace : return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Whitespace Set>")); 1227 case kCFCharacterSetWhitespaceAndNewline: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined WhitespaceAndNewline Set>")); 1228 case kCFCharacterSetDecimalDigit: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined DecimalDigit Set>")); 1229 case kCFCharacterSetLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Letter Set>")); 1230 case kCFCharacterSetLowercaseLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined LowercaseLetter Set>")); 1231 case kCFCharacterSetUppercaseLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined UppercaseLetter Set>")); 1232 case kCFCharacterSetNonBase: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined NonBase Set>")); 1233 case kCFCharacterSetDecomposable: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Decomposable Set>")); 1234 case kCFCharacterSetAlphaNumeric: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined AlphaNumeric Set>")); 1235 case kCFCharacterSetPunctuation: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Punctuation Set>")); 1236 case kCFCharacterSetIllegal: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Illegal Set>")); 1237 case kCFCharacterSetCapitalizedLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined CapitalizedLetter Set>")); 1238 case kCFCharacterSetSymbol: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Symbol Set>")); 1239 case kCFCharacterSetNewline: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Newline Set>")); 1240 } 1241 break; 1242 1243 case __kCFCharSetClassRange: 1244 return CFStringCreateWithFormat(CFGetAllocator((CFCharacterSetRef)cf), NULL, CFSTR("<CFCharacterSet Range(%u, %ld)>"), (unsigned int)__CFCSetRangeFirstChar((CFCharacterSetRef)cf), (long)__CFCSetRangeLength((CFCharacterSetRef)cf)); 1245 1246 case __kCFCharSetClassString: { 1247 CFStringRef format = CFSTR("<CFCharacterSet Items("); 1248 1249 length = __CFCSetStringLength((CFCharacterSetRef)cf); 1250 string = CFStringCreateMutable(CFGetAllocator(cf), CFStringGetLength(format) + 7 * length + 2); // length of format + "U+XXXX "(7) * length + ")>"(2) 1251 CFStringAppend(string, format); 1252 for (idx = 0;idx < length;idx++) { 1253 CFStringAppendFormat(string, NULL, CFSTR("%sU+%04X"), (idx > 0 ? " " : ""), (unsigned int)((__CFCSetStringBuffer((CFCharacterSetRef)cf))[idx])); 1254 } 1255 CFStringAppend(string, CFSTR(")>")); 1256 return string; 1257 } 1258 1259 case __kCFCharSetClassBitmap: 1260 case __kCFCharSetClassCompactBitmap: 1261 return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Bitmap>")); // ??? Should generate description for 8k bitmap ? 1262 } 1263 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here 1264 return NULL; 1265} 1266 1267static void __CFCharacterSetDeallocate(CFTypeRef cf) { 1268 CFAllocatorRef allocator = CFGetAllocator(cf); 1269 1270 if (__CFCSetIsBuiltin((CFCharacterSetRef)cf) && !__CFCSetIsMutable((CFCharacterSetRef)cf) && !__CFCSetIsInverted((CFCharacterSetRef)cf)) { 1271 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)cf)); 1272 if (sharedSet == cf) { // We're trying to dealloc the builtin set 1273 CFAssert1(0, __kCFLogAssertion, "%s: Trying to deallocate predefined set. The process is likely to crash.", __PRETTY_FUNCTION__); 1274 return; // We never deallocate builtin set 1275 } 1276 } 1277 1278 if (__CFCSetIsString((CFCharacterSetRef)cf) && __CFCSetStringBuffer((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetStringBuffer((CFCharacterSetRef)cf)); 1279 else if (__CFCSetIsBitmap((CFCharacterSetRef)cf) && __CFCSetBitmapBits((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetBitmapBits((CFCharacterSetRef)cf)); 1280 else if (__CFCSetIsCompactBitmap((CFCharacterSetRef)cf) && __CFCSetCompactBitmapBits((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetCompactBitmapBits((CFCharacterSetRef)cf)); 1281 __CFCSetDeallocateAnnexPlane((CFCharacterSetRef)cf); 1282} 1283 1284static CFTypeID __kCFCharacterSetTypeID = _kCFRuntimeNotATypeID; 1285 1286static const CFRuntimeClass __CFCharacterSetClass = { 1287 0, 1288 "CFCharacterSet", 1289 NULL, // init 1290 NULL, // copy 1291 __CFCharacterSetDeallocate, 1292 __CFCharacterSetEqual, 1293 __CFCharacterSetHash, 1294 NULL, // 1295 __CFCharacterSetCopyDescription 1296}; 1297 1298static bool __CFCheckForExapendedSet = false; 1299 1300CF_PRIVATE void __CFCharacterSetInitialize(void) { 1301 const char *checkForExpandedSet = __CFgetenv("__CF_DEBUG_EXPANDED_SET"); 1302 1303 __kCFCharacterSetTypeID = _CFRuntimeRegisterClass(&__CFCharacterSetClass); 1304 1305 if (checkForExpandedSet && (*checkForExpandedSet == 'Y')) __CFCheckForExapendedSet = true; 1306} 1307 1308/* Public functions 1309*/ 1310 1311CFTypeID CFCharacterSetGetTypeID(void) { 1312 return __kCFCharacterSetTypeID; 1313} 1314 1315/*** CharacterSet creation ***/ 1316/* Functions to create basic immutable characterset. 1317*/ 1318CFCharacterSetRef CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier) { 1319 CFCharacterSetRef cset; 1320 1321 __CFCSetValidateBuiltinType(theSetIdentifier, __PRETTY_FUNCTION__); 1322 1323 __CFSpinLock(&__CFCharacterSetLock); 1324 cset = ((NULL != __CFBuiltinSets) ? __CFBuiltinSets[theSetIdentifier - 1] : NULL); 1325 __CFSpinUnlock(&__CFCharacterSetLock); 1326 1327 if (NULL != cset) return cset; 1328 1329 if (!(cset = __CFCSetGenericCreate(kCFAllocatorSystemDefault, __kCFCharSetClassBuiltin))) return NULL; 1330 __CFCSetPutBuiltinType((CFMutableCharacterSetRef)cset, theSetIdentifier); 1331 1332 __CFSpinLock(&__CFCharacterSetLock); 1333 if (!__CFBuiltinSets) { 1334 __CFBuiltinSets = (CFCharacterSetRef *)CFAllocatorAllocate((CFAllocatorRef)CFRetain(__CFGetDefaultAllocator()), sizeof(CFCharacterSetRef) * __kCFLastBuiltinSetID, 0); 1335 memset(__CFBuiltinSets, 0, sizeof(CFCharacterSetRef) * __kCFLastBuiltinSetID); 1336 } 1337 1338 __CFBuiltinSets[theSetIdentifier - 1] = cset; 1339 __CFSpinUnlock(&__CFCharacterSetLock); 1340 1341 return cset; 1342} 1343 1344CFCharacterSetRef CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef allocator, CFRange theRange) { 1345 CFMutableCharacterSetRef cset; 1346 1347 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__); 1348 1349 if (theRange.length) { 1350 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassRange))) return NULL; 1351 __CFCSetPutRangeFirstChar(cset, theRange.location); 1352 __CFCSetPutRangeLength(cset, theRange.length); 1353 } else { 1354 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap))) return NULL; 1355 __CFCSetPutBitmapBits(cset, NULL); 1356 __CFCSetPutHasHashValue(cset, true); // _hashValue is 0 1357 } 1358 1359 return cset; 1360} 1361 1362static int chcompar(const void *a, const void *b) { 1363 return -(int)(*(UniChar *)b - *(UniChar *)a); 1364} 1365 1366CFCharacterSetRef CFCharacterSetCreateWithCharactersInString(CFAllocatorRef allocator, CFStringRef theString) { 1367 CFIndex length; 1368 1369 length = CFStringGetLength(theString); 1370 if (length < __kCFStringCharSetMax) { 1371 CFMutableCharacterSetRef cset; 1372 1373 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassString))) return NULL; 1374 __CFCSetPutStringBuffer(cset, (UniChar *)CFAllocatorAllocate(CFGetAllocator(cset), __kCFStringCharSetMax * sizeof(UniChar), 0)); 1375 __CFCSetPutStringLength(cset, length); 1376 CFStringGetCharacters(theString, CFRangeMake(0, length), __CFCSetStringBuffer(cset)); 1377 qsort(__CFCSetStringBuffer(cset), length, sizeof(UniChar), chcompar); 1378 1379 if (0 == length) { 1380 __CFCSetPutHasHashValue(cset, true); // _hashValue is 0 1381 } else if (length > 1) { // Check for surrogate 1382 const UTF16Char *characters = __CFCSetStringBuffer(cset); 1383 const UTF16Char *charactersLimit = characters + length; 1384 1385 if ((*characters < 0xDC00UL) && (*(charactersLimit - 1) > 0xDBFFUL)) { // might have surrogate chars 1386 while (characters < charactersLimit) { 1387 if (CFStringIsSurrogateHighCharacter(*characters) || CFStringIsSurrogateLowCharacter(*characters)) { 1388 CFRelease(cset); 1389 cset = NULL; 1390 break; 1391 } 1392 ++characters; 1393 } 1394 } 1395 } 1396 if (NULL != cset) return cset; 1397 } 1398 1399 CFMutableCharacterSetRef mcset = CFCharacterSetCreateMutable(allocator); 1400 CFCharacterSetAddCharactersInString(mcset, theString); 1401 __CFCSetMakeCompact(mcset); 1402 __CFCSetPutIsMutable(mcset, false); 1403 return mcset; 1404} 1405 1406CFCharacterSetRef CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef allocator, CFDataRef theData) { 1407 CFMutableCharacterSetRef cset; 1408 CFIndex length; 1409 1410 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap))) return NULL; 1411 1412 if (theData && (length = CFDataGetLength(theData)) > 0) { 1413 uint8_t *bitmap; 1414 uint8_t *cBitmap; 1415 1416 if (length < __kCFBitmapSize) { 1417 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0); 1418 memmove(bitmap, CFDataGetBytePtr(theData), length); 1419 memset(bitmap + length, 0, __kCFBitmapSize - length); 1420 1421 cBitmap = __CFCreateCompactBitmap(allocator, bitmap); 1422 1423 if (cBitmap == NULL) { 1424 __CFCSetPutBitmapBits(cset, bitmap); 1425 } else { 1426 CFAllocatorDeallocate(allocator, bitmap); 1427 __CFCSetPutCompactBitmapBits(cset, cBitmap); 1428 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap); 1429 } 1430 } else { 1431 cBitmap = __CFCreateCompactBitmap(allocator, CFDataGetBytePtr(theData)); 1432 1433 if (cBitmap == NULL) { 1434 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0); 1435 memmove(bitmap, CFDataGetBytePtr(theData), __kCFBitmapSize); 1436 1437 __CFCSetPutBitmapBits(cset, bitmap); 1438 } else { 1439 __CFCSetPutCompactBitmapBits(cset, cBitmap); 1440 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap); 1441 } 1442 1443 if (length > __kCFBitmapSize) { 1444 CFMutableCharacterSetRef annexSet; 1445 const uint8_t *bytes = CFDataGetBytePtr(theData) + __kCFBitmapSize; 1446 1447 length -= __kCFBitmapSize; 1448 1449 while (length > 1) { 1450 annexSet = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, *(bytes++)); 1451 --length; // Decrement the plane no byte 1452 1453 if (length < __kCFBitmapSize) { 1454 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0); 1455 memmove(bitmap, bytes, length); 1456 memset(bitmap + length, 0, __kCFBitmapSize - length); 1457 1458 cBitmap = __CFCreateCompactBitmap(allocator, bitmap); 1459 1460 if (cBitmap == NULL) { 1461 __CFCSetPutBitmapBits(annexSet, bitmap); 1462 } else { 1463 CFAllocatorDeallocate(allocator, bitmap); 1464 __CFCSetPutCompactBitmapBits(annexSet, cBitmap); 1465 __CFCSetPutClassType(annexSet, __kCFCharSetClassCompactBitmap); 1466 } 1467 } else { 1468 cBitmap = __CFCreateCompactBitmap(allocator, bytes); 1469 1470 if (cBitmap == NULL) { 1471 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0); 1472 memmove(bitmap, bytes, __kCFBitmapSize); 1473 1474 __CFCSetPutBitmapBits(annexSet, bitmap); 1475 } else { 1476 __CFCSetPutCompactBitmapBits(annexSet, cBitmap); 1477 __CFCSetPutClassType(annexSet, __kCFCharSetClassCompactBitmap); 1478 } 1479 } 1480 length -= __kCFBitmapSize; 1481 bytes += __kCFBitmapSize; 1482 } 1483 } 1484 } 1485 } else { 1486 __CFCSetPutBitmapBits(cset, NULL); 1487 __CFCSetPutHasHashValue(cset, true); // Hash value is 0 1488 } 1489 1490 return cset; 1491} 1492 1493CFCharacterSetRef CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc, CFCharacterSetRef theSet) { 1494 CFMutableCharacterSetRef cset; 1495 1496 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, CFCharacterSetRef , (NSCharacterSet *)theSet, invertedSet); 1497 1498 cset = CFCharacterSetCreateMutableCopy(alloc, theSet); 1499 CFCharacterSetInvert(cset); 1500 __CFCSetPutIsMutable(cset, false); 1501 1502 return cset; 1503} 1504 1505/* Functions to create mutable characterset. 1506*/ 1507CFMutableCharacterSetRef CFCharacterSetCreateMutable(CFAllocatorRef allocator) { 1508 CFMutableCharacterSetRef cset; 1509 1510 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap| __kCFCharSetIsMutable))) return NULL; 1511 __CFCSetPutBitmapBits(cset, NULL); 1512 __CFCSetPutHasHashValue(cset, true); // Hash value is 0 1513 1514 return cset; 1515} 1516 1517static CFMutableCharacterSetRef __CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet, bool isMutable) { 1518 CFMutableCharacterSetRef cset; 1519 1520 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, CFMutableCharacterSetRef , (NSCharacterSet *)theSet, mutableCopy); 1521 1522 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID); 1523 1524 if (!isMutable && !__CFCSetIsMutable(theSet)) { 1525 return (CFMutableCharacterSetRef)CFRetain(theSet); 1526 } 1527 1528 cset = CFCharacterSetCreateMutable(alloc); 1529 1530 __CFCSetPutClassType(cset, __CFCSetClassType(theSet)); 1531 __CFCSetPutHasHashValue(cset, __CFCSetHasHashValue(theSet)); 1532 __CFCSetPutIsInverted(cset, __CFCSetIsInverted(theSet)); 1533 cset->_hashValue = theSet->_hashValue; 1534 1535 switch (__CFCSetClassType(theSet)) { 1536 case __kCFCharSetClassBuiltin: 1537 __CFCSetPutBuiltinType(cset, __CFCSetBuiltinType(theSet)); 1538 break; 1539 1540 case __kCFCharSetClassRange: 1541 __CFCSetPutRangeFirstChar(cset, __CFCSetRangeFirstChar(theSet)); 1542 __CFCSetPutRangeLength(cset, __CFCSetRangeLength(theSet)); 1543 break; 1544 1545 case __kCFCharSetClassString: 1546 __CFCSetPutStringBuffer(cset, (UniChar *)CFAllocatorAllocate(alloc, __kCFStringCharSetMax * sizeof(UniChar), 0)); 1547 1548 __CFCSetPutStringLength(cset, __CFCSetStringLength(theSet)); 1549 memmove(__CFCSetStringBuffer(cset), __CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet) * sizeof(UniChar)); 1550 break; 1551 1552 case __kCFCharSetClassBitmap: 1553 if (__CFCSetBitmapBits(theSet)) { 1554 uint8_t * bitmap = (isMutable ? NULL : __CFCreateCompactBitmap(alloc, __CFCSetBitmapBits(theSet))); 1555 1556 if (bitmap == NULL) { 1557 bitmap = (uint8_t *)CFAllocatorAllocate(alloc, sizeof(uint8_t) * __kCFBitmapSize, 0); 1558 memmove(bitmap, __CFCSetBitmapBits(theSet), __kCFBitmapSize); 1559 __CFCSetPutBitmapBits(cset, bitmap); 1560 } else { 1561 __CFCSetPutCompactBitmapBits(cset, bitmap); 1562 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap); 1563 } 1564 } else { 1565 __CFCSetPutBitmapBits(cset, NULL); 1566 } 1567 break; 1568 1569 case __kCFCharSetClassCompactBitmap: { 1570 const uint8_t *compactBitmap = __CFCSetCompactBitmapBits(theSet); 1571 1572 if (compactBitmap) { 1573 uint32_t size = __CFCSetGetCompactBitmapSize(compactBitmap); 1574 uint8_t *newBitmap = (uint8_t *)CFAllocatorAllocate(alloc, size, 0); 1575 1576 memmove(newBitmap, compactBitmap, size); 1577 __CFCSetPutCompactBitmapBits(cset, newBitmap); 1578 } 1579 } 1580 break; 1581 1582 default: 1583 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here 1584 } 1585 if (__CFCSetHasNonBMPPlane(theSet)) { 1586 CFMutableCharacterSetRef annexPlane; 1587 int idx; 1588 1589 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) { 1590 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx))) { 1591 annexPlane = __CFCharacterSetCreateCopy(alloc, annexPlane, isMutable); 1592 __CFCSetPutCharacterSetToAnnexPlane(cset, annexPlane, idx); 1593 CFRelease(annexPlane); 1594 } 1595 } 1596 __CFCSetAnnexSetIsInverted(cset, __CFCSetAnnexIsInverted(theSet)); 1597 } else if (__CFCSetAnnexIsInverted(theSet)) { 1598 __CFCSetAnnexSetIsInverted(cset, true); 1599 } 1600 1601 return cset; 1602} 1603 1604CFCharacterSetRef CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet) { 1605 return __CFCharacterSetCreateCopy(alloc, theSet, false); 1606} 1607 1608CFMutableCharacterSetRef CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet) { 1609 return __CFCharacterSetCreateCopy(alloc, theSet, true); 1610} 1611 1612/*** Basic accessors ***/ 1613Boolean CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet, UniChar theChar) { 1614 CFIndex length; 1615 Boolean isInverted; 1616 Boolean result = false; 1617 1618 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, Boolean, (NSCharacterSet *)theSet, longCharacterIsMember:(UTF32Char)theChar); 1619 1620 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID); 1621 1622 isInverted = __CFCSetIsInverted(theSet); 1623 1624 switch (__CFCSetClassType(theSet)) { 1625 case __kCFCharSetClassBuiltin: 1626 result = (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted); 1627 break; 1628 1629 case __kCFCharSetClassRange: 1630 length = __CFCSetRangeLength(theSet); 1631 result = (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted); 1632 break; 1633 1634 case __kCFCharSetClassString: 1635 result = ((length = __CFCSetStringLength(theSet)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet), length, theChar) ? !isInverted : isInverted) : isInverted); 1636 break; 1637 1638 case __kCFCharSetClassBitmap: 1639 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet), theChar) ? true : false) : isInverted); 1640 break; 1641 1642 case __kCFCharSetClassCompactBitmap: 1643 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet), theChar) ? true : false) : isInverted); 1644 break; 1645 1646 default: 1647 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here 1648 break; 1649 } 1650 1651 return result; 1652} 1653 1654Boolean CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet, UTF32Char theChar) { 1655 CFIndex length; 1656 UInt32 plane = (theChar >> 16); 1657 Boolean isAnnexInverted = false; 1658 Boolean isInverted; 1659 Boolean result = false; 1660 1661 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, Boolean, (NSCharacterSet *)theSet, longCharacterIsMember:(UTF32Char)theChar); 1662 1663 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID); 1664 1665 if (plane) { 1666 CFCharacterSetRef annexPlane; 1667 1668 if (__CFCSetIsBuiltin(theSet)) { 1669 isInverted = __CFCSetIsInverted(theSet); 1670 return (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted); 1671 } 1672 1673 isAnnexInverted = __CFCSetAnnexIsInverted(theSet); 1674 1675 if ((annexPlane = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, plane)) == NULL) { 1676 if (!__CFCSetHasNonBMPPlane(theSet) && __CFCSetIsRange(theSet)) { 1677 isInverted = __CFCSetIsInverted(theSet); 1678 length = __CFCSetRangeLength(theSet); 1679 return (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted); 1680 } else { 1681 return (isAnnexInverted ? true : false); 1682 } 1683 } else { 1684 theSet = annexPlane; 1685 theChar &= 0xFFFF; 1686 } 1687 } 1688 1689 isInverted = __CFCSetIsInverted(theSet); 1690 1691 switch (__CFCSetClassType(theSet)) { 1692 case __kCFCharSetClassBuiltin: 1693 result = (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted); 1694 break; 1695 1696 case __kCFCharSetClassRange: 1697 length = __CFCSetRangeLength(theSet); 1698 result = (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted); 1699 break; 1700 1701 case __kCFCharSetClassString: 1702 result = ((length = __CFCSetStringLength(theSet)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet), length, theChar) ? !isInverted : isInverted) : isInverted); 1703 break; 1704 1705 case __kCFCharSetClassBitmap: 1706 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet), theChar) ? true : false) : isInverted); 1707 break; 1708 1709 case __kCFCharSetClassCompactBitmap: 1710 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet), theChar) ? true : false) : isInverted); 1711 break; 1712 1713 default: 1714 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here 1715 return false; // To make compiler happy 1716 } 1717 1718 return (result ? !isAnnexInverted : isAnnexInverted); 1719} 1720 1721Boolean CFCharacterSetIsSurrogatePairMember(CFCharacterSetRef theSet, UniChar surrogateHigh, UniChar surrogateLow) { 1722 return CFCharacterSetIsLongCharacterMember(theSet, CFCharacterSetGetLongCharacterForSurrogatePair(surrogateHigh, surrogateLow)); 1723} 1724 1725 1726static inline CFCharacterSetRef __CFCharacterSetGetExpandedSetForNSCharacterSet(const void *characterSet) { 1727 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, CFCharacterSetRef , (NSCharacterSet *)characterSet, _expandedCFCharacterSet); 1728 return NULL; 1729} 1730 1731Boolean CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet, CFCharacterSetRef theOtherSet) { 1732 CFMutableCharacterSetRef copy; 1733 CFCharacterSetRef expandedSet = NULL; 1734 CFCharacterSetRef expandedOtherSet = NULL; 1735 Boolean result; 1736 1737 if ((!CF_IS_OBJC(__kCFCharacterSetTypeID, theSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theSet))) && (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedOtherSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet)))) { // Really CF, we can do some trick here 1738 if (expandedSet) theSet = expandedSet; 1739 if (expandedOtherSet) theOtherSet = expandedOtherSet; 1740 1741 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID); 1742 __CFGenericValidateType(theOtherSet, __kCFCharacterSetTypeID); 1743 1744 if (__CFCSetIsEmpty(theSet)) { 1745 if (__CFCSetIsInverted(theSet)) { 1746 return TRUE; // Inverted empty set covers all range 1747 } else if (!__CFCSetIsEmpty(theOtherSet) || __CFCSetIsInverted(theOtherSet)) { 1748 return FALSE; 1749 } 1750 } else if (__CFCSetIsEmpty(theOtherSet) && !__CFCSetIsInverted(theOtherSet)) { 1751 return TRUE; 1752 } else { 1753 if (__CFCSetIsBuiltin(theSet) || __CFCSetIsBuiltin(theOtherSet)) { 1754 if (__CFCSetClassType(theSet) == __CFCSetClassType(theOtherSet) && __CFCSetBuiltinType(theSet) == __CFCSetBuiltinType(theOtherSet) && !__CFCSetIsInverted(theSet) && !__CFCSetIsInverted(theOtherSet)) return TRUE; 1755 } else if (__CFCSetIsRange(theSet) || __CFCSetIsRange(theOtherSet)) { 1756 if (__CFCSetClassType(theSet) == __CFCSetClassType(theOtherSet)) { 1757 if (__CFCSetIsInverted(theSet)) { 1758 if (__CFCSetIsInverted(theOtherSet)) { 1759 return (__CFCSetRangeFirstChar(theOtherSet) > __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) > (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) ? FALSE : TRUE); 1760 } else { 1761 return ((__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) <= __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) <= __CFCSetRangeFirstChar(theOtherSet) ? TRUE : FALSE); 1762 } 1763 } else { 1764 if (__CFCSetIsInverted(theOtherSet)) { 1765 return ((__CFCSetRangeFirstChar(theSet) == 0 && __CFCSetRangeLength(theSet) == 0x110000) || (__CFCSetRangeFirstChar(theOtherSet) == 0 && (UInt32)__CFCSetRangeLength(theOtherSet) <= __CFCSetRangeFirstChar(theSet)) || ((__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) <= __CFCSetRangeFirstChar(theOtherSet) && (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) == 0x110000) ? TRUE : FALSE); 1766 } else { 1767 return (__CFCSetRangeFirstChar(theOtherSet) < __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) < (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) ? FALSE : TRUE); 1768 } 1769 } 1770 } 1771 } else { 1772 UInt32 theSetAnnexMask = __CFCSetAnnexValidEntriesBitmap(theSet); 1773 UInt32 theOtherSetAnnexMask = __CFCSetAnnexValidEntriesBitmap(theOtherSet); 1774 Boolean isTheSetAnnexInverted = __CFCSetAnnexIsInverted(theSet); 1775 Boolean isTheOtherSetAnnexInverted = __CFCSetAnnexIsInverted(theOtherSet); 1776 uint8_t theSetBuffer[__kCFBitmapSize]; 1777 uint8_t theOtherSetBuffer[__kCFBitmapSize]; 1778 1779 // We mask plane 1 to plane 16 1780 if (isTheSetAnnexInverted) theSetAnnexMask = (~theSetAnnexMask) & (0xFFFF << 1); 1781 if (isTheOtherSetAnnexInverted) theOtherSetAnnexMask = (~theOtherSetAnnexMask) & (0xFFFF << 1); 1782 1783 __CFCSetGetBitmap(theSet, theSetBuffer); 1784 __CFCSetGetBitmap(theOtherSet, theOtherSetBuffer); 1785 1786 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32 *)theSetBuffer, (const UInt32 *)theOtherSetBuffer, FALSE, FALSE)) return FALSE; 1787 1788 if (theOtherSetAnnexMask) { 1789 CFCharacterSetRef theSetAnnex; 1790 CFCharacterSetRef theOtherSetAnnex; 1791 uint32_t idx; 1792 1793 if ((theSetAnnexMask & theOtherSetAnnexMask) != theOtherSetAnnexMask) return FALSE; 1794 1795 for (idx = 1;idx <= 16;idx++) { 1796 theSetAnnex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx); 1797 if (NULL == theSetAnnex) continue; // This case is already handled by the mask above 1798 1799 theOtherSetAnnex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx); 1800 1801 if (NULL == theOtherSetAnnex) { 1802 if (isTheOtherSetAnnexInverted) { 1803 __CFCSetGetBitmap(theSetAnnex, theSetBuffer); 1804 if (!__CFCSetIsEqualBitmap((const UInt32 *)theSetBuffer, (isTheSetAnnexInverted ? NULL : (const UInt32 *)-1))) return FALSE; 1805 } 1806 } else { 1807 __CFCSetGetBitmap(theSetAnnex, theSetBuffer); 1808 __CFCSetGetBitmap(theOtherSetAnnex, theOtherSetBuffer); 1809 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32 *)theSetBuffer, (const UInt32 *)theOtherSetBuffer, isTheSetAnnexInverted, isTheOtherSetAnnexInverted)) return FALSE; 1810 } 1811 } 1812 } 1813 1814 return TRUE; 1815 } 1816 } 1817 } 1818 1819 copy = CFCharacterSetCreateMutableCopy(kCFAllocatorSystemDefault, theSet); 1820 CFCharacterSetIntersect(copy, theOtherSet); 1821 result = __CFCharacterSetEqual(copy, theOtherSet); 1822 CFRelease(copy); 1823 1824 return result; 1825} 1826 1827Boolean CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet, CFIndex thePlane) { 1828 Boolean isInverted = __CFCSetIsInverted(theSet); 1829 1830 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, Boolean, (NSCharacterSet *)theSet, hasMemberInPlane:(uint8_t)thePlane); 1831 1832 if (__CFCSetIsEmpty(theSet)) { 1833 return (isInverted ? TRUE : FALSE); 1834 } else if (__CFCSetIsBuiltin(theSet)) { 1835 CFCharacterSetPredefinedSet type = __CFCSetBuiltinType(theSet); 1836 1837 if (type == kCFCharacterSetControl) { 1838 if (isInverted || (thePlane == 14)) { 1839 return TRUE; // There is no plane that covers all values || Plane 14 has language tags 1840 } else { 1841 return (CFUniCharGetBitmapPtrForPlane(type, thePlane) ? TRUE : FALSE); 1842 } 1843 } else if ((type < kCFCharacterSetDecimalDigit) || (type == kCFCharacterSetNewline)) { 1844 return (thePlane && !isInverted ? FALSE : TRUE); 1845 } else if (__CFCSetBuiltinType(theSet) == kCFCharacterSetIllegal) { 1846 return (isInverted ? (thePlane < 3 || thePlane > 13 ? TRUE : FALSE) : TRUE); // This is according to Unicode 3.1 1847 } else { 1848 if (isInverted) { 1849 return TRUE; // There is no plane that covers all values 1850 } else { 1851 return (CFUniCharGetBitmapPtrForPlane(type, thePlane) ? TRUE : FALSE); 1852 } 1853 } 1854 } else if (__CFCSetIsRange(theSet)) { 1855 UTF32Char firstChar = __CFCSetRangeFirstChar(theSet); 1856 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(theSet) - 1); 1857 CFIndex firstPlane = firstChar >> 16; 1858 CFIndex lastPlane = lastChar >> 16; 1859 1860 if (isInverted) { 1861 if (thePlane < firstPlane || thePlane > lastPlane) { 1862 return TRUE; 1863 } else if (thePlane > firstPlane && thePlane < lastPlane) { 1864 return FALSE; 1865 } else { 1866 firstChar &= 0xFFFF; 1867 lastChar &= 0xFFFF; 1868 if (thePlane == firstPlane) { 1869 return (firstChar || (firstPlane == lastPlane && lastChar != 0xFFFF) ? TRUE : FALSE); 1870 } else { 1871 return (lastChar != 0xFFFF || (firstPlane == lastPlane && firstChar) ? TRUE : FALSE); 1872 } 1873 } 1874 } else { 1875 return (thePlane < firstPlane || thePlane > lastPlane ? FALSE : TRUE); 1876 } 1877 } else { 1878 if (thePlane == 0) { 1879 switch (__CFCSetClassType(theSet)) { 1880 case __kCFCharSetClassString: if (!__CFCSetStringLength(theSet)) return isInverted; break; 1881 case __kCFCharSetClassCompactBitmap: return (__CFCSetCompactBitmapBits(theSet) ? TRUE : FALSE); break; 1882 case __kCFCharSetClassBitmap: return (__CFCSetBitmapBits(theSet) ? TRUE : FALSE); break; 1883 } 1884 return TRUE; 1885 } else { 1886 CFCharacterSetRef annex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, thePlane); 1887 if (annex) { 1888 if (__CFCSetIsRange(annex)) { 1889 return (__CFCSetAnnexIsInverted(theSet) && (__CFCSetRangeFirstChar(annex) == 0) && (__CFCSetRangeLength(annex) == 0x10000) ? FALSE : TRUE); 1890 } else if (__CFCSetIsBitmap(annex)) { 1891 return (__CFCSetAnnexIsInverted(theSet) && __CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(annex), (const UInt32 *)-1) ? FALSE : TRUE); 1892 } else { 1893 uint8_t bitsBuf[__kCFBitmapSize]; 1894 __CFCSetGetBitmap(annex, bitsBuf); 1895 return (__CFCSetAnnexIsInverted(theSet) && __CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1) ? FALSE : TRUE); 1896 } 1897 } else { 1898 return __CFCSetAnnexIsInverted(theSet); 1899 } 1900 } 1901 } 1902 1903 return FALSE; 1904} 1905 1906 1907CFDataRef CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc, CFCharacterSetRef theSet) { 1908 CFMutableDataRef data; 1909 int numNonBMPPlanes = 0; 1910 int planeIndices[MAX_ANNEX_PLANE]; 1911 int idx; 1912 int length; 1913 bool isAnnexInverted; 1914 1915 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, CFDataRef , (NSCharacterSet *)theSet, _retainedBitmapRepresentation); 1916 1917 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID); 1918 1919 isAnnexInverted = (__CFCSetAnnexIsInverted(theSet) != 0); 1920 1921 if (__CFCSetHasNonBMPPlane(theSet)) { 1922 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) { 1923 if (isAnnexInverted || __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) { 1924 planeIndices[numNonBMPPlanes++] = idx; 1925 } 1926 } 1927 } else if (__CFCSetIsBuiltin(theSet)) { 1928 numNonBMPPlanes = (__CFCSetIsInverted(theSet) ? MAX_ANNEX_PLANE : CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(theSet)) - 1); 1929 } else if (__CFCSetIsRange(theSet)) { 1930 UInt32 firstChar = __CFCSetRangeFirstChar(theSet); 1931 UInt32 lastChar = __CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet) - 1; 1932 int firstPlane = (firstChar >> 16); 1933 int lastPlane = (lastChar >> 16); 1934 bool isInverted = (__CFCSetIsInverted(theSet) != 0); 1935 1936 if (lastPlane > 0) { 1937 if (firstPlane == 0) { 1938 firstPlane = 1; 1939 firstChar = 0x10000; 1940 } 1941 numNonBMPPlanes = (lastPlane - firstPlane) + 1; 1942 if (isInverted) { 1943 numNonBMPPlanes = MAX_ANNEX_PLANE - numNonBMPPlanes; 1944 if (firstPlane == lastPlane) { 1945 if (((firstChar & 0xFFFF) > 0) || ((lastChar & 0xFFFF) < 0xFFFF)) ++numNonBMPPlanes; 1946 } else { 1947 if ((firstChar & 0xFFFF) > 0) ++numNonBMPPlanes; 1948 if ((lastChar & 0xFFFF) < 0xFFFF) ++numNonBMPPlanes; 1949 } 1950 } 1951 } else if (isInverted) { 1952 numNonBMPPlanes = MAX_ANNEX_PLANE; 1953 } 1954 } else if (isAnnexInverted) { 1955 numNonBMPPlanes = MAX_ANNEX_PLANE; 1956 } 1957 1958 length = __kCFBitmapSize + ((__kCFBitmapSize + 1) * numNonBMPPlanes); 1959 data = CFDataCreateMutable(alloc, length); 1960 CFDataSetLength(data, length); 1961 __CFCSetGetBitmap(theSet, CFDataGetMutableBytePtr(data)); 1962 1963 if (numNonBMPPlanes > 0) { 1964 uint8_t *bytes = CFDataGetMutableBytePtr(data) + __kCFBitmapSize; 1965 1966 if (__CFCSetHasNonBMPPlane(theSet)) { 1967 CFCharacterSetRef subset; 1968 1969 for (idx = 0;idx < numNonBMPPlanes;idx++) { 1970 *(bytes++) = planeIndices[idx]; 1971 if ((subset = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, planeIndices[idx])) == NULL) { 1972 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, (isAnnexInverted ? 0xFF : 0)); 1973 } else { 1974 __CFCSetGetBitmap(subset, bytes); 1975 if (isAnnexInverted) { 1976 uint32_t count = __kCFBitmapSize / sizeof(uint32_t); 1977 uint32_t *bits = (uint32_t *)bytes; 1978 1979 while (count-- > 0) { 1980 *bits = ~(*bits); 1981 ++bits; 1982 } 1983 } 1984 } 1985 bytes += __kCFBitmapSize; 1986 } 1987 } else if (__CFCSetIsBuiltin(theSet)) { 1988 UInt8 result; 1989 CFIndex delta; 1990 Boolean isInverted = __CFCSetIsInverted(theSet); 1991 1992 for (idx = 0;idx < numNonBMPPlanes;idx++) { 1993 if ((result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theSet), idx + 1, bytes + 1, (isInverted != 0))) == kCFUniCharBitmapEmpty) continue; 1994 *(bytes++) = idx + 1; 1995 if (result == kCFUniCharBitmapAll) { 1996 CFIndex bitmapLength = __kCFBitmapSize; 1997 while (bitmapLength-- > 0) *(bytes++) = (uint8_t)0xFF; 1998 } else { 1999 bytes += __kCFBitmapSize; 2000 } 2001 } 2002 delta = bytes - (const uint8_t *)CFDataGetBytePtr(data); 2003 if (delta < length) CFDataSetLength(data, delta); 2004 } else if (__CFCSetIsRange(theSet)) { 2005 UInt32 firstChar = __CFCSetRangeFirstChar(theSet); 2006 UInt32 lastChar = __CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet) - 1; 2007 int firstPlane = (firstChar >> 16); 2008 int lastPlane = (lastChar >> 16); 2009 2010 if (firstPlane == 0) { 2011 firstPlane = 1; 2012 firstChar = 0x10000; 2013 } 2014 if (__CFCSetIsInverted(theSet)) { 2015 // Mask out the plane byte 2016 firstChar &= 0xFFFF; 2017 lastChar &= 0xFFFF; 2018 2019 for (idx = 1;idx < firstPlane;idx++) { // Fill up until the first plane 2020 *(bytes++) = idx; 2021 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF); 2022 bytes += __kCFBitmapSize; 2023 } 2024 if (firstPlane == lastPlane) { 2025 if ((firstChar > 0) || (lastChar < 0xFFFF)) { 2026 *(bytes++) = idx; 2027 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF); 2028 __CFCSetBitmapRemoveCharactersInRange(bytes, firstChar, lastChar); 2029 bytes += __kCFBitmapSize; 2030 } 2031 } else if (firstPlane < lastPlane) { 2032 if (firstChar > 0) { 2033 *(bytes++) = idx; 2034 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0); 2035 __CFCSetBitmapAddCharactersInRange(bytes, 0, firstChar - 1); 2036 bytes += __kCFBitmapSize; 2037 } 2038 if (lastChar < 0xFFFF) { 2039 *(bytes++) = idx; 2040 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0); 2041 __CFCSetBitmapAddCharactersInRange(bytes, lastChar, 0xFFFF); 2042 bytes += __kCFBitmapSize; 2043 } 2044 } 2045 for (idx = lastPlane + 1;idx <= MAX_ANNEX_PLANE;idx++) { 2046 *(bytes++) = idx; 2047 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF); 2048 bytes += __kCFBitmapSize; 2049 } 2050 } else { 2051 for (idx = firstPlane;idx <= lastPlane;idx++) { 2052 *(bytes++) = idx; 2053 __CFCSetBitmapAddCharactersInRange(bytes, (idx == firstPlane ? firstChar : 0), (idx == lastPlane ? lastChar : 0xFFFF)); 2054 bytes += __kCFBitmapSize; 2055 } 2056 } 2057 } else if (isAnnexInverted) { 2058 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) { 2059 *(bytes++) = idx; 2060 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF); 2061 bytes += __kCFBitmapSize; 2062 } 2063 } 2064 } 2065 2066 return data; 2067} 2068 2069/*** MutableCharacterSet functions ***/ 2070void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange) { 2071 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, void, (NSMutableCharacterSet *)theSet, addCharactersInRange:NSMakeRange(theRange.location, theRange.length)); 2072 2073 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__); 2074 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__); 2075 2076 if (__CFCSetIsBuiltin((CFCharacterSetRef)theSet) && !__CFCSetIsMutable((CFCharacterSetRef)theSet) && !__CFCSetIsInverted((CFCharacterSetRef)theSet)) { 2077 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)theSet)); 2078 if (sharedSet == theSet) { // We're trying to dealloc the builtin set 2079 CFAssert1(0, __kCFLogAssertion, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__); 2080 return; // We don't mutate builtin set 2081 } 2082 } 2083 2084 if (!theRange.length || (__CFCSetIsInverted(theSet) && __CFCSetIsEmpty(theSet))) return; // Inverted && empty set contains all char 2085 2086 if (!__CFCSetIsInverted(theSet)) { 2087 if (__CFCSetIsEmpty(theSet)) { 2088 __CFCSetPutClassType(theSet, __kCFCharSetClassRange); 2089 __CFCSetPutRangeFirstChar(theSet, theRange.location); 2090 __CFCSetPutRangeLength(theSet, theRange.length); 2091 __CFCSetPutHasHashValue(theSet, false); 2092 return; 2093 } else if (__CFCSetIsRange(theSet)) { 2094 CFIndex firstChar = __CFCSetRangeFirstChar(theSet); 2095 CFIndex length = __CFCSetRangeLength(theSet); 2096 2097 if (firstChar == theRange.location) { 2098 __CFCSetPutRangeLength(theSet, __CFMax(length, theRange.length)); 2099 __CFCSetPutHasHashValue(theSet, false); 2100 return; 2101 } else if (firstChar < theRange.location && theRange.location <= firstChar + length) { 2102 if (firstChar + length < theRange.location + theRange.length) __CFCSetPutRangeLength(theSet, theRange.length + (theRange.location - firstChar)); 2103 __CFCSetPutHasHashValue(theSet, false); 2104 return; 2105 } else if (theRange.location < firstChar && firstChar <= theRange.location + theRange.length) { 2106 __CFCSetPutRangeFirstChar(theSet, theRange.location); 2107 __CFCSetPutRangeLength(theSet, length + (firstChar - theRange.location)); 2108 __CFCSetPutHasHashValue(theSet, false); 2109 return; 2110 } 2111 } else if (__CFCSetIsString(theSet) && __CFCSetStringLength(theSet) + theRange.length < __kCFStringCharSetMax) { 2112 UniChar *buffer; 2113 if (!__CFCSetStringBuffer(theSet)) 2114 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0)); 2115 buffer = __CFCSetStringBuffer(theSet) + __CFCSetStringLength(theSet); 2116 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theSet) + theRange.length); 2117 while (theRange.length--) *buffer++ = (UniChar)theRange.location++; 2118 qsort(__CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet), sizeof(UniChar), chcompar); 2119 __CFCSetPutHasHashValue(theSet, false); 2120 return; 2121 } 2122 } 2123 2124 // OK, I have to be a bitmap 2125 __CFCSetMakeBitmap(theSet); 2126 __CFCSetAddNonBMPPlanesInRange(theSet, theRange); 2127 if (theRange.location < 0x10000) { // theRange is in BMP 2128 if (theRange.location + theRange.length >= NUMCHARACTERS) theRange.length = NUMCHARACTERS - theRange.location; 2129 __CFCSetBitmapAddCharactersInRange(__CFCSetBitmapBits(theSet), (UniChar)theRange.location, (UniChar)(theRange.location + theRange.length - 1)); 2130 } 2131 __CFCSetPutHasHashValue(theSet, false); 2132 2133 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet); 2134} 2135 2136void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange) { 2137 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, void, (NSMutableCharacterSet *)theSet, removeCharactersInRange:NSMakeRange(theRange.location, theRange.length)); 2138 2139 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__); 2140 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__); 2141 2142 if (__CFCSetIsBuiltin((CFCharacterSetRef)theSet) && !__CFCSetIsMutable((CFCharacterSetRef)theSet) && !__CFCSetIsInverted((CFCharacterSetRef)theSet)) { 2143 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)theSet)); 2144 if (sharedSet == theSet) { // We're trying to dealloc the builtin set 2145 CFAssert1(0, __kCFLogAssertion, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__); 2146 return; // We don't mutate builtin set 2147 } 2148 } 2149 2150 if (!theRange.length || (!__CFCSetIsInverted(theSet) && __CFCSetIsEmpty(theSet))) return; // empty set 2151 2152 if (__CFCSetIsInverted(theSet)) { 2153 if (__CFCSetIsEmpty(theSet)) { 2154 __CFCSetPutClassType(theSet, __kCFCharSetClassRange); 2155 __CFCSetPutRangeFirstChar(theSet, theRange.location); 2156 __CFCSetPutRangeLength(theSet, theRange.length); 2157 __CFCSetPutHasHashValue(theSet, false); 2158 return; 2159 } else if (__CFCSetIsRange(theSet)) { 2160 CFIndex firstChar = __CFCSetRangeFirstChar(theSet); 2161 CFIndex length = __CFCSetRangeLength(theSet); 2162 2163 if (firstChar == theRange.location) { 2164 __CFCSetPutRangeLength(theSet, __CFMin(length, theRange.length)); 2165 __CFCSetPutHasHashValue(theSet, false); 2166 return; 2167 } else if (firstChar < theRange.location && theRange.location <= firstChar + length) { 2168 if (firstChar + length < theRange.location + theRange.length) __CFCSetPutRangeLength(theSet, theRange.length + (theRange.location - firstChar)); 2169 __CFCSetPutHasHashValue(theSet, false); 2170 return; 2171 } else if (theRange.location < firstChar && firstChar <= theRange.location + theRange.length) { 2172 __CFCSetPutRangeFirstChar(theSet, theRange.location); 2173 __CFCSetPutRangeLength(theSet, length + (firstChar - theRange.location)); 2174 __CFCSetPutHasHashValue(theSet, false); 2175 return; 2176 } 2177 } else if (__CFCSetIsString(theSet) && __CFCSetStringLength(theSet) + theRange.length < __kCFStringCharSetMax) { 2178 UniChar *buffer; 2179 if (!__CFCSetStringBuffer(theSet)) 2180 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0)); 2181 buffer = __CFCSetStringBuffer(theSet) + __CFCSetStringLength(theSet); 2182 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theSet) + theRange.length); 2183 while (theRange.length--) *buffer++ = (UniChar)theRange.location++; 2184 qsort(__CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet), sizeof(UniChar), chcompar); 2185 __CFCSetPutHasHashValue(theSet, false); 2186 return; 2187 } 2188 } 2189 2190 // OK, I have to be a bitmap 2191 __CFCSetMakeBitmap(theSet); 2192 __CFCSetRemoveNonBMPPlanesInRange(theSet, theRange); 2193 if (theRange.location < 0x10000) { // theRange is in BMP 2194 if (theRange.location + theRange.length > NUMCHARACTERS) theRange.length = NUMCHARACTERS - theRange.location; 2195 if (theRange.location == 0 && theRange.length == NUMCHARACTERS) { // Remove all 2196 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet)); 2197 __CFCSetPutBitmapBits(theSet, NULL); 2198 } else { 2199 __CFCSetBitmapRemoveCharactersInRange(__CFCSetBitmapBits(theSet), (UniChar)theRange.location, (UniChar)(theRange.location + theRange.length - 1)); 2200 } 2201 } 2202 2203 __CFCSetPutHasHashValue(theSet, false); 2204 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet); 2205} 2206 2207void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString) { 2208 UniChar *buffer; 2209 CFIndex length; 2210 BOOL hasSurrogate = NO; 2211 2212 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, void, (NSMutableCharacterSet *)theSet, addCharactersInString:(NSString *)theString); 2213 2214 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__); 2215 2216 if (__CFCSetIsBuiltin((CFCharacterSetRef)theSet) && !__CFCSetIsMutable((CFCharacterSetRef)theSet) && !__CFCSetIsInverted((CFCharacterSetRef)theSet)) { 2217 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)theSet)); 2218 if (sharedSet == theSet) { // We're trying to dealloc the builtin set 2219 CFAssert1(0, __kCFLogAssertion, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__); 2220 return; // We don't mutate builtin set 2221 } 2222 } 2223 2224 if ((__CFCSetIsEmpty(theSet) && __CFCSetIsInverted(theSet)) || !(length = CFStringGetLength(theString))) return; 2225 2226 if (!__CFCSetIsInverted(theSet)) { 2227 CFIndex newLength = length + (__CFCSetIsEmpty(theSet) ? 0 : (__CFCSetIsString(theSet) ? __CFCSetStringLength(theSet) : __kCFStringCharSetMax)); 2228 2229 if (newLength < __kCFStringCharSetMax) { 2230 buffer = __CFCSetStringBuffer(theSet); 2231 2232 if (NULL == buffer) { 2233 buffer = (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0); 2234 } else { 2235 buffer += __CFCSetStringLength(theSet); 2236 } 2237 2238 CFStringGetCharacters(theString, CFRangeMake(0, length), (UniChar*)buffer); 2239 2240 if (length > 1) { 2241 UTF16Char *characters = buffer; 2242 const UTF16Char *charactersLimit = characters + length; 2243 2244 while (characters < charactersLimit) { 2245 if (CFStringIsSurrogateHighCharacter(*characters) || CFStringIsSurrogateLowCharacter(*characters)) { 2246 memmove(characters, characters + 1, (charactersLimit - (characters + 1)) * sizeof(*characters)); 2247 --charactersLimit; 2248 hasSurrogate = YES; 2249 } else { 2250 ++characters; 2251 } 2252 } 2253 2254 newLength -= (length - (charactersLimit - buffer)); 2255 } 2256 2257 if (0 == newLength) { 2258 if (NULL == __CFCSetStringBuffer(theSet)) CFAllocatorDeallocate(CFGetAllocator(theSet), buffer); 2259 } else { 2260 if (NULL == __CFCSetStringBuffer(theSet)) { 2261 __CFCSetPutClassType(theSet, __kCFCharSetClassString); 2262 __CFCSetPutStringBuffer(theSet, buffer); 2263 } 2264 __CFCSetPutStringLength(theSet, newLength); 2265 qsort(__CFCSetStringBuffer(theSet), newLength, sizeof(UniChar), chcompar); 2266 } 2267 __CFCSetPutHasHashValue(theSet, false); 2268 2269 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetAddCharactersInRange); 2270 2271 return; 2272 } 2273 } 2274 2275 // OK, I have to be a bitmap 2276 __CFCSetMakeBitmap(theSet); 2277 CFStringInlineBuffer inlineBuffer; 2278 CFIndex idx; 2279 2280 CFStringInitInlineBuffer(theString, &inlineBuffer, CFRangeMake(0, length)); 2281 2282 for (idx = 0;idx < length;idx++) { 2283 UTF16Char character = __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer, idx); 2284 2285 if (CFStringIsSurrogateHighCharacter(character) || CFStringIsSurrogateLowCharacter(character)) { 2286 hasSurrogate = YES; 2287 } else { 2288 __CFCSetBitmapAddCharacter(__CFCSetBitmapBits(theSet), character); 2289 } 2290 } 2291 2292 __CFCSetPutHasHashValue(theSet, false); 2293 2294 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet); 2295 2296 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetAddCharactersInRange); 2297} 2298 2299void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString) { 2300 UniChar *buffer; 2301 CFIndex length; 2302 BOOL hasSurrogate = NO; 2303 2304 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, void, (NSMutableCharacterSet *)theSet, removeCharactersInString:(NSString *)theString); 2305 2306 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__); 2307 2308 if (__CFCSetIsBuiltin((CFCharacterSetRef)theSet) && !__CFCSetIsMutable((CFCharacterSetRef)theSet) && !__CFCSetIsInverted((CFCharacterSetRef)theSet)) { 2309 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)theSet)); 2310 if (sharedSet == theSet) { // We're trying to dealloc the builtin set 2311 CFAssert1(0, __kCFLogAssertion, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__); 2312 return; // We don't mutate builtin set 2313 } 2314 } 2315 2316 if ((__CFCSetIsEmpty(theSet) && !__CFCSetIsInverted(theSet)) || !(length = CFStringGetLength(theString))) return; 2317 2318 if (__CFCSetIsInverted(theSet)) { 2319 CFIndex newLength = length + (__CFCSetIsEmpty(theSet) ? 0 : (__CFCSetIsString(theSet) ? __CFCSetStringLength(theSet) : __kCFStringCharSetMax)); 2320 2321 if (newLength < __kCFStringCharSetMax) { 2322 buffer = __CFCSetStringBuffer(theSet); 2323 2324 if (NULL == buffer) { 2325 buffer = (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0); 2326 } else { 2327 buffer += __CFCSetStringLength(theSet); 2328 } 2329 2330 CFStringGetCharacters(theString, CFRangeMake(0, length), (UniChar*)buffer); 2331 2332 if (length > 1) { 2333 UTF16Char *characters = buffer; 2334 const UTF16Char *charactersLimit = characters + length; 2335 2336 while (characters < charactersLimit) { 2337 if (CFStringIsSurrogateHighCharacter(*characters) || CFStringIsSurrogateLowCharacter(*characters)) { 2338 memmove(characters, characters + 1, charactersLimit - (characters + 1)); 2339 --charactersLimit; 2340 hasSurrogate = YES; 2341 } 2342 ++characters; 2343 } 2344 2345 newLength -= (length - (charactersLimit - buffer)); 2346 } 2347 2348 if (NULL == __CFCSetStringBuffer(theSet)) { 2349 __CFCSetPutClassType(theSet, __kCFCharSetClassString); 2350 __CFCSetPutStringBuffer(theSet, buffer); 2351 } 2352 __CFCSetPutStringLength(theSet, newLength); 2353 qsort(__CFCSetStringBuffer(theSet), newLength, sizeof(UniChar), chcompar); 2354 __CFCSetPutHasHashValue(theSet, false); 2355 2356 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetRemoveCharactersInRange); 2357 2358 return; 2359 } 2360 } 2361 2362 // OK, I have to be a bitmap 2363 __CFCSetMakeBitmap(theSet); 2364 CFStringInlineBuffer inlineBuffer; 2365 CFIndex idx; 2366 2367 CFStringInitInlineBuffer(theString, &inlineBuffer, CFRangeMake(0, length)); 2368 2369 for (idx = 0;idx < length;idx++) { 2370 UTF16Char character = __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer, idx); 2371 2372 if (CFStringIsSurrogateHighCharacter(character) || CFStringIsSurrogateLowCharacter(character)) { 2373 hasSurrogate = YES; 2374 } else { 2375 __CFCSetBitmapRemoveCharacter(__CFCSetBitmapBits(theSet), character); 2376 } 2377 } 2378 2379 __CFCSetPutHasHashValue(theSet, false); 2380 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet); 2381 2382 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetRemoveCharactersInRange); 2383} 2384 2385void CFCharacterSetUnion(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet) { 2386 CFCharacterSetRef expandedSet = NULL; 2387 2388 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, void, (NSMutableCharacterSet *)theSet, formUnionWithCharacterSet:(NSCharacterSet *)theOtherSet); 2389 2390 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__); 2391 2392 if (__CFCSetIsBuiltin((CFCharacterSetRef)theSet) && !__CFCSetIsMutable((CFCharacterSetRef)theSet) && !__CFCSetIsInverted((CFCharacterSetRef)theSet)) { 2393 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)theSet)); 2394 if (sharedSet == theSet) { // We're trying to dealloc the builtin set 2395 CFAssert1(0, __kCFLogAssertion, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__); 2396 return; // We don't mutate builtin set 2397 } 2398 } 2399 2400 if (__CFCSetIsEmpty(theSet) && __CFCSetIsInverted(theSet)) return; // Inverted empty set contains all char 2401 2402 if (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet))) { // Really CF, we can do some trick here 2403 if (expandedSet) theOtherSet = expandedSet; 2404 2405 if (__CFCSetIsEmpty(theOtherSet)) { 2406 if (__CFCSetIsInverted(theOtherSet)) { 2407 if (__CFCSetIsString(theSet) && __CFCSetStringBuffer(theSet)) { 2408 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetStringBuffer(theSet)); 2409 } else if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) { 2410 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet)); 2411 } else if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) { 2412 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetCompactBitmapBits(theSet)); 2413 } 2414 __CFCSetPutClassType(theSet, __kCFCharSetClassRange); 2415 __CFCSetPutRangeLength(theSet, 0); 2416 __CFCSetPutIsInverted(theSet, true); 2417 __CFCSetPutHasHashValue(theSet, false); 2418 __CFCSetDeallocateAnnexPlane(theSet); 2419 } 2420 } else if (__CFCSetIsBuiltin(theOtherSet) && __CFCSetIsEmpty(theSet)) { // theSet can be builtin set 2421 __CFCSetPutClassType(theSet, __kCFCharSetClassBuiltin); 2422 __CFCSetPutBuiltinType(theSet, __CFCSetBuiltinType(theOtherSet)); 2423 if (__CFCSetIsInverted(theOtherSet)) __CFCSetPutIsInverted(theSet, true); 2424 if (__CFCSetAnnexIsInverted(theOtherSet)) __CFCSetAnnexSetIsInverted(theSet, true); 2425 __CFCSetPutHasHashValue(theSet, false); 2426 } else { 2427 if (__CFCSetIsRange(theOtherSet)) { 2428 if (__CFCSetIsInverted(theOtherSet)) { 2429 UTF32Char firstChar = __CFCSetRangeFirstChar(theOtherSet); 2430 CFIndex length = __CFCSetRangeLength(theOtherSet); 2431 2432 if (firstChar > 0) CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(0, firstChar)); 2433 firstChar += length; 2434 length = 0x110000 - firstChar; 2435 CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(firstChar, length)); 2436 } else { 2437 CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet), __CFCSetRangeLength(theOtherSet))); 2438 } 2439 } else if (__CFCSetIsString(theOtherSet)) { 2440 CFStringRef string = CFStringCreateWithCharactersNoCopy(CFGetAllocator(theSet), __CFCSetStringBuffer(theOtherSet), __CFCSetStringLength(theOtherSet), kCFAllocatorNull); 2441 CFCharacterSetAddCharactersInString(theSet, string); 2442 CFRelease(string); 2443 } else { 2444 __CFCSetMakeBitmap(theSet); 2445 if (__CFCSetIsBitmap(theOtherSet)) { 2446 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet); 2447 UInt32 *bitmap2 = (UInt32*)__CFCSetBitmapBits(theOtherSet); 2448 CFIndex length = __kCFBitmapSize / sizeof(UInt32); 2449 while (length--) *bitmap1++ |= *bitmap2++; 2450 } else { 2451 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet); 2452 UInt32 *bitmap2; 2453 CFIndex length = __kCFBitmapSize / sizeof(UInt32); 2454 uint8_t bitmapBuffer[__kCFBitmapSize]; 2455 __CFCSetGetBitmap(theOtherSet, bitmapBuffer); 2456 bitmap2 = (UInt32*)bitmapBuffer; 2457 while (length--) *bitmap1++ |= *bitmap2++; 2458 } 2459 __CFCSetPutHasHashValue(theSet, false); 2460 } 2461 if (__CFCSetHasNonBMPPlane(theOtherSet)) { 2462 CFMutableCharacterSetRef otherSetPlane; 2463 int idx; 2464 2465 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) { 2466 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) { 2467 CFCharacterSetUnion((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx), otherSetPlane); 2468 } 2469 } 2470 } else if (__CFCSetAnnexIsInverted(theOtherSet)) { 2471 if (__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet); 2472 __CFCSetAnnexSetIsInverted(theSet, true); 2473 } else if (__CFCSetIsBuiltin(theOtherSet)) { 2474 CFMutableCharacterSetRef annexPlane; 2475 uint8_t bitmapBuffer[__kCFBitmapSize]; 2476 uint8_t result; 2477 int planeIndex; 2478 Boolean isOtherAnnexPlaneInverted = __CFCSetAnnexIsInverted(theOtherSet); 2479 UInt32 *bitmap1; 2480 UInt32 *bitmap2; 2481 CFIndex length; 2482 2483 for (planeIndex = 1;planeIndex <= MAX_ANNEX_PLANE;planeIndex++) { 2484 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet), planeIndex, bitmapBuffer, (isOtherAnnexPlaneInverted != 0)); 2485 if (result != kCFUniCharBitmapEmpty) { 2486 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, planeIndex); 2487 if (result == kCFUniCharBitmapAll) { 2488 CFCharacterSetAddCharactersInRange(annexPlane, CFRangeMake(0x0000, 0x10000)); 2489 } else { 2490 __CFCSetMakeBitmap(annexPlane); 2491 bitmap1 = (UInt32 *)__CFCSetBitmapBits(annexPlane); 2492 length = __kCFBitmapSize / sizeof(UInt32); 2493 bitmap2 = (UInt32*)bitmapBuffer; 2494 while (length--) *bitmap1++ |= *bitmap2++; 2495 } 2496 } 2497 } 2498 } 2499 } 2500 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet); 2501 } else { // It's NSCharacterSet 2502 CFDataRef bitmapRep = CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault, theOtherSet); 2503 const UInt32 *bitmap2 = (bitmapRep && CFDataGetLength(bitmapRep) ? (const UInt32 *)CFDataGetBytePtr(bitmapRep) : NULL); 2504 if (bitmap2) { 2505 UInt32 *bitmap1; 2506 CFIndex length = __kCFBitmapSize / sizeof(UInt32); 2507 __CFCSetMakeBitmap(theSet); 2508 bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet); 2509 while (length--) *bitmap1++ |= *bitmap2++; 2510 __CFCSetPutHasHashValue(theSet, false); 2511 } 2512 CFRelease(bitmapRep); 2513 } 2514} 2515 2516void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet) { 2517 CFCharacterSetRef expandedSet = NULL; 2518 2519 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, void, (NSMutableCharacterSet *)theSet, formIntersectionWithCharacterSet:(NSCharacterSet *)theOtherSet); 2520 2521 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__); 2522 2523 if (__CFCSetIsBuiltin((CFCharacterSetRef)theSet) && !__CFCSetIsMutable((CFCharacterSetRef)theSet) && !__CFCSetIsInverted((CFCharacterSetRef)theSet)) { 2524 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)theSet)); 2525 if (sharedSet == theSet) { // We're trying to dealloc the builtin set 2526 CFAssert1(0, __kCFLogAssertion, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__); 2527 return; // We don't mutate builtin set 2528 } 2529 } 2530 2531 if (__CFCSetIsEmpty(theSet) && !__CFCSetIsInverted(theSet)) return; // empty set 2532 2533 if (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet))) { // Really CF, we can do some trick here 2534 if (expandedSet) theOtherSet = expandedSet; 2535 2536 if (__CFCSetIsEmpty(theOtherSet)) { 2537 if (!__CFCSetIsInverted(theOtherSet)) { 2538 if (__CFCSetIsString(theSet) && __CFCSetStringBuffer(theSet)) { 2539 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetStringBuffer(theSet)); 2540 } else if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) { 2541 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet)); 2542 } else if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) { 2543 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetCompactBitmapBits(theSet)); 2544 } 2545 __CFCSetPutClassType(theSet, __kCFCharSetClassBitmap); 2546 __CFCSetPutBitmapBits(theSet, NULL); 2547 __CFCSetPutIsInverted(theSet, false); 2548 theSet->_hashValue = 0; 2549 __CFCSetPutHasHashValue(theSet, true); 2550 __CFCSetDeallocateAnnexPlane(theSet); 2551 } 2552 } else if (__CFCSetIsEmpty(theSet)) { // non inverted empty set contains all character 2553 __CFCSetPutClassType(theSet, __CFCSetClassType(theOtherSet)); 2554 __CFCSetPutHasHashValue(theSet, __CFCSetHasHashValue(theOtherSet)); 2555 __CFCSetPutIsInverted(theSet, __CFCSetIsInverted(theOtherSet)); 2556 theSet->_hashValue = theOtherSet->_hashValue; 2557 if (__CFCSetHasNonBMPPlane(theOtherSet)) { 2558 CFMutableCharacterSetRef otherSetPlane; 2559 int idx; 2560 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) { 2561 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) { 2562 otherSetPlane = (CFMutableCharacterSetRef)CFCharacterSetCreateMutableCopy(CFGetAllocator(theSet), otherSetPlane); 2563 __CFCSetPutCharacterSetToAnnexPlane(theSet, otherSetPlane, idx); 2564 CFRelease(otherSetPlane); 2565 } 2566 } 2567 __CFCSetAnnexSetIsInverted(theSet, __CFCSetAnnexIsInverted(theOtherSet)); 2568 } 2569 2570 switch (__CFCSetClassType(theOtherSet)) { 2571 case __kCFCharSetClassBuiltin: 2572 __CFCSetPutBuiltinType(theSet, __CFCSetBuiltinType(theOtherSet)); 2573 break; 2574 2575 case __kCFCharSetClassRange: 2576 __CFCSetPutRangeFirstChar(theSet, __CFCSetRangeFirstChar(theOtherSet)); 2577 __CFCSetPutRangeLength(theSet, __CFCSetRangeLength(theOtherSet)); 2578 break; 2579 2580 case __kCFCharSetClassString: 2581 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theOtherSet)); 2582 if (!__CFCSetStringBuffer(theSet)) 2583 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0)); 2584 memmove(__CFCSetStringBuffer(theSet), __CFCSetStringBuffer(theOtherSet), __CFCSetStringLength(theSet) * sizeof(UniChar)); 2585 break; 2586 2587 case __kCFCharSetClassBitmap: 2588 __CFCSetPutBitmapBits(theSet, (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet), sizeof(uint8_t) * __kCFBitmapSize, 0)); 2589 memmove(__CFCSetBitmapBits(theSet), __CFCSetBitmapBits(theOtherSet), __kCFBitmapSize); 2590 break; 2591 2592 case __kCFCharSetClassCompactBitmap: { 2593 const uint8_t *cBitmap = __CFCSetCompactBitmapBits(theOtherSet); 2594 uint8_t *newBitmap; 2595 uint32_t size = __CFCSetGetCompactBitmapSize(cBitmap); 2596 newBitmap = (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet), sizeof(uint8_t) * size, 0); 2597 __CFCSetPutBitmapBits(theSet, newBitmap); 2598 memmove(newBitmap, cBitmap, size); 2599 } 2600 break; 2601 2602 default: 2603 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here 2604 } 2605 } else { 2606 __CFCSetMakeBitmap(theSet); 2607 if (__CFCSetIsBitmap(theOtherSet)) { 2608 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet); 2609 UInt32 *bitmap2 = (UInt32*)__CFCSetBitmapBits(theOtherSet); 2610 CFIndex length = __kCFBitmapSize / sizeof(UInt32); 2611 while (length--) *bitmap1++ &= *bitmap2++; 2612 } else { 2613 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet); 2614 UInt32 *bitmap2; 2615 CFIndex length = __kCFBitmapSize / sizeof(UInt32); 2616 uint8_t bitmapBuffer[__kCFBitmapSize]; 2617 __CFCSetGetBitmap(theOtherSet, bitmapBuffer); 2618 bitmap2 = (UInt32*)bitmapBuffer; 2619 while (length--) *bitmap1++ &= *bitmap2++; 2620 } 2621 __CFCSetPutHasHashValue(theSet, false); 2622 if (__CFCSetHasNonBMPPlane(theOtherSet)) { 2623 CFMutableCharacterSetRef annexPlane; 2624 CFMutableCharacterSetRef otherSetPlane; 2625 int idx; 2626 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) { 2627 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) { 2628 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx); 2629 if (__CFCSetAnnexIsInverted(theSet)) CFCharacterSetInvert(annexPlane); 2630 CFCharacterSetIntersect(annexPlane, otherSetPlane); 2631 if (__CFCSetAnnexIsInverted(theSet)) CFCharacterSetInvert(annexPlane); 2632 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx); 2633 } else if (__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) { 2634 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx); 2635 } 2636 } 2637 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet); 2638 } else if (__CFCSetIsBuiltin(theOtherSet) && !__CFCSetAnnexIsInverted(theOtherSet)) { 2639 CFMutableCharacterSetRef annexPlane; 2640 uint8_t bitmapBuffer[__kCFBitmapSize]; 2641 uint8_t result; 2642 int planeIndex; 2643 UInt32 *bitmap1; 2644 UInt32 *bitmap2; 2645 CFIndex length; 2646 2647 for (planeIndex = 1;planeIndex <= MAX_ANNEX_PLANE;planeIndex++) { 2648 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, planeIndex); 2649 if (annexPlane) { 2650 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet), planeIndex, bitmapBuffer, false); 2651 if (result == kCFUniCharBitmapEmpty) { 2652 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, planeIndex); 2653 } else if (result == kCFUniCharBitmapFilled) { 2654 Boolean isEmpty = true; 2655 2656 __CFCSetMakeBitmap(annexPlane); 2657 bitmap1 = (UInt32 *)__CFCSetBitmapBits(annexPlane); 2658 length = __kCFBitmapSize / sizeof(UInt32); 2659 bitmap2 = (UInt32*)bitmapBuffer; 2660 2661 while (length--) { 2662 if ((*bitmap1++ &= *bitmap2++)) isEmpty = false; 2663 } 2664 if (isEmpty) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, planeIndex); 2665 } 2666 } 2667 } 2668 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet); 2669 } else if (__CFCSetIsRange(theOtherSet)) { 2670 CFMutableCharacterSetRef tempOtherSet = CFCharacterSetCreateMutable(CFGetAllocator(theSet)); 2671 CFMutableCharacterSetRef annexPlane; 2672 CFMutableCharacterSetRef otherSetPlane; 2673 int idx; 2674 2675 __CFCSetAddNonBMPPlanesInRange(tempOtherSet, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet), __CFCSetRangeLength(theOtherSet))); 2676 2677 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) { 2678 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(tempOtherSet, idx))) { 2679 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx); 2680 if (__CFCSetAnnexIsInverted(theSet)) CFCharacterSetInvert(annexPlane); 2681 CFCharacterSetIntersect(annexPlane, otherSetPlane); 2682 if (__CFCSetAnnexIsInverted(theSet)) CFCharacterSetInvert(annexPlane); 2683 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx); 2684 } else if (__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) { 2685 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx); 2686 } 2687 } 2688 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet); 2689 CFRelease(tempOtherSet); 2690 } else if ((__CFCSetHasNonBMPPlane(theSet) || __CFCSetAnnexIsInverted(theSet)) && !__CFCSetAnnexIsInverted(theOtherSet)) { 2691 __CFCSetDeallocateAnnexPlane(theSet); 2692 } 2693 } 2694 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet); 2695 } else { // It's NSCharacterSet 2696 CFDataRef bitmapRep = CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault, theOtherSet); 2697 const UInt32 *bitmap2 = (bitmapRep && CFDataGetLength(bitmapRep) ? (const UInt32 *)CFDataGetBytePtr(bitmapRep) : NULL); 2698 if (bitmap2) { 2699 UInt32 *bitmap1; 2700 CFIndex length = __kCFBitmapSize / sizeof(UInt32); 2701 __CFCSetMakeBitmap(theSet); 2702 bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet); 2703 while (length--) *bitmap1++ &= *bitmap2++; 2704 __CFCSetPutHasHashValue(theSet, false); 2705 } 2706 CFRelease(bitmapRep); 2707 } 2708} 2709 2710void CFCharacterSetInvert(CFMutableCharacterSetRef theSet) { 2711 2712 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, void, (NSMutableCharacterSet *)theSet, invert); 2713 2714 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__); 2715 2716 if (__CFCSetIsBuiltin((CFCharacterSetRef)theSet) && !__CFCSetIsMutable((CFCharacterSetRef)theSet) && !__CFCSetIsInverted((CFCharacterSetRef)theSet)) { 2717 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)theSet)); 2718 if (sharedSet == theSet) { // We're trying to dealloc the builtin set 2719 CFAssert1(0, __kCFLogAssertion, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__); 2720 return; // We don't mutate builtin set 2721 } 2722 } 2723 2724 __CFCSetPutHasHashValue(theSet, false); 2725 2726 if (__CFCSetClassType(theSet) == __kCFCharSetClassBitmap) { 2727 CFIndex idx; 2728 CFIndex count = __kCFBitmapSize / sizeof(UInt32); 2729 UInt32 *bitmap = (UInt32*) __CFCSetBitmapBits(theSet); 2730 2731 if (NULL == bitmap) { 2732 bitmap = (UInt32 *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFBitmapSize, 0); 2733 __CFCSetPutBitmapBits(theSet, (uint8_t *)bitmap); 2734 for (idx = 0;idx < count;idx++) bitmap[idx] = ((UInt32)0xFFFFFFFF); 2735 } else { 2736 for (idx = 0;idx < count;idx++) bitmap[idx] = ~(bitmap[idx]); 2737 } 2738 __CFCSetAllocateAnnexForPlane(theSet, 0); // We need to alloc annex to invert 2739 } else if (__CFCSetClassType(theSet) == __kCFCharSetClassCompactBitmap) { 2740 uint8_t *bitmap = __CFCSetCompactBitmapBits(theSet); 2741 int idx; 2742 int length = 0; 2743 uint8_t value; 2744 2745 for (idx = 0;idx < __kCFCompactBitmapNumPages;idx++) { 2746 value = bitmap[idx]; 2747 2748 if (value == 0) { 2749 bitmap[idx] = UINT8_MAX; 2750 } else if (value == UINT8_MAX) { 2751 bitmap[idx] = 0; 2752 } else { 2753 length += __kCFCompactBitmapPageSize; 2754 } 2755 } 2756 bitmap += __kCFCompactBitmapNumPages; 2757 for (idx = 0;idx < length;idx++) bitmap[idx] = ~(bitmap[idx]); 2758 __CFCSetAllocateAnnexForPlane(theSet, 0); // We need to alloc annex to invert 2759 } else { 2760 __CFCSetPutIsInverted(theSet, !__CFCSetIsInverted(theSet)); 2761 } 2762 __CFCSetAnnexSetIsInverted(theSet, !__CFCSetAnnexIsInverted(theSet)); 2763} 2764 2765void CFCharacterSetCompact(CFMutableCharacterSetRef theSet) { 2766 if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) __CFCSetMakeCompact(theSet); 2767 if (__CFCSetHasNonBMPPlane(theSet)) { 2768 CFMutableCharacterSetRef annex; 2769 int idx; 2770 2771 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) { 2772 if ((annex = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) && __CFCSetIsBitmap(annex) && __CFCSetBitmapBits(annex)) { 2773 __CFCSetMakeCompact(annex); 2774 } 2775 } 2776 } 2777} 2778 2779void CFCharacterSetFast(CFMutableCharacterSetRef theSet) { 2780 if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) __CFCSetMakeBitmap(theSet); 2781 if (__CFCSetHasNonBMPPlane(theSet)) { 2782 CFMutableCharacterSetRef annex; 2783 int idx; 2784 2785 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) { 2786 if ((annex = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) && __CFCSetIsCompactBitmap(annex) && __CFCSetCompactBitmapBits(annex)) { 2787 __CFCSetMakeBitmap(annex); 2788 } 2789 } 2790 } 2791} 2792 2793/* Keyed-coding support 2794*/ 2795CFCharacterSetKeyedCodingType _CFCharacterSetGetKeyedCodingType(CFCharacterSetRef cset) { 2796 if (CF_IS_OBJC(__kCFCharacterSetTypeID, cset)) return kCFCharacterSetKeyedCodingTypeBitmap; 2797 2798 switch (__CFCSetClassType(cset)) { 2799 case __kCFCharSetClassBuiltin: return ((__CFCSetBuiltinType(cset) < kCFCharacterSetSymbol) ? kCFCharacterSetKeyedCodingTypeBuiltin : kCFCharacterSetKeyedCodingTypeBuiltinAndBitmap); 2800 case __kCFCharSetClassRange: return kCFCharacterSetKeyedCodingTypeRange; 2801 2802 case __kCFCharSetClassString: // We have to check if we have non-BMP here 2803 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) return kCFCharacterSetKeyedCodingTypeString; // BMP only. we can archive the string 2804 /* fallthrough */ 2805 2806 default: 2807 return kCFCharacterSetKeyedCodingTypeBitmap; 2808 } 2809} 2810 2811CFCharacterSetPredefinedSet _CFCharacterSetGetKeyedCodingBuiltinType(CFCharacterSetRef cset) { return __CFCSetBuiltinType(cset); } 2812CFRange _CFCharacterSetGetKeyedCodingRange(CFCharacterSetRef cset) { return CFRangeMake(__CFCSetRangeFirstChar(cset), __CFCSetRangeLength(cset)); } 2813CFStringRef _CFCharacterSetCreateKeyedCodingString(CFCharacterSetRef cset) { return CFStringCreateWithCharacters(kCFAllocatorSystemDefault, __CFCSetStringBuffer(cset), __CFCSetStringLength(cset)); } 2814 2815bool _CFCharacterSetIsInverted(CFCharacterSetRef cset) { return (__CFCSetIsInverted(cset) != 0); } 2816void _CFCharacterSetSetIsInverted(CFCharacterSetRef cset, bool flag) { __CFCSetPutIsInverted((CFMutableCharacterSetRef)cset, flag); } 2817 2818/* Inline buffer support 2819*/ 2820void CFCharacterSetInitInlineBuffer(CFCharacterSetRef cset, CFCharacterSetInlineBuffer *buffer) { 2821 memset(buffer, 0, sizeof(CFCharacterSetInlineBuffer)); 2822 buffer->cset = cset; 2823 buffer->rangeLimit = 0x10000; 2824 2825 if (CF_IS_OBJC(__kCFCharacterSetTypeID, cset)) { 2826 CFCharacterSetRef expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(cset); 2827 2828 if (NULL == expandedSet) { 2829 buffer->flags = kCFCharacterSetNoBitmapAvailable; 2830 buffer->rangeLimit = 0x110000; 2831 2832 return; 2833 } else { 2834 cset = expandedSet; 2835 } 2836 } 2837 2838 switch (__CFCSetClassType(cset)) { 2839 case __kCFCharSetClassBuiltin: 2840 buffer->bitmap = CFUniCharGetBitmapPtrForPlane(__CFCSetBuiltinType(cset), 0); 2841 buffer->rangeLimit = 0x110000; 2842 if (NULL == buffer->bitmap) { 2843 buffer->flags = kCFCharacterSetNoBitmapAvailable; 2844 } else { 2845 if (__CFCSetIsInverted(cset)) buffer->flags = kCFCharacterSetIsInverted; 2846 } 2847 break; 2848 2849 case __kCFCharSetClassRange: 2850 buffer->rangeStart = __CFCSetRangeFirstChar(cset); 2851 buffer->rangeLimit = __CFCSetRangeFirstChar(cset) + __CFCSetRangeLength(cset); 2852 if (__CFCSetIsInverted(cset)) buffer->flags = kCFCharacterSetIsInverted; 2853 return; 2854 2855 case __kCFCharSetClassString: 2856 buffer->flags = kCFCharacterSetNoBitmapAvailable; 2857 if (__CFCSetStringLength(cset) > 0) { 2858 buffer->rangeStart = *__CFCSetStringBuffer(cset); 2859 buffer->rangeLimit = *(__CFCSetStringBuffer(cset) + __CFCSetStringLength(cset) - 1) + 1; 2860 2861 if (__CFCSetIsInverted(cset)) { 2862 if (0 == buffer->rangeStart) { 2863 buffer->rangeStart = buffer->rangeLimit; 2864 buffer->rangeLimit = 0x10000; 2865 } else if (0x10000 == buffer->rangeLimit) { 2866 buffer->rangeLimit = buffer->rangeStart; 2867 buffer->rangeStart = 0; 2868 } else { 2869 buffer->rangeStart = 0; 2870 buffer->rangeLimit = 0x10000; 2871 } 2872 } 2873 } 2874 break; 2875 2876 case __kCFCharSetClassBitmap: 2877 case __kCFCharSetClassCompactBitmap: 2878 buffer->bitmap = __CFCSetCompactBitmapBits(cset); 2879 if (NULL == buffer->bitmap) { 2880 buffer->flags = kCFCharacterSetIsCompactBitmap; 2881 if (__CFCSetIsInverted(cset)) buffer->flags |= kCFCharacterSetIsInverted; 2882 } else { 2883 if (__kCFCharSetClassCompactBitmap == __CFCSetClassType(cset)) buffer->flags = kCFCharacterSetIsCompactBitmap; 2884 } 2885 break; 2886 2887 default: 2888 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here 2889 return; 2890 } 2891 2892 if (__CFCSetAnnexIsInverted(cset)) { 2893 buffer->rangeLimit = 0x110000; 2894 } else if (__CFCSetHasNonBMPPlane(cset)) { 2895 CFIndex index; 2896 2897 for (index = MAX_ANNEX_PLANE;index > 0;index--) { 2898 if (NULL != __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, index)) { 2899 buffer->rangeLimit = (index + 1) << 16; 2900 break; 2901 } 2902 } 2903 } 2904} 2905