1/* 2 * Copyright (c) 2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. Please obtain a copy of the License at 10 * http://www.opensource.apple.com/apsl/ and read it before using this 11 * file. 12 * 13 * The Original Code and all software distributed under the License are 14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 18 * Please see the License for the specific language governing rights and 19 * limitations under the License. 20 * 21 * @APPLE_LICENSE_HEADER_END@ 22 */ 23 24/* CFString.c 25 Copyright (c) 1998-2013, Apple Inc. All rights reserved. 26 Responsibility: Ali Ozer 27 28!!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined. 
*/

#include <CoreFoundation/CFBase.h>
#include <CoreFoundation/CFString.h>
#include <CoreFoundation/CFDictionary.h>
#include <CoreFoundation/CFStringEncodingConverterExt.h>
#include <CoreFoundation/CFUniChar.h>
#include <CoreFoundation/CFUnicodeDecomposition.h>
#include <CoreFoundation/CFUnicodePrecomposition.h>
#include <CoreFoundation/CFPriv.h>
#include <CoreFoundation/CFNumber.h>
#include <CoreFoundation/CFNumberFormatter.h>
#include "CFInternal.h"
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
#include "CFLocaleInternal.h"
#endif
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
#include <unistd.h>
#endif

#if defined(__GNUC__)
#define LONG_DOUBLE_SUPPORT 1
#else
#define LONG_DOUBLE_SUPPORT 0
#endif



#define USE_STRING_ROM 0


// Instrumentation is compiled out unless the build defines INSTRUMENT_SHARED_STRINGS to a non-zero value
#ifndef INSTRUMENT_SHARED_STRINGS
#define INSTRUMENT_SHARED_STRINGS 0
#endif

CF_PRIVATE const CFStringRef __kCFLocaleCollatorID;

#if INSTRUMENT_SHARED_STRINGS
#include <sys/stat.h> /* for umask() */

/* Appends one record describing a string allocation to a per-process log file.
   encoding   - label written in the second column of the record
   bytes      - string contents; at most byteCount bytes are written (via a "%.<byteCount>s" conversion)
   byteCount  - number of bytes in 'bytes'
   The log file is /tmp/CFSharedStringInstrumentation_<progname>_<pid>.txt; it is opened on first use and the
   descriptor is kept in a function-static for later calls. All work after the newline check happens under a spin lock.
*/
static void __CFRecordStringAllocationEvent(const char *encoding, const char *bytes, CFIndex byteCount) {
    static CFSpinLock_t lock = CFSpinLockInit;

    if (memchr(bytes, '\n', byteCount)) return; //never record string allocation events for strings with newlines, because those confuse our parser and because they'll never go into the ROM

    __CFSpinLock(&lock);
    static int fd;	// 0 means "not opened yet"
    if (! fd) {
	extern char **_NSGetProgname(void);
	const char *name = *_NSGetProgname();
	if (! name) name = "UNKNOWN";
	umask(0);	// so the 0666 mode passed to open() below is not masked
	char path[1024];
	snprintf(path, sizeof(path), "/tmp/CFSharedStringInstrumentation_%s_%d.txt", name, getpid());
	fd = open(path, O_WRONLY | O_APPEND | O_CREAT, 0666);
	if (fd <= 0) {
	    // NOTE(review): a failed open() presumably leaves fd at -1, so later calls neither retry nor log — confirm this is intended
	    int error = errno;
	    const char *errString = strerror(error);
	    fprintf(stderr, "open() failed with error %d (%s)\n", error, errString);
	}
    }
    if (fd > 0) {
	char *buffer = NULL;
	char formatString[256];
	// Build the real format at run time so the precision of the last %s equals byteCount
	snprintf(formatString, sizeof(formatString), "%%-8d\t%%-16s\t%%.%lds\n", byteCount);
	int resultCount = asprintf(&buffer, formatString, getpid(), encoding, bytes);
	if (buffer && resultCount > 0) write(fd, buffer, resultCount);
	else puts("Couldn't record allocation event");
	free(buffer);
    }
    __CFSpinUnlock(&lock);
}
#endif //INSTRUMENT_SHARED_STRINGS



// Signature of a function that maps one 8-bit character to a UniChar
typedef Boolean (*UNI_CHAR_FUNC)(UInt32 flags, UInt8 ch, UniChar *unicodeChar);

#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
extern size_t malloc_good_size(size_t size);
#endif
extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars);

// Forward declaration; the definition is not in this part of the file
static void __CFStringAppendFormatCore(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFDictionaryRef stringsDictConfig, CFStringRef formatString, CFIndex initialArgPosition, const void *origValues, CFIndex originalValuesSize, va_list args);

#if defined(DEBUG)

// We put this into C & Pascal strings if we can't convert
#define CONVERSIONFAILURESTR "CFString conversion failed"

// We set this to true when purging the constant string table, so CFStringDeallocate doesn't assert
static Boolean __CFConstantStringTableBeingFreed = false;

#endif



// This section is for CFString compatibility and other behaviors...

// Accumulated compatibility flags; bits are only ever added (see _CFStringSetCompatibility)
static CFOptionFlags _CFStringCompatibilityMask = 0;

/* ORs 'mask' into the file-wide compatibility flags. */
void _CFStringSetCompatibility(CFOptionFlags mask) {
    _CFStringCompatibilityMask |= mask;
}

/* Returns true only when every bit of 'mask' is set in the compatibility flags. */
CF_INLINE Boolean __CFStringGetCompatibility(CFOptionFlags mask) {
    return (_CFStringCompatibilityMask & mask) == mask;
}



// Two constant strings used by CFString; these are initialized in CFStringInitialize
CONST_STRING_DECL(kCFEmptyString, "")

// This is separate for C++
struct __notInlineMutable {
    void *buffer;                               // Contents; first member, like the buffer of the other not-inline variants
    CFIndex length;
    CFIndex capacity;                           // Capacity in bytes
    unsigned int hasGap:1;                      // Currently unused
    unsigned int isFixedCapacity:1;
    unsigned int isExternalMutable:1;
    unsigned int capacityProvidedExternally:1;
    // The desired-capacity field shares the word with the four flag bits above, hence 60 / 28 bits
#if __LP64__
    unsigned long desiredCapacity:60;
#else
    unsigned long desiredCapacity:28;
#endif
    CFAllocatorRef contentsAllocator;           // Optional
};                             // The only mutable variant for CFString


/* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields.
*/
struct __CFString {
    CFRuntimeBase base;
    union {	// In many cases the allocated structs are smaller than these
	struct __inline1 {
	    CFIndex length;
        } inline1;                                      // Bytes follow the length
	struct __notInlineImmutable1 {
	    void *buffer;                               // Note that the buffer is in the same place for all non-inline variants of CFString
	    CFIndex length;
	    CFAllocatorRef contentsDeallocator;		// Optional; just the dealloc func is used
	} notInlineImmutable1;                          // This is the usual not-inline immutable CFString
	struct __notInlineImmutable2 {
	    void *buffer;
	    CFAllocatorRef contentsDeallocator;		// Optional; just the dealloc func is used
	} notInlineImmutable2;                          // This is the not-inline immutable CFString when length is stored with the contents (first byte)
	struct __notInlineMutable notInlineMutable;
    } variants;
};

/*
I = is immutable
E = not inline contents
U = is Unicode
N = has NULL byte
L = has length byte
D = explicit deallocator for contents (for mutable objects, allocator)
C = length field is CFIndex (rather than UInt32); only meaningful for 64-bit, really
    if needed this bit (valuable real-estate) can be given up for another bit elsewhere, since this info is needed just for 64-bit

Also need (only for mutable)
F = is fixed
G = has gap
Cap, DesCap = capacity

B7 B6 B5 B4 B3 B2 B1 B0
         U  N  L  C  I

B6 B5
 0  0   inline contents
 0  1   E (freed with default allocator)
 1  0   E (not freed)
 1  1   E D

NOTE(review): in the constant-string patterns below B0 is 0 and the strings are described as immutable, so B0 being
set appears to mean "mutable" despite the "I = is immutable" legend above - confirm before relying on the legend.

!!! Note:  Constant CFStrings use the bit patterns:
C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable)
D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable)
The bit usages should not be modified in a way that would affect these bit patterns.
213*/ 214 215enum { 216 __kCFFreeContentsWhenDoneMask = 0x020, 217 __kCFFreeContentsWhenDone = 0x020, 218 __kCFContentsMask = 0x060, 219 __kCFHasInlineContents = 0x000, 220 __kCFNotInlineContentsNoFree = 0x040, // Don't free 221 __kCFNotInlineContentsDefaultFree = 0x020, // Use allocator's free function 222 __kCFNotInlineContentsCustomFree = 0x060, // Use a specially provided free function 223 __kCFHasContentsAllocatorMask = 0x060, 224 __kCFHasContentsAllocator = 0x060, // (For mutable strings) use a specially provided allocator 225 __kCFHasContentsDeallocatorMask = 0x060, 226 __kCFHasContentsDeallocator = 0x060, 227 __kCFIsMutableMask = 0x01, 228 __kCFIsMutable = 0x01, 229 __kCFIsUnicodeMask = 0x10, 230 __kCFIsUnicode = 0x10, 231 __kCFHasNullByteMask = 0x08, 232 __kCFHasNullByte = 0x08, 233 __kCFHasLengthByteMask = 0x04, 234 __kCFHasLengthByte = 0x04, 235 // !!! Bit 0x02 has been freed up 236}; 237 238 239// !!! Assumptions: 240// Mutable strings are not inline 241// Compile-time constant strings are not inline 242// Mutable strings always have explicit length (but they might also have length byte and null byte) 243// If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings) 244// Never look at the length byte for the length; use __CFStrLength or __CFStrLength2 245 246/* The following set of functions and macros need to be updated on change to the bit configuration 247*/ 248CF_INLINE Boolean __CFStrIsMutable(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsMutableMask) == __kCFIsMutable;} 249CF_INLINE Boolean __CFStrIsInline(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFContentsMask) == __kCFHasInlineContents;} 250CF_INLINE Boolean __CFStrFreeContentsWhenDone(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFFreeContentsWhenDoneMask) == __kCFFreeContentsWhenDone;} 251CF_INLINE Boolean __CFStrHasContentsDeallocator(CFStringRef str) 
{return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsDeallocatorMask) == __kCFHasContentsDeallocator;} 252CF_INLINE Boolean __CFStrIsUnicode(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) == __kCFIsUnicode;} 253CF_INLINE Boolean __CFStrIsEightBit(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) != __kCFIsUnicode;} 254CF_INLINE Boolean __CFStrHasNullByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasNullByteMask) == __kCFHasNullByte;} 255CF_INLINE Boolean __CFStrHasLengthByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte;} 256CF_INLINE Boolean __CFStrHasExplicitLength(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & (__kCFIsMutableMask | __kCFHasLengthByteMask)) != __kCFHasLengthByte;} // Has explicit length if (1) mutable or (2) not mutable and no length byte 257CF_INLINE Boolean __CFStrIsConstant(CFStringRef str) { 258#if __LP64__ 259 return str->base._rc == 0; 260#else 261 return (str->base._cfinfo[CF_RC_BITS]) == 0; 262#endif 263} 264 265CF_INLINE SInt32 __CFStrSkipAnyLengthByte(CFStringRef str) {return ((str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents 266 267/* Returns ptr to the buffer (which might include the length byte) 268*/ 269CF_INLINE const void *__CFStrContents(CFStringRef str) { 270 if (__CFStrIsInline(str)) { 271 return (const void *)(((uintptr_t)&(str->variants)) + (__CFStrHasExplicitLength(str) ? sizeof(CFIndex) : 0)); 272 } else { // Not inline; pointer is always word 2 273 return str->variants.notInlineImmutable1.buffer; 274 } 275} 276 277static CFAllocatorRef *__CFStrContentsDeallocatorPtr(CFStringRef str) { 278 return __CFStrHasExplicitLength(str) ? 
&(((CFMutableStringRef)str)->variants.notInlineImmutable1.contentsDeallocator) : &(((CFMutableStringRef)str)->variants.notInlineImmutable2.contentsDeallocator); } 279 280// Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator 281CF_INLINE CFAllocatorRef __CFStrContentsDeallocator(CFStringRef str) { 282 return *__CFStrContentsDeallocatorPtr(str); 283} 284 285// Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator 286CF_INLINE void __CFStrSetContentsDeallocator(CFStringRef str, CFAllocatorRef allocator) { 287 if (!(0 || 0)) CFRetain(allocator); 288 *__CFStrContentsDeallocatorPtr(str) = allocator; 289} 290 291static CFAllocatorRef *__CFStrContentsAllocatorPtr(CFStringRef str) { 292 CFAssert(!__CFStrIsInline(str), __kCFLogAssertion, "Asking for contents allocator of inline string"); 293 CFAssert(__CFStrIsMutable(str), __kCFLogAssertion, "Asking for contents allocator of an immutable string"); 294 return (CFAllocatorRef *)&(str->variants.notInlineMutable.contentsAllocator); 295} 296 297// Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom 298CF_INLINE CFAllocatorRef __CFStrContentsAllocator(CFMutableStringRef str) { 299 return *(__CFStrContentsAllocatorPtr(str)); 300} 301 302// Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom 303CF_INLINE void __CFStrSetContentsAllocator(CFMutableStringRef str, CFAllocatorRef allocator) { 304 if (!(0 || 0)) CFRetain(allocator); 305 *(__CFStrContentsAllocatorPtr(str)) = allocator; 306} 307 308/* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed. 
309*/ 310CF_INLINE CFIndex __CFStrLength(CFStringRef str) { 311 if (__CFStrHasExplicitLength(str)) { 312 if (__CFStrIsInline(str)) { 313 return str->variants.inline1.length; 314 } else { 315 return str->variants.notInlineImmutable1.length; 316 } 317 } else { 318 return (CFIndex)(*((uint8_t *)__CFStrContents(str))); 319 } 320} 321 322CF_INLINE CFIndex __CFStrLength2(CFStringRef str, const void *buffer) { 323 if (__CFStrHasExplicitLength(str)) { 324 if (__CFStrIsInline(str)) { 325 return str->variants.inline1.length; 326 } else { 327 return str->variants.notInlineImmutable1.length; 328 } 329 } else { 330 return (CFIndex)(*((uint8_t *)buffer)); 331 } 332} 333 334 335Boolean __CFStringIsEightBit(CFStringRef str) { 336 return __CFStrIsEightBit(str); 337} 338 339/* Sets the content pointer for immutable or mutable strings. 340*/ 341CF_INLINE void __CFStrSetContentPtr(CFStringRef str, const void *p) { 342 // XXX_PCB catch all writes for mutable string case. 343 __CFAssignWithWriteBarrier((void **)&((CFMutableStringRef)str)->variants.notInlineImmutable1.buffer, (void *)p); 344} 345CF_INLINE void __CFStrSetInfoBits(CFStringRef str, UInt32 v) {__CFBitfieldSetValue(((CFMutableStringRef)str)->base._cfinfo[CF_INFO_BITS], 6, 0, v);} 346 347CF_INLINE void __CFStrSetExplicitLength(CFStringRef str, CFIndex v) { 348 if (__CFStrIsInline(str)) { 349 ((CFMutableStringRef)str)->variants.inline1.length = v; 350 } else { 351 ((CFMutableStringRef)str)->variants.notInlineImmutable1.length = v; 352 } 353} 354 355CF_INLINE void __CFStrSetUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= __kCFIsUnicode;} 356CF_INLINE void __CFStrClearUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~__kCFIsUnicode;} 357CF_INLINE void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= (__kCFHasLengthByte | __kCFHasNullByte);} 358CF_INLINE void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] 
&= ~(__kCFHasLengthByte | __kCFHasNullByte);} 359 360 361// Assumption: The following set of inlines (using str->variants.notInlineMutable) are called with mutable strings only 362CF_INLINE Boolean __CFStrIsFixed(CFStringRef str) {return str->variants.notInlineMutable.isFixedCapacity;} 363CF_INLINE Boolean __CFStrIsExternalMutable(CFStringRef str) {return str->variants.notInlineMutable.isExternalMutable;} 364CF_INLINE Boolean __CFStrHasContentsAllocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsAllocatorMask) == __kCFHasContentsAllocator;} 365CF_INLINE void __CFStrSetIsFixed(CFMutableStringRef str) {str->variants.notInlineMutable.isFixedCapacity = 1;} 366CF_INLINE void __CFStrSetIsExternalMutable(CFMutableStringRef str) {str->variants.notInlineMutable.isExternalMutable = 1;} 367CF_INLINE void __CFStrSetHasGap(CFMutableStringRef str) {str->variants.notInlineMutable.hasGap = 1;} 368 369// If capacity is provided externally, we only change it when we need to grow beyond it 370CF_INLINE Boolean __CFStrCapacityProvidedExternally(CFStringRef str) {return str->variants.notInlineMutable.capacityProvidedExternally;} 371CF_INLINE void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 1;} 372CF_INLINE void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 0;} 373 374// "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer. 375CF_INLINE CFIndex __CFStrCapacity(CFStringRef str) {return str->variants.notInlineMutable.capacity;} 376CF_INLINE void __CFStrSetCapacity(CFMutableStringRef str, CFIndex cap) {str->variants.notInlineMutable.capacity = cap;} 377 378// "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store. 
379CF_INLINE CFIndex __CFStrDesiredCapacity(CFStringRef str) {return str->variants.notInlineMutable.desiredCapacity;} 380CF_INLINE void __CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex size) {str->variants.notInlineMutable.desiredCapacity = size;} 381 382 383static void *__CFStrAllocateMutableContents(CFMutableStringRef str, CFIndex size) { 384 void *ptr; 385 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str); 386 ptr = CFAllocatorAllocate(alloc, size, 0); 387 if (__CFOASafe) __CFSetLastAllocationEventName(ptr, "CFString (store)"); 388 return ptr; 389} 390 391static void __CFStrDeallocateMutableContents(CFMutableStringRef str, void *buffer) { 392 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str); 393 if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str) && (0)) { 394 // do nothing 395 } else if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) { 396 // GC: for finalization safety, let collector reclaim the buffer in the next GC cycle. 397 auto_zone_release(objc_collectableZone(), buffer); 398 } else { 399 CFAllocatorDeallocate(alloc, buffer); 400 } 401} 402 403 404 405 406/* CFString specific init flags 407 Note that you cannot count on the external buffer not being copied. 408 Also, if you specify an external buffer, you should not change it behind the CFString's back. 409*/ 410enum { 411 __kCFThinUnicodeIfPossible = 0x1000000, /* See if the Unicode contents can be thinned down to 8-bit */ 412 kCFStringPascal = 0x10000, /* Indicating that the string data has a Pascal string structure (length byte at start) */ 413 kCFStringNoCopyProvidedContents = 0x20000, /* Don't copy the provided string contents if possible; free it when no longer needed */ 414 kCFStringNoCopyNoFreeProvidedContents = 0x30000 /* Don't copy the provided string contents if possible; don't free it when no longer needed */ 415}; 416 417/* System Encoding. 
418*/ 419static CFStringEncoding __CFDefaultSystemEncoding = kCFStringEncodingInvalidId; 420static CFStringEncoding __CFDefaultFileSystemEncoding = kCFStringEncodingInvalidId; 421CFStringEncoding __CFDefaultEightBitStringEncoding = kCFStringEncodingInvalidId; 422 423 424#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX 425#define __defaultEncoding kCFStringEncodingMacRoman 426#elif DEPLOYMENT_TARGET_WINDOWS 427#define __defaultEncoding kCFStringEncodingWindowsLatin1 428#else 429#warning This value must match __CFGetConverter condition in CFStringEncodingConverter.c 430#define __defaultEncoding kCFStringEncodingISOLatin1 431#endif 432 433CFStringEncoding CFStringGetSystemEncoding(void) { 434 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) { 435 __CFDefaultSystemEncoding = __defaultEncoding; 436 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(__CFDefaultSystemEncoding); 437 __CFSetCharToUniCharFunc(converter->encodingClass == kCFStringEncodingConverterCheapEightBit ? (UNI_CHAR_FUNC)converter->toUnicode : NULL); 438 } 439 return __CFDefaultSystemEncoding; 440} 441 442// Fast version for internal use 443 444CF_INLINE CFStringEncoding __CFStringGetSystemEncoding(void) { 445 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) (void)CFStringGetSystemEncoding(); 446 return __CFDefaultSystemEncoding; 447} 448 449CFStringEncoding CFStringFileSystemEncoding(void) { 450 if (__CFDefaultFileSystemEncoding == kCFStringEncodingInvalidId) { 451#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_WINDOWS 452 __CFDefaultFileSystemEncoding = kCFStringEncodingUTF8; 453#else 454 __CFDefaultFileSystemEncoding = CFStringGetSystemEncoding(); 455#endif 456 } 457 458 return __CFDefaultFileSystemEncoding; 459} 460 461/* ??? Is returning length when no other answer is available the right thing? 462 !!! 
All of the (length > (LONG_MAX / N)) type checks are to avoid wrap-around and eventual malloc overflow in the client 463*/ 464CFIndex CFStringGetMaximumSizeForEncoding(CFIndex length, CFStringEncoding encoding) { 465 if (encoding == kCFStringEncodingUTF8) { 466 return (length > (LONG_MAX / 3)) ? kCFNotFound : (length * 3); 467 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) { // UTF-32 468 return (length > (LONG_MAX / sizeof(UTF32Char))) ? kCFNotFound : (length * sizeof(UTF32Char)); 469 } else { 470 encoding &= 0xFFF; // Mask off non-base part 471 } 472 switch (encoding) { 473 case kCFStringEncodingUnicode: 474 return (length > (LONG_MAX / sizeof(UniChar))) ? kCFNotFound : (length * sizeof(UniChar)); 475 476 case kCFStringEncodingNonLossyASCII: 477 return (length > (LONG_MAX / 6)) ? kCFNotFound : (length * 6); // 1 Unichar can expand to 6 bytes 478 479 case kCFStringEncodingMacRoman: 480 case kCFStringEncodingWindowsLatin1: 481 case kCFStringEncodingISOLatin1: 482 case kCFStringEncodingNextStepLatin: 483 case kCFStringEncodingASCII: 484 return length / sizeof(uint8_t); 485 486 default: 487 return length / sizeof(uint8_t); 488 } 489} 490 491 492/* Returns whether the indicated encoding can be stored in 8-bit chars 493*/ 494CF_INLINE Boolean __CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding) { 495 switch (encoding & 0xFFF) { // just use encoding base 496 case kCFStringEncodingInvalidId: 497 case kCFStringEncodingUnicode: 498 case kCFStringEncodingNonLossyASCII: 499 return false; 500 501 case kCFStringEncodingMacRoman: 502 case kCFStringEncodingWindowsLatin1: 503 case kCFStringEncodingISOLatin1: 504 case kCFStringEncodingNextStepLatin: 505 case kCFStringEncodingASCII: 506 return true; 507 508 default: return false; 509 } 510} 511 512/* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode) 513 For 10.9-linked apps, we've 
set this encoding to ASCII for all cases; see <rdar://problem/3597233> 514*/ 515CFStringEncoding __CFStringComputeEightBitStringEncoding(void) { 516 // This flag prevents recursive entry into __CFStringComputeEightBitStringEncoding 517 static Boolean __CFStringIsBeingInitialized2 = false; 518 if (__CFStringIsBeingInitialized2) return kCFStringEncodingASCII; 519 __CFStringIsBeingInitialized2 = true; 520 521 Boolean useAscii = true; 522 __CFStringIsBeingInitialized2 = false; 523 if (useAscii) { 524 __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII; 525 } else { 526 if (__CFDefaultEightBitStringEncoding == kCFStringEncodingInvalidId) { 527 CFStringEncoding systemEncoding = CFStringGetSystemEncoding(); 528 if (systemEncoding == kCFStringEncodingInvalidId) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined. 529 return kCFStringEncodingASCII; 530 } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding)) { 531 __CFDefaultEightBitStringEncoding = systemEncoding; 532 } else { 533 __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII; 534 } 535 } 536 } 537 return __CFDefaultEightBitStringEncoding; 538} 539 540/* Returns whether the provided bytes can be stored in ASCII 541*/ 542CF_INLINE Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) { 543#if __LP64__ 544 /* A bit of unrolling; go by 32s, 16s, and 8s first */ 545 while (len >= 32) { 546 uint64_t val = *(const uint64_t *)bytes; 547 uint64_t hiBits = (val & 0x8080808080808080ULL); // More efficient to collect this rather than do a conditional at every step 548 bytes += 8; 549 val = *(const uint64_t *)bytes; 550 hiBits |= (val & 0x8080808080808080ULL); 551 bytes += 8; 552 val = *(const uint64_t *)bytes; 553 hiBits |= (val & 0x8080808080808080ULL); 554 bytes += 8; 555 val = *(const uint64_t *)bytes; 556 if (hiBits | (val & 0x8080808080808080ULL)) return false; 557 bytes += 8; 558 len -= 32; 559 } 560 561 while 
(len >= 16) { 562 uint64_t val = *(const uint64_t *)bytes; 563 uint64_t hiBits = (val & 0x8080808080808080ULL); 564 bytes += 8; 565 val = *(const uint64_t *)bytes; 566 if (hiBits | (val & 0x8080808080808080ULL)) return false; 567 bytes += 8; 568 len -= 16; 569 } 570 571 while (len >= 8) { 572 uint64_t val = *(const uint64_t *)bytes; 573 if (val & 0x8080808080808080ULL) return false; 574 bytes += 8; 575 len -= 8; 576 } 577#endif 578 /* Go by 4s */ 579 while (len >= 4) { 580 uint32_t val = *(const uint32_t *)bytes; 581 if (val & 0x80808080U) return false; 582 bytes += 4; 583 len -= 4; 584 } 585 /* Handle the rest one byte at a time */ 586 while (len--) { 587 if (*bytes++ & 0x80) return false; 588 } 589 590 return true; 591} 592 593/* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString. 594*/ 595CF_INLINE Boolean __CFCanUseEightBitCFStringForBytes(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding) { 596 // If the encoding is the same as the 8-bit CFString encoding, we can just use the bytes as-is. 597 // One exception is ASCII, which unfortunately needs to mean ISOLatin1 for compatibility reasons <rdar://problem/5458321>. 598 if (encoding == __CFStringGetEightBitStringEncoding() && encoding != kCFStringEncodingASCII) return true; 599 if (__CFStringEncodingIsSupersetOfASCII(encoding) && __CFBytesInASCII(bytes, len)) return true; 600 return false; 601} 602 603 604/* Returns whether a length byte can be tacked on to a string of the indicated length. 605*/ 606CF_INLINE Boolean __CFCanUseLengthByte(CFIndex len) { 607#define __kCFMaxPascalStrLen 255 608 return (len <= __kCFMaxPascalStrLen) ? 
true : false; 609} 610 611/* Various string assertions 612*/ 613#define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID) 614#define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf)) 615#define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && (idx + count) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf)) 616#define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);} 617#define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf) && __CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);} 618#define __CFAssertIsNotNegative(idx) CFAssert2(idx >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx) 619#define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || (reqLen <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen) 620 621 622/* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity. This function will return -1 if the new capacity is just too big (> LONG_MAX). 
Additional complications are applied in the following order:
- desiredCapacity, which is the minimum (except initially things can be at zero)
- rounding up to factor of 8
- compressing (to fit the number if 16 bits), which effectively rounds up to factor of 256
- we need to make sure GROWFACTOR computation doesn't suffer from overflow issues on 32-bit, hence the casting to unsigned. Normally for required capacity of C bytes, the allocated space is (3C+1)/2. If C > ULONG_MAX/3, we instead simply return LONG_MAX
NOTE(review): __CFStrNewCapacity below performs no rounding-to-8 or compressing step (it asks the allocator or
malloc_good_size() for a preferred size instead), and the 32-bit GROWFACTOR yields __CFMax(LONG_MAX - 4095, C)
for large C rather than LONG_MAX - the two bullets and the last sentence above look stale; confirm.
*/
#define SHRINKFACTOR(c) (c / 2)

#if __LP64__
#define GROWFACTOR(c) ((c * 3 + 1) / 2)
#else
#define GROWFACTOR(c) (((c) >= (ULONG_MAX / 3UL)) ? __CFMax(LONG_MAX - 4095, (c)) : (((unsigned long)c * 3 + 1) / 2))
#endif

/* Computes the capacity the contents buffer of 'str' should have.
   str            - mutable string whose desired capacity, fixed flag and allocator are consulted
   reqCapacity    - capacity that is needed now
   capacity       - current capacity
   leaveExtraRoom - when true a new capacity is GROWFACTOR(reqCapacity); when false it is reqCapacity exactly
   charSize       - multiplier applied to the desired capacity (which is kept in characters)
   Returns 'capacity' unchanged when no resize is called for, the new capacity otherwise, or -1 when the
   request or the result exceeds LONG_MAX.
*/
CF_INLINE CFIndex __CFStrNewCapacity(CFMutableStringRef str, unsigned long reqCapacity, CFIndex capacity, Boolean leaveExtraRoom, CFIndex charSize) {
    if (capacity != 0 || reqCapacity != 0) {	/* If initially zero, and space not needed, leave it at that... */
        if ((capacity < reqCapacity) ||		/* We definitely need the room... */
            (!__CFStrCapacityProvidedExternally(str) && 	/* Assuming we control the capacity... */
		((reqCapacity < SHRINKFACTOR(capacity)) ||		/* ...we have too much room! */
                 (!leaveExtraRoom && (reqCapacity < capacity))))) {	/* ...we need to eliminate the extra space... */
	    if (reqCapacity > LONG_MAX) return -1;  /* Too big any way you cut it */
            unsigned long newCapacity = leaveExtraRoom ? GROWFACTOR(reqCapacity) : reqCapacity;	/* Grow by 3/2 if extra room is desired */
	    CFIndex desiredCapacity = __CFStrDesiredCapacity(str) * charSize;
            if (newCapacity < desiredCapacity) {	/* If less than desired, bump up to desired */
                newCapacity = desiredCapacity;
            } else if (__CFStrIsFixed(str)) {		/* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
                newCapacity = __CFMax(desiredCapacity, reqCapacity);	/* !!! So, fixed is not really fixed, but "tight" */
            }
	    if (__CFStrHasContentsAllocator(str)) {	/* Also apply any preferred size from the allocator  */
                newCapacity = CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str), newCapacity, 0);
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
            } else {
                newCapacity = malloc_good_size(newCapacity);
#endif
            }
            return (newCapacity > LONG_MAX) ? -1 : (CFIndex)newCapacity; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
        }
    }
    return capacity;
}


/* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result.
  numBlocks is current total number of blocks within buffer.
  blockSize is the size of each block in bytes
  ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap
  insertLength is the final spacing between the remaining blocks

Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) }  (so we want to "delete" C and E F), fromEnd = NO
if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H
if insertLength = 0, result = A B D G H

Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO
if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ?
U 677 678*/ 679typedef struct _CFStringDeferredRange { 680 CFIndex beginning; 681 CFIndex length; 682 CFIndex shift; 683} CFStringDeferredRange; 684 685typedef struct _CFStringStackInfo { 686 CFIndex capacity; // Capacity (if capacity == count, need to realloc to add another) 687 CFIndex count; // Number of elements actually stored 688 CFStringDeferredRange *stack; 689 Boolean hasMalloced; // Indicates "stack" is allocated and needs to be deallocated when done 690 char _padding[3]; 691} CFStringStackInfo; 692 693CF_INLINE void pop (CFStringStackInfo *si, CFStringDeferredRange *topRange) { 694 si->count = si->count - 1; 695 *topRange = si->stack[si->count]; 696} 697 698CF_INLINE void push (CFStringStackInfo *si, const CFStringDeferredRange *newRange) { 699 if (si->count == si->capacity) { 700 // increase size of the stack 701 si->capacity = (si->capacity + 4) * 2; 702 if (si->hasMalloced) { 703 si->stack = (CFStringDeferredRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, si->stack, si->capacity * sizeof(CFStringDeferredRange), 0); 704 } else { 705 CFStringDeferredRange *newStack = (CFStringDeferredRange *)CFAllocatorAllocate(kCFAllocatorSystemDefault, si->capacity * sizeof(CFStringDeferredRange), 0); 706 memmove(newStack, si->stack, si->count * sizeof(CFStringDeferredRange)); 707 si->stack = newStack; 708 si->hasMalloced = true; 709 } 710 } 711 si->stack[si->count] = *newRange; 712 si->count = si->count + 1; 713} 714 715static void rearrangeBlocks( 716 uint8_t *buffer, 717 CFIndex numBlocks, 718 CFIndex blockSize, 719 const CFRange *ranges, 720 CFIndex numRanges, 721 CFIndex insertLength) { 722 723#define origStackSize 10 724 CFStringDeferredRange origStack[origStackSize]; 725 CFStringStackInfo si = {origStackSize, 0, origStack, false, {0, 0, 0}}; 726 CFStringDeferredRange currentNonRange = {0, 0, 0}; 727 CFIndex currentRange = 0; 728 CFIndex amountShifted = 0; 729 730 // must have at least 1 range left. 
731 732 while (currentRange < numRanges) { 733 currentNonRange.beginning = (ranges[currentRange].location + ranges[currentRange].length) * blockSize; 734 if ((numRanges - currentRange) == 1) { 735 // at the end. 736 currentNonRange.length = numBlocks * blockSize - currentNonRange.beginning; 737 if (currentNonRange.length == 0) break; 738 } else { 739 currentNonRange.length = (ranges[currentRange + 1].location * blockSize) - currentNonRange.beginning; 740 } 741 currentNonRange.shift = amountShifted + (insertLength * blockSize) - (ranges[currentRange].length * blockSize); 742 amountShifted = currentNonRange.shift; 743 if (amountShifted <= 0) { 744 // process current item and rest of stack 745 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length); 746 while (si.count > 0) { 747 pop (&si, ¤tNonRange); // currentNonRange now equals the top element of the stack. 748 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length); 749 } 750 } else { 751 // add currentNonRange to stack. 752 push (&si, ¤tNonRange); 753 } 754 currentRange++; 755 } 756 757 // no more ranges. if anything is on the stack, process. 758 759 while (si.count > 0) { 760 pop (&si, ¤tNonRange); // currentNonRange now equals the top element of the stack. 761 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length); 762 } 763 if (si.hasMalloced) CFAllocatorDeallocate (kCFAllocatorSystemDefault, si.stack); 764} 765 766/* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. 
We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.) 767*/ 768static void copyBlocks( 769 const uint8_t *srcBuffer, 770 uint8_t *dstBuffer, 771 CFIndex srcLength, 772 Boolean srcIsUnicode, 773 Boolean dstIsUnicode, 774 const CFRange *ranges, 775 CFIndex numRanges, 776 CFIndex insertLength) { 777 778 CFIndex srcLocationInBytes = 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks 779 CFIndex dstLocationInBytes = 0; // ditto 780 CFIndex srcBlockSize = srcIsUnicode ? sizeof(UniChar) : sizeof(uint8_t); 781 CFIndex insertLengthInBytes = insertLength * (dstIsUnicode ? sizeof(UniChar) : sizeof(uint8_t)); 782 CFIndex rangeIndex = 0; 783 CFIndex srcToDstMultiplier = (srcIsUnicode == dstIsUnicode) ? 1 : (sizeof(UniChar) / sizeof(uint8_t)); 784 785 // Loop over the ranges, copying the range to be preserved (right before each range) 786 while (rangeIndex < numRanges) { 787 CFIndex srcLengthInBytes = ranges[rangeIndex].location * srcBlockSize - srcLocationInBytes; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved 788 if (srcLengthInBytes > 0) { 789 if (srcIsUnicode == dstIsUnicode) { 790 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLengthInBytes); 791 } else { 792 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLengthInBytes); 793 } 794 } 795 srcLocationInBytes += srcLengthInBytes + ranges[rangeIndex].length * srcBlockSize; // Skip over the just-copied and to-be-deleted stuff 796 dstLocationInBytes += srcLengthInBytes * srcToDstMultiplier + insertLengthInBytes; 797 rangeIndex++; 798 } 799 800 // Do last range (the one beyond last range) 801 if (srcLocationInBytes < srcLength * srcBlockSize) { 802 if (srcIsUnicode == dstIsUnicode) { 803 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLength * srcBlockSize - 
srcLocationInBytes); 804 } else { 805 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLength * srcBlockSize - srcLocationInBytes); 806 } 807 } 808} 809 810/* Call the callback; if it doesn't exist or returns false, then log 811*/ 812static void __CFStringHandleOutOfMemory(CFTypeRef obj) { 813 CFStringRef msg = CFSTR("Out of memory. We suggest restarting the application. If you have an unsaved document, create a backup copy in Finder, then try to save."); 814 { 815 CFLog(kCFLogLevelCritical, CFSTR("%@"), msg); 816 } 817} 818 819/* Reallocates the backing store of the string to accomodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.) 820*/ 821static void __CFStringChangeSizeMultiple(CFMutableStringRef str, const CFRange *deleteRanges, CFIndex numDeleteRanges, CFIndex insertLength, Boolean makeUnicode) { 822 const uint8_t *curContents = (uint8_t *)__CFStrContents(str); 823 CFIndex curLength = curContents ? 
__CFStrLength2(str, curContents) : 0; 824 unsigned long newLength; // We use unsigned to better keep track of overflow 825 826 // Compute new length of the string 827 if (numDeleteRanges == 1) { 828 newLength = curLength + insertLength - deleteRanges[0].length; 829 } else { 830 CFIndex cnt; 831 newLength = curLength + insertLength * numDeleteRanges; 832 for (cnt = 0; cnt < numDeleteRanges; cnt++) newLength -= deleteRanges[cnt].length; 833 } 834 835 __CFAssertIfFixedLengthIsOK(str, newLength); 836 837 if (newLength == 0) { 838 // An somewhat optimized code-path for this special case, with the following implicit values: 839 // newIsUnicode = false 840 // useLengthAndNullBytes = false 841 // newCharSize = sizeof(uint8_t) 842 // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally 843 // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway) 844 CFIndex curCapacity = __CFStrCapacity(str); 845 CFIndex newCapacity = __CFStrNewCapacity(str, 0, curCapacity, true, sizeof(uint8_t)); 846 if (newCapacity != curCapacity) { // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all 847 if (curContents) __CFStrDeallocateMutableContents(str, (uint8_t *)curContents); 848 __CFStrSetContentPtr(str, NULL); 849 __CFStrSetCapacity(str, 0); 850 __CFStrClearCapacityProvidedExternally(str); 851 __CFStrClearHasLengthAndNullBytes(str); 852 if (!__CFStrIsExternalMutable(str)) __CFStrClearUnicode(str); // External mutable implies Unicode 853 } else { 854 if (!__CFStrIsExternalMutable(str)) { 855 __CFStrClearUnicode(str); 856 if (curCapacity >= (int)(sizeof(uint8_t) * 2)) { // If there's room 857 __CFStrSetHasLengthAndNullBytes(str); 858 ((uint8_t *)curContents)[0] = ((uint8_t *)curContents)[1] = 0; 859 } else { 860 __CFStrClearHasLengthAndNullBytes(str); 861 } 862 } 863 } 864 
__CFStrSetExplicitLength(str, 0); 865 } else { /* This else-clause assumes newLength > 0 */ 866 Boolean oldIsUnicode = __CFStrIsUnicode(str); 867 Boolean newIsUnicode = makeUnicode || (oldIsUnicode /* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str); 868 CFIndex newCharSize = newIsUnicode ? sizeof(UniChar) : sizeof(uint8_t); 869 Boolean useLengthAndNullBytes = !newIsUnicode /* && (newLength > 0) - implicit */; 870 CFIndex numExtraBytes = useLengthAndNullBytes ? 2 : 0; /* 2 extra bytes to keep the length byte & null... */ 871 CFIndex curCapacity = __CFStrCapacity(str); 872 if (newLength > (LONG_MAX - numExtraBytes) / newCharSize) __CFStringHandleOutOfMemory(str); // Does not return 873 CFIndex newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, true, newCharSize); 874 if (newCapacity == -1) __CFStringHandleOutOfMemory(str); // Does not return 875 Boolean allocNewBuffer = (newCapacity != curCapacity) || (curLength > 0 && !oldIsUnicode && newIsUnicode); /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */ 876 uint8_t *newContents; 877 if (allocNewBuffer) { 878 newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity); 879 if (!newContents) { // Try allocating without extra room 880 newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, false, newCharSize); 881 // Since we checked for this above, it shouldn't be the case here, but just in case 882 if (newCapacity == -1) __CFStringHandleOutOfMemory(str); // Does not return 883 newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity); 884 if (!newContents) __CFStringHandleOutOfMemory(str); // Does not return 885 } 886 } else { 887 newContents = (uint8_t *)curContents; 888 } 889 890 Boolean hasLengthAndNullBytes = __CFStrHasLengthByte(str); 891 892 CFAssert1(hasLengthAndNullBytes == __CFStrHasNullByte(str), __kCFLogAssertion, "%s(): Invalid state in 
8-bit string", __PRETTY_FUNCTION__); 893 894 // Calculate pointers to the actual string content (skipping over the length byte, if present). Note that keeping a reference to the base is needed for newContents under GC, since the copy may take a long time. 895 const uint8_t *curContentsBody = hasLengthAndNullBytes ? (curContents+1) : curContents; 896 uint8_t *newContentsBody = useLengthAndNullBytes ? (newContents+1) : newContents; 897 898 if (curContents) { 899 if (oldIsUnicode == newIsUnicode) { 900 if (newContentsBody == curContentsBody) { 901 rearrangeBlocks(newContentsBody, curLength, newCharSize, deleteRanges, numDeleteRanges, insertLength); 902 } else { 903 copyBlocks(curContentsBody, newContentsBody, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength); 904 } 905 } else if (newIsUnicode) { /* this implies we have a new buffer */ 906 copyBlocks(curContentsBody, newContentsBody, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength); 907 } 908 if (allocNewBuffer && __CFStrFreeContentsWhenDone(str)) __CFStrDeallocateMutableContents(str, (void *)curContents); 909 } 910 911 if (!newIsUnicode) { 912 if (useLengthAndNullBytes) { 913 newContentsBody[newLength] = 0; /* Always have null byte, if not unicode */ 914 newContents[0] = __CFCanUseLengthByte(newLength) ? (uint8_t)newLength : 0; 915 if (!hasLengthAndNullBytes) __CFStrSetHasLengthAndNullBytes(str); 916 } else { 917 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str); 918 } 919 if (oldIsUnicode) __CFStrClearUnicode(str); 920 } else { // New is unicode... 
921 if (!oldIsUnicode) __CFStrSetUnicode(str); 922 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str); 923 } 924 __CFStrSetExplicitLength(str, newLength); 925 926 if (allocNewBuffer) { 927 __CFStrSetCapacity(str, newCapacity); 928 __CFStrClearCapacityProvidedExternally(str); 929 __CFStrSetContentPtr(str, newContents); 930 } 931 } 932} 933 934/* Same as above, but takes one range (very common case) 935*/ 936CF_INLINE void __CFStringChangeSize(CFMutableStringRef str, CFRange range, CFIndex insertLength, Boolean makeUnicode) { 937 __CFStringChangeSizeMultiple(str, &range, 1, insertLength, makeUnicode); 938} 939 940 941#if defined(DEBUG) 942static Boolean __CFStrIsConstantString(CFStringRef str); 943#endif 944 945static void __CFStringDeallocate(CFTypeRef cf) { 946 CFStringRef str = (CFStringRef)cf; 947 948 // If in DEBUG mode, check to see if the string a CFSTR, and complain. 949 CFAssert1(__CFConstantStringTableBeingFreed || !__CFStrIsConstantString((CFStringRef)cf), __kCFLogAssertion, "Tried to deallocate CFSTR(\"%@\")", str); 950 951 if (!__CFStrIsInline(str)) { 952 uint8_t *contents; 953 Boolean isMutable = __CFStrIsMutable(str); 954 if (__CFStrFreeContentsWhenDone(str) && (contents = (uint8_t *)__CFStrContents(str))) { 955 if (isMutable) { 956 __CFStrDeallocateMutableContents((CFMutableStringRef)str, contents); 957 } else { 958 if (__CFStrHasContentsDeallocator(str)) { 959 CFAllocatorRef allocator = __CFStrContentsDeallocator(str); 960 CFAllocatorDeallocate(allocator, contents); 961 if (!(0 || 0 )) CFRelease(allocator); 962 } else { 963 CFAllocatorRef alloc = __CFGetAllocator(str); 964 CFAllocatorDeallocate(alloc, contents); 965 } 966 } 967 } 968 if (isMutable && __CFStrHasContentsAllocator(str)) { 969 CFAllocatorRef allocator = __CFStrContentsAllocator((CFMutableStringRef)str); 970 if (!(0 || 0)) CFRelease(allocator); 971 } 972 } 973} 974 975static Boolean __CFStringEqual(CFTypeRef cf1, CFTypeRef cf2) { 976 CFStringRef str1 = (CFStringRef)cf1; 977 
CFStringRef str2 = (CFStringRef)cf2; 978 const uint8_t *contents1; 979 const uint8_t *contents2; 980 CFIndex len1; 981 982 /* !!! We do not need IsString assertions, as the CFBase runtime assures this */ 983 /* !!! We do not need == test, as the CFBase runtime assures this */ 984 985 contents1 = (uint8_t *)__CFStrContents(str1); 986 contents2 = (uint8_t *)__CFStrContents(str2); 987 len1 = __CFStrLength2(str1, contents1); 988 989 if (len1 != __CFStrLength2(str2, contents2)) return false; 990 991 contents1 += __CFStrSkipAnyLengthByte(str1); 992 contents2 += __CFStrSkipAnyLengthByte(str2); 993 994 if (__CFStrIsEightBit(str1) && __CFStrIsEightBit(str2)) { 995 return memcmp((const char *)contents1, (const char *)contents2, len1) ? false : true; 996 } else if (__CFStrIsEightBit(str1)) { /* One string has Unicode contents */ 997 CFStringInlineBuffer buf; 998 CFIndex buf_idx = 0; 999 1000 CFStringInitInlineBuffer(str1, &buf, CFRangeMake(0, len1)); 1001 for (buf_idx = 0; buf_idx < len1; buf_idx++) { 1002 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents2)[buf_idx]) return false; 1003 } 1004 } else if (__CFStrIsEightBit(str2)) { /* One string has Unicode contents */ 1005 CFStringInlineBuffer buf; 1006 CFIndex buf_idx = 0; 1007 1008 CFStringInitInlineBuffer(str2, &buf, CFRangeMake(0, len1)); 1009 for (buf_idx = 0; buf_idx < len1; buf_idx++) { 1010 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents1)[buf_idx]) return false; 1011 } 1012 } else { /* Both strings have Unicode contents */ 1013 CFIndex idx; 1014 for (idx = 0; idx < len1; idx++) { 1015 if (((UniChar *)contents1)[idx] != ((UniChar *)contents2)[idx]) return false; 1016 } 1017 } 1018 return true; 1019} 1020 1021 1022/* String hashing: Should give the same results whatever the encoding; so we hash UniChars. 
1023If the length is less than or equal to 96, then the hash function is simply the 1024following (n is the nth UniChar character, starting from 0): 1025 1026 hash(-1) = length 1027 hash(n) = hash(n-1) * 257 + unichar(n); 1028 Hash = hash(length-1) * ((length & 31) + 1) 1029 1030If the length is greater than 96, then the above algorithm applies to 1031characters 0..31, (length/2)-16..(length/2)+15, and length-32..length-1, inclusive; 1032thus the first, middle, and last 32 characters. 1033 1034Note that the loops below are unrolled; and: 257^2 = 66049; 257^3 = 16974593; 257^4 = 4362470401; 67503105 is 257^4 - 256^4 1035If hashcode is changed from UInt32 to something else, this last piece needs to be readjusted. 1036!!! We haven't updated for LP64 yet 1037 1038NOTE: The hash algorithm used to be duplicated in CF and Foundation; but now it should only be in the four functions below. 1039 1040Hash function was changed between Panther and Tiger, and Tiger and Leopard. 1041*/ 1042#define HashEverythingLimit 96 1043 1044#define HashNextFourUniChars(accessStart, accessEnd, pointer) \ 1045 {result = result * 67503105 + (accessStart 0 accessEnd) * 16974593 + (accessStart 1 accessEnd) * 66049 + (accessStart 2 accessEnd) * 257 + (accessStart 3 accessEnd); pointer += 4;} 1046 1047#define HashNextUniChar(accessStart, accessEnd, pointer) \ 1048 {result = result * 257 + (accessStart 0 accessEnd); pointer++;} 1049 1050 1051/* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing. 
1052*/ 1053CF_INLINE CFHashCode __CFStrHashCharacters(const UniChar *uContents, CFIndex len, CFIndex actualLen) { 1054 CFHashCode result = actualLen; 1055 if (len <= HashEverythingLimit) { 1056 const UniChar *end4 = uContents + (len & ~3); 1057 const UniChar *end = uContents + len; 1058 while (uContents < end4) HashNextFourUniChars(uContents[, ], uContents); // First count in fours 1059 while (uContents < end) HashNextUniChar(uContents[, ], uContents); // Then for the last <4 chars, count in ones... 1060 } else { 1061 const UniChar *contents, *end; 1062 contents = uContents; 1063 end = contents + 32; 1064 while (contents < end) HashNextFourUniChars(contents[, ], contents); 1065 contents = uContents + (len >> 1) - 16; 1066 end = contents + 32; 1067 while (contents < end) HashNextFourUniChars(contents[, ], contents); 1068 end = uContents + len; 1069 contents = end - 32; 1070 while (contents < end) HashNextFourUniChars(contents[, ], contents); 1071 } 1072 return result + (result << (actualLen & 31)); 1073} 1074 1075/* This hashes cString in the eight bit string encoding. It also includes the little debug-time sanity check. 
1076*/ 1077CF_INLINE CFHashCode __CFStrHashEightBit(const uint8_t *cContents, CFIndex len) { 1078#if defined(DEBUG) 1079 if (!__CFCharToUniCharFunc) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea 1080 CFIndex cnt; 1081 Boolean err = false; 1082 if (len <= HashEverythingLimit) { 1083 for (cnt = 0; cnt < len; cnt++) if (cContents[cnt] >= 128) err = true; 1084 } else { 1085 for (cnt = 0; cnt < 32; cnt++) if (cContents[cnt] >= 128) err = true; 1086 for (cnt = (len >> 1) - 16; cnt < (len >> 1) + 16; cnt++) if (cContents[cnt] >= 128) err = true; 1087 for (cnt = (len - 32); cnt < len; cnt++) if (cContents[cnt] >= 128) err = true; 1088 } 1089 if (err) { 1090 // Can't do log here, as it might be too early 1091 fprintf(stderr, "Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n"); 1092 } 1093 } 1094#endif 1095 CFHashCode result = len; 1096 if (len <= HashEverythingLimit) { 1097 const uint8_t *end4 = cContents + (len & ~3); 1098 const uint8_t *end = cContents + len; 1099 while (cContents < end4) HashNextFourUniChars(__CFCharToUniCharTable[cContents[, ]], cContents); // First count in fours 1100 while (cContents < end) HashNextUniChar(__CFCharToUniCharTable[cContents[, ]], cContents); // Then for the last <4 chars, count in ones... 
1101 } else { 1102 const uint8_t *contents, *end; 1103 contents = cContents; 1104 end = contents + 32; 1105 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents); 1106 contents = cContents + (len >> 1) - 16; 1107 end = contents + 32; 1108 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents); 1109 end = cContents + len; 1110 contents = end - 32; 1111 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents); 1112 } 1113 return result + (result << (len & 31)); 1114} 1115 1116CFHashCode CFStringHashISOLatin1CString(const uint8_t *bytes, CFIndex len) { 1117 CFHashCode result = len; 1118 if (len <= HashEverythingLimit) { 1119 const uint8_t *end4 = bytes + (len & ~3); 1120 const uint8_t *end = bytes + len; 1121 while (bytes < end4) HashNextFourUniChars(bytes[, ], bytes); // First count in fours 1122 while (bytes < end) HashNextUniChar(bytes[, ], bytes); // Then for the last <4 chars, count in ones... 1123 } else { 1124 const uint8_t *contents, *end; 1125 contents = bytes; 1126 end = contents + 32; 1127 while (contents < end) HashNextFourUniChars(contents[, ], contents); 1128 contents = bytes + (len >> 1) - 16; 1129 end = contents + 32; 1130 while (contents < end) HashNextFourUniChars(contents[, ], contents); 1131 end = bytes + len; 1132 contents = end - 32; 1133 while (contents < end) HashNextFourUniChars(contents[, ], contents); 1134 } 1135 return result + (result << (len & 31)); 1136} 1137 1138CFHashCode CFStringHashCString(const uint8_t *bytes, CFIndex len) { 1139 return __CFStrHashEightBit(bytes, len); 1140} 1141 1142CFHashCode CFStringHashCharacters(const UniChar *characters, CFIndex len) { 1143 return __CFStrHashCharacters(characters, len, len); 1144} 1145 1146/* This is meant to be called from NSString or subclassers only. It is an error for this to be called without the ObjC runtime or an argument which is not an NSString or subclass. 
It can be called with NSCFString, although that would be inefficient (causing indirection) and won't normally happen anyway, as NSCFString overrides hash. 1147*/ 1148CFHashCode CFStringHashNSString(CFStringRef str) { 1149 UniChar buffer[HashEverythingLimit]; 1150 CFIndex bufLen; // Number of characters in the buffer for hashing 1151 CFIndex len = 0; // Actual length of the string 1152 1153 len = CF_OBJC_CALLV((NSString *)str, length); 1154 if (len <= HashEverythingLimit) { 1155 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer range:NSMakeRange(0, len)); 1156 bufLen = len; 1157 } else { 1158 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer range:NSMakeRange(0, 32)); 1159 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer+32 range:NSMakeRange((len >> 1) - 16, 32)); 1160 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer+64 range:NSMakeRange(len - 32, 32)); 1161 bufLen = HashEverythingLimit; 1162 } 1163 return __CFStrHashCharacters(buffer, bufLen, len); 1164} 1165 1166CFHashCode __CFStringHash(CFTypeRef cf) { 1167 /* !!! 
We do not need an IsString assertion here, as this is called by the CFBase runtime only */ 1168 CFStringRef str = (CFStringRef)cf; 1169 const uint8_t *contents = (uint8_t *)__CFStrContents(str); 1170 CFIndex len = __CFStrLength2(str, contents); 1171 1172 if (__CFStrIsEightBit(str)) { 1173 contents += __CFStrSkipAnyLengthByte(str); 1174 return __CFStrHashEightBit(contents, len); 1175 } else { 1176 return __CFStrHashCharacters((const UniChar *)contents, len, len); 1177 } 1178} 1179 1180 1181static CFStringRef __CFStringCopyDescription(CFTypeRef cf) { 1182 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf, __CFGetAllocator(cf), cf); 1183} 1184 1185static CFStringRef __CFStringCopyFormattingDescription(CFTypeRef cf, CFDictionaryRef formatOptions) { 1186 return (CFStringRef)CFStringCreateCopy(__CFGetAllocator(cf), (CFStringRef)cf); 1187} 1188 1189static CFTypeID __kCFStringTypeID = _kCFRuntimeNotATypeID; 1190 1191typedef CFTypeRef (*CF_STRING_CREATE_COPY)(CFAllocatorRef alloc, CFTypeRef theString); 1192 1193static const CFRuntimeClass __CFStringClass = { 1194 _kCFRuntimeScannedObject, 1195 "CFString", 1196 NULL, // init 1197 (CF_STRING_CREATE_COPY)CFStringCreateCopy, 1198 __CFStringDeallocate, 1199 __CFStringEqual, 1200 __CFStringHash, 1201 __CFStringCopyFormattingDescription, 1202 __CFStringCopyDescription 1203}; 1204 1205CF_PRIVATE void __CFStringInitialize(void) { 1206 __kCFStringTypeID = _CFRuntimeRegisterClass(&__CFStringClass); 1207} 1208 1209CFTypeID CFStringGetTypeID(void) { 1210 return __kCFStringTypeID; 1211} 1212 1213 1214static Boolean CFStrIsUnicode(CFStringRef str) { 1215 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, Boolean, (NSString *)str, _encodingCantBeStoredInEightBitCFString); 1216 return __CFStrIsUnicode(str); 1217} 1218 1219 1220 1221#define ALLOCATORSFREEFUNC ((CFAllocatorRef)-1) 1222 1223/* contentsDeallocator indicates how to free the data if it's noCopy == true: 1224 
kCFAllocatorNull: don't free 1225 ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here) 1226 NULL: default allocator 1227 otherwise it's the allocator that should be used (it will be explicitly stored) 1228 if noCopy == false, then freeFunc should be ALLOCATORSFREEFUNC 1229 hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode 1230 possiblyExternalFormat indicates that the bytes might have BOM and be swapped 1231 tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so) 1232 numBytes contains the actual number of bytes in "bytes", including Length byte, 1233 BUT not the NULL byte at the end 1234 bytes should not contain BOM characters 1235 !!! Various flags should be combined to reduce number of arguments, if possible 1236*/ 1237CF_PRIVATE CFStringRef __CFStringCreateImmutableFunnel3( 1238 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding, 1239 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy, 1240 CFAllocatorRef contentsDeallocator, UInt32 converterFlags) { 1241 1242 CFMutableStringRef str; 1243 CFVarWidthCharBuffer vBuf; 1244 CFIndex size; 1245 Boolean useLengthByte = false; 1246 Boolean useNullByte = false; 1247 Boolean useInlineData = false; 1248 1249#if INSTRUMENT_SHARED_STRINGS 1250 const char *recordedEncoding; 1251 char encodingBuffer[128]; 1252 if (encoding == kCFStringEncodingUnicode) recordedEncoding = "Unicode"; 1253 else if (encoding == kCFStringEncodingASCII) recordedEncoding = "ASCII"; 1254 else if (encoding == kCFStringEncodingUTF8) recordedEncoding = "UTF8"; 1255 else if (encoding == kCFStringEncodingMacRoman) recordedEncoding = "MacRoman"; 1256 else { 1257 snprintf(encodingBuffer, sizeof(encodingBuffer), "0x%lX", (unsigned long)encoding); 1258 recordedEncoding = encodingBuffer; 1259 } 1260#endif 1261 1262 if (alloc == NULL) 
alloc = __CFGetDefaultAllocator(); 1263 1264 if (contentsDeallocator == ALLOCATORSFREEFUNC) { 1265 contentsDeallocator = alloc; 1266 } else if (contentsDeallocator == NULL) { 1267 contentsDeallocator = __CFGetDefaultAllocator(); 1268 } 1269 1270 if ((NULL != kCFEmptyString) && (numBytes == 0) && _CFAllocatorIsSystemDefault(alloc)) { // If we are using the system default allocator, and the string is empty, then use the empty string! 1271 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak). 1272 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes); 1273 } 1274 return (CFStringRef)CFRetain(kCFEmptyString); // Quick exit; won't catch all empty strings, but most 1275 } 1276 1277 // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL 1278 1279 vBuf.shouldFreeChars = false; // We use this to remember to free the buffer possibly allocated by decode 1280 1281 // Record whether we're starting out with an ASCII-superset string, because we need to know this later for the string ROM; this may get changed later if we successfully convert down from Unicode. We only record this once because __CFCanUseEightBitCFStringForBytes() can be expensive. 1282 Boolean stringSupportsEightBitCFRepresentation = encoding != kCFStringEncodingUnicode && __CFCanUseEightBitCFStringForBytes((const uint8_t *)bytes, numBytes, encoding); 1283 1284 // We may also change noCopy within this function if we have to decode the string into an external buffer. We do not want to avoid the use of the string ROM merely because we tried to be efficient and reuse the decoded buffer for the CFString's external storage. Therefore, we use this variable to track whether we actually can ignore the noCopy flag (which may or may not be set anyways). 
1285 Boolean stringROMShouldIgnoreNoCopy = false; 1286 1287 // First check to see if the data needs to be converted... 1288 // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy 1289 1290 if ((encoding == kCFStringEncodingUnicode && possiblyExternalFormat) || (encoding != kCFStringEncodingUnicode && ! stringSupportsEightBitCFRepresentation)) { 1291 const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0); 1292 CFIndex realNumBytes = numBytes - (hasLengthByte ? 1 : 0); 1293 Boolean usingPassedInMemory = false; 1294 1295 vBuf.allocator = kCFAllocatorSystemDefault; // We don't want to use client's allocator for temp stuff 1296 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary 1297 1298 if (!__CFStringDecodeByteStream3((const uint8_t *)realBytes, realNumBytes, encoding, false, &vBuf, &usingPassedInMemory, converterFlags)) { 1299 // Note that if the string can't be created, we don't free the buffer, even if there is a contents deallocator. This is on purpose. 1300 return NULL; 1301 } 1302 1303 encoding = vBuf.isASCII ? kCFStringEncodingASCII : kCFStringEncodingUnicode; 1304 1305 // Update our flag according to whether the decoded buffer is ASCII 1306 stringSupportsEightBitCFRepresentation = vBuf.isASCII; 1307 1308 if (!usingPassedInMemory) { 1309 1310 // Because __CFStringDecodeByteStream3() allocated our buffer, it's OK for us to free it if we can get the string from the ROM. 1311 stringROMShouldIgnoreNoCopy = true; 1312 1313 // Make the parameters fit the new situation 1314 numBytes = vBuf.isASCII ? 
vBuf.numChars : (vBuf.numChars * sizeof(UniChar)); 1315 hasLengthByte = hasNullByte = false; 1316 1317 // Get rid of the original buffer if its not being used 1318 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { 1319 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes); 1320 } 1321 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone... 1322 1323 // See if we can reuse any storage the decode func might have allocated 1324 // We do this only for Unicode, as otherwise we would not have NULL and Length bytes 1325 1326 if (vBuf.shouldFreeChars && (alloc == vBuf.allocator) && encoding == kCFStringEncodingUnicode) { 1327 vBuf.shouldFreeChars = false; // Transferring ownership to the CFString 1328 bytes = CFAllocatorReallocate(vBuf.allocator, (void *)vBuf.chars.unicode, numBytes, 0); // Tighten up the storage 1329 noCopy = true; 1330#if INSTRUMENT_SHARED_STRINGS 1331 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-NoCopy"; 1332 else recordedEncoding = "ForeignUnicode-NoCopy"; 1333#endif 1334 } else { 1335#if INSTRUMENT_SHARED_STRINGS 1336 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-Copy"; 1337 else recordedEncoding = "ForeignUnicode-Copy"; 1338#endif 1339 bytes = vBuf.chars.unicode; 1340 noCopy = false; // Can't do noCopy anymore 1341 // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func 1342 } 1343 1344 } 1345 1346 // At this point, all necessary input arguments have been changed to reflect the new state 1347 1348 } else if (encoding == kCFStringEncodingUnicode && tryToReduceUnicode) { // Check to see if we can reduce Unicode to ASCII 1349 CFIndex cnt; 1350 CFIndex len = numBytes / sizeof(UniChar); 1351 Boolean allASCII = true; 1352 1353 for (cnt = 0; cnt < len; cnt++) if (((const UniChar *)bytes)[cnt] > 127) { 1354 allASCII = false; 1355 break; 1356 } 1357 1358 if (allASCII) { // Yes we can! 
1359 uint8_t *ptr, *mem; 1360 Boolean newHasLengthByte = __CFCanUseLengthByte(len); 1361 numBytes = (len + 1 + (newHasLengthByte ? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte 1362 // See if we can use that temporary local buffer in vBuf... 1363 if (numBytes >= __kCFVarWidthLocalBufferSize) { 1364 mem = ptr = (uint8_t *)CFAllocatorAllocate(alloc, numBytes, 0); 1365 if (__CFOASafe) __CFSetLastAllocationEventName(mem, "CFString (store)"); 1366 } else { 1367 mem = ptr = (uint8_t *)(vBuf.localBuffer); 1368 } 1369 if (mem) { // If we can't allocate memory for some reason, use what we had (that is, as if we didn't have all ASCII) 1370 // Copy the Unicode bytes into the new ASCII buffer 1371 hasLengthByte = newHasLengthByte; 1372 hasNullByte = true; 1373 if (hasLengthByte) *ptr++ = (uint8_t)len; 1374 for (cnt = 0; cnt < len; cnt++) ptr[cnt] = (uint8_t)(((const UniChar *)bytes)[cnt]); 1375 ptr[len] = 0; 1376 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { 1377 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes); 1378 } 1379 // Now make everything look like we had an ASCII buffer to start with 1380 bytes = mem; 1381 encoding = kCFStringEncodingASCII; 1382 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone... 1383 noCopy = (numBytes >= __kCFVarWidthLocalBufferSize); // If we had to allocate it, make sure it's kept around 1384 numBytes--; // Should not contain the NULL byte at end... 1385 stringSupportsEightBitCFRepresentation = true; // We're ASCII now! 
1386 stringROMShouldIgnoreNoCopy = true; // We allocated this buffer, so we should feel free to get rid of it if we can use the string ROM 1387#if INSTRUMENT_SHARED_STRINGS 1388 recordedEncoding = "U->A"; 1389#endif 1390 } 1391 } 1392 1393 // At this point, all necessary input arguments have been changed to reflect the new state 1394 } 1395 1396 // Now determine the necessary size 1397#if INSTRUMENT_SHARED_STRINGS || USE_STRING_ROM 1398 Boolean stringSupportsROM = stringSupportsEightBitCFRepresentation; 1399#endif 1400 1401#if INSTRUMENT_SHARED_STRINGS 1402 if (stringSupportsROM) { 1403 const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0); 1404 CFIndex realNumBytes = numBytes - !! hasLengthByte; 1405 __CFRecordStringAllocationEvent(recordedEncoding, realBytes, realNumBytes); 1406 } 1407#endif 1408 1409#if USE_STRING_ROM 1410 CFStringRef romResult = NULL; 1411 1412 1413 if (stringSupportsROM) { 1414 // Disable the string ROM if necessary 1415 static char sDisableStringROM = -1; 1416 if (sDisableStringROM == -1) sDisableStringROM = !! __CFgetenv("CFStringDisableROM"); 1417 1418 if (sDisableStringROM == 0) romResult = _CFSearchStringROM(bytes + !! hasLengthByte, numBytes - !! hasLengthByte); 1419 } 1420 /* if we get a result from our ROM, and noCopy is set, then deallocate the buffer immediately */ 1421 if (romResult) { 1422 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { 1423 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes); 1424 } 1425 1426 /* these don't get used again, but clear them for consistency */ 1427 noCopy = false; 1428 bytes = NULL; 1429 1430 /* set our result to the ROM result which is not really mutable, of course, but that's OK because we don't try to modify it. */ 1431 str = (CFMutableStringRef)romResult; 1432 } 1433 1434 if (! 
romResult) { 1435#else 1436 if (1) { 1437#endif 1438 // Now determine the necessary size 1439 1440 if (noCopy) { 1441 1442 size = sizeof(void *); // Pointer to the buffer 1443 if ((0) || (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull)) { 1444 size += sizeof(void *); // The contentsDeallocator 1445 } 1446 if (!hasLengthByte) size += sizeof(CFIndex); // Explicit length 1447 useLengthByte = hasLengthByte; 1448 useNullByte = hasNullByte; 1449 1450 } else { // Inline data; reserve space for it 1451 1452 useInlineData = true; 1453 size = numBytes; 1454 1455 if (hasLengthByte || (encoding != kCFStringEncodingUnicode && __CFCanUseLengthByte(numBytes))) { 1456 useLengthByte = true; 1457 if (!hasLengthByte) size += 1; 1458 } else { 1459 size += sizeof(CFIndex); // Explicit length 1460 } 1461 if (hasNullByte || encoding != kCFStringEncodingUnicode) { 1462 useNullByte = true; 1463 size += 1; 1464 } 1465 } 1466 1467#ifdef STRING_SIZE_STATS 1468 // Dump alloced CFString size info every so often 1469 static int cnt = 0; 1470 static unsigned sizes[256] = {0}; 1471 int allocedSize = size + sizeof(CFRuntimeBase); 1472 if (allocedSize < 255) sizes[allocedSize]++; else sizes[255]++; 1473 if ((++cnt % 1000) == 0) { 1474 printf ("\nTotal: %d\n", cnt); 1475 int i; for (i = 0; i < 256; i++) printf("%03d: %5d%s", i, sizes[i], ((i % 8) == 7) ? "\n" : " "); 1476 } 1477#endif 1478 1479 // Finally, allocate! 1480 1481 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, size, NULL); 1482 if (str) { 1483 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (immutable)"); 1484 1485 CFOptionFlags allocBits = (0) ? __kCFHasContentsDeallocator : (contentsDeallocator == alloc ? __kCFNotInlineContentsDefaultFree : (contentsDeallocator == kCFAllocatorNull ? __kCFNotInlineContentsNoFree : __kCFNotInlineContentsCustomFree)); 1486 __CFStrSetInfoBits(str, 1487 (useInlineData ? 
__kCFHasInlineContents : allocBits) | 1488 ((encoding == kCFStringEncodingUnicode) ? __kCFIsUnicode : 0) | 1489 (useNullByte ? __kCFHasNullByte : 0) | 1490 (useLengthByte ? __kCFHasLengthByte : 0)); 1491 1492 if (!useLengthByte) { 1493 CFIndex length = numBytes - (hasLengthByte ? 1 : 0); 1494 if (encoding == kCFStringEncodingUnicode) length /= sizeof(UniChar); 1495 __CFStrSetExplicitLength(str, length); 1496 } 1497 1498 if (useInlineData) { 1499 uint8_t *contents = (uint8_t *)__CFStrContents(str); 1500 if (useLengthByte && !hasLengthByte) *contents++ = (uint8_t)numBytes; 1501 memmove(contents, bytes, numBytes); 1502 if (useNullByte) contents[numBytes] = 0; 1503 } else { 1504 __CFStrSetContentPtr(str, bytes); 1505 if (__CFStrHasContentsDeallocator(str)) __CFStrSetContentsDeallocator(str, contentsDeallocator); 1506 } 1507 } else { 1508 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { 1509 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes); 1510 } 1511 } 1512 } 1513 if (vBuf.shouldFreeChars) CFAllocatorDeallocate(vBuf.allocator, (void *)bytes); 1514 1515#if 0 1516#warning Debug code 1517 const uint8_t *contents = (uint8_t *)__CFStrContents(str); 1518 CFIndex len = __CFStrLength2(str, contents); 1519 1520 if (__CFStrIsEightBit(str)) { 1521 contents += __CFStrSkipAnyLengthByte(str); 1522 if (!__CFBytesInASCII(contents, len)) { 1523 printf("CFString with 8 bit backing store not ASCII: %p, \"%.*s\"\n", str, (int)len, contents); 1524 } 1525 } 1526#endif 1527 1528 return str; 1529} 1530 1531/* !!! 
__CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated 1532*/ 1533CFStringRef __CFStringCreateImmutableFunnel2( 1534 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding, 1535 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy, 1536 CFAllocatorRef contentsDeallocator) { 1537 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, possiblyExternalFormat, tryToReduceUnicode, hasLengthByte, hasNullByte, noCopy, contentsDeallocator, 0); 1538} 1539 1540 1541 1542CFStringRef CFStringCreateWithPascalString(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding) { 1543 CFIndex len = (CFIndex)(*(uint8_t *)pStr); 1544 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, false, ALLOCATORSFREEFUNC, 0); 1545} 1546 1547 1548CFStringRef CFStringCreateWithCString(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding) { 1549 CFIndex len = strlen(cStr); 1550 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, false, ALLOCATORSFREEFUNC, 0); 1551} 1552 1553CFStringRef CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) { 1554 CFIndex len = (CFIndex)(*(uint8_t *)pStr); 1555 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, true, contentsDeallocator, 0); 1556} 1557 1558 1559CFStringRef CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) { 1560 CFIndex len = strlen(cStr); 1561 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, true, contentsDeallocator, 0); 1562} 1563 1564 1565CFStringRef CFStringCreateWithCharacters(CFAllocatorRef alloc, const UniChar *chars, 
CFIndex numChars) { 1566 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0); 1567} 1568 1569 1570CFStringRef CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars, CFAllocatorRef contentsDeallocator) { 1571 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, false, false, false, true, contentsDeallocator, 0); 1572} 1573 1574 1575CFStringRef CFStringCreateWithBytes(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat) { 1576 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, false, ALLOCATORSFREEFUNC, 0); 1577} 1578 1579CFStringRef _CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) { 1580 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0); 1581} 1582 1583CFStringRef CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) { 1584 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0); 1585} 1586 1587CFStringRef CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) { 1588 return _CFStringCreateWithFormatAndArgumentsAux(alloc, NULL, formatOptions, format, arguments); 1589} 1590 1591CFStringRef _CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef 
format, va_list arguments) { 1592 CFStringRef str; 1593 CFMutableStringRef outputString = CFStringCreateMutable(kCFAllocatorSystemDefault, 0); //should use alloc if no copy/release 1594 __CFStrSetDesiredCapacity(outputString, 120); // Given this will be tightened later, choosing a larger working string is fine 1595 __CFStringAppendFormatCore(outputString, copyDescFunc, formatOptions, NULL, format, 0, NULL, 0, arguments); 1596 // ??? copy/release should not be necessary here -- just make immutable, compress if possible 1597 // (However, this does make the string inline, and cause the supplied allocator to be used...) 1598 str = (CFStringRef)CFStringCreateCopy(alloc, outputString); 1599 CFRelease(outputString); 1600 return str; 1601} 1602 1603CFStringRef CFStringCreateWithFormat(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, ...) { 1604 CFStringRef result; 1605 va_list argList; 1606 1607 va_start(argList, format); 1608 result = CFStringCreateWithFormatAndArguments(alloc, formatOptions, format, argList); 1609 va_end(argList); 1610 1611 return result; 1612} 1613 1614CFStringRef CFStringCreateWithSubstring(CFAllocatorRef alloc, CFStringRef str, CFRange range) { 1615// CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringRef , (NSString *)str, _createSubstringWithRange:NSMakeRange(range.location, range.length)); 1616 1617 __CFAssertIsString(str); 1618 __CFAssertRangeIsInStringBounds(str, range.location, range.length); 1619 1620 if ((range.location == 0) && (range.length == __CFStrLength(str))) { /* The substring is the whole string... 
*/ 1621 return (CFStringRef)CFStringCreateCopy(alloc, str); 1622 } else if (__CFStrIsEightBit(str)) { 1623 const uint8_t *contents = (const uint8_t *)__CFStrContents(str); 1624 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location + __CFStrSkipAnyLengthByte(str), range.length, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0); 1625 } else { 1626 const UniChar *contents = (UniChar *)__CFStrContents(str); 1627 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location, range.length * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0); 1628 } 1629} 1630 1631CFStringRef CFStringCreateCopy(CFAllocatorRef alloc, CFStringRef str) { 1632// CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringRef, (NSString *)str, copy); 1633 1634 __CFAssertIsString(str); 1635 if (!__CFStrIsMutable((CFStringRef)str) && // If the string is not mutable 1636 ((alloc ? alloc : __CFGetDefaultAllocator()) == __CFGetAllocator(str)) && // and it has the same allocator as the one we're using 1637 (__CFStrIsInline((CFStringRef)str) || __CFStrFreeContentsWhenDone((CFStringRef)str) || __CFStrIsConstant((CFStringRef)str))) { // and the characters are inline, or are owned by the string, or the string is constant 1638 if (!(kCFUseCollectableAllocator && (0))) CFRetain(str); // Then just retain instead of making a true copy 1639 return str; 1640 } 1641 if (__CFStrIsEightBit((CFStringRef)str)) { 1642 const uint8_t *contents = (const uint8_t *)__CFStrContents((CFStringRef)str); 1643 return __CFStringCreateImmutableFunnel3(alloc, contents + __CFStrSkipAnyLengthByte((CFStringRef)str), __CFStrLength2((CFStringRef)str, contents), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0); 1644 } else { 1645 const UniChar *contents = (const UniChar *)__CFStrContents((CFStringRef)str); 1646 return __CFStringCreateImmutableFunnel3(alloc, contents, 
__CFStrLength2((CFStringRef)str, contents) * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0); 1647 } 1648} 1649 1650 1651 1652/*** Constant string stuff... ***/ 1653 1654/* Table which holds constant strings created with CFSTR, when -fconstant-cfstrings option is not used. These dynamically created constant strings are stored in constantStringTable. The keys are the 8-bit constant C-strings from the compiler; the values are the CFStrings created for them. _CFSTRLock protects this table. 1655*/ 1656static CFMutableDictionaryRef constantStringTable = NULL; 1657static CFSpinLock_t _CFSTRLock = CFSpinLockInit; 1658 1659static CFStringRef __cStrCopyDescription(const void *ptr) { 1660 return CFStringCreateWithCStringNoCopy(kCFAllocatorSystemDefault, (const char *)ptr, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull); 1661} 1662 1663static Boolean __cStrEqual(const void *ptr1, const void *ptr2) { 1664 return (strcmp((const char *)ptr1, (const char *)ptr2) == 0); 1665} 1666 1667static CFHashCode __cStrHash(const void *ptr) { 1668 // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently 1669 const char *cStr = (const char *)ptr; 1670 CFIndex len = strlen(cStr); 1671 CFHashCode result = 0; 1672 if (len <= 4) { // All chars 1673 unsigned cnt = len; 1674 while (cnt--) result += (result << 8) + *cStr++; 1675 } else { // First and last 2 chars 1676 result += (result << 8) + cStr[0]; 1677 result += (result << 8) + cStr[1]; 1678 result += (result << 8) + cStr[len-2]; 1679 result += (result << 8) + cStr[len-1]; 1680 } 1681 result += (result << (len & 31)); 1682 return result; 1683} 1684 1685 1686CFStringRef __CFStringMakeConstantString(const char *cStr) { 1687 CFStringRef result; 1688#if defined(DEBUG) 1689 // StringTest checks that we share kCFEmptyString, which is defeated by constantStringAllocatorForDebugging 1690 if ('\0' == *cStr) return kCFEmptyString; 1691#endif 1692 if 
(constantStringTable == NULL) { 1693 CFDictionaryKeyCallBacks constantStringCallBacks = {0, NULL, NULL, __cStrCopyDescription, __cStrEqual, __cStrHash}; 1694 CFDictionaryValueCallBacks constantStringValueCallBacks = kCFTypeDictionaryValueCallBacks; 1695 constantStringValueCallBacks.equal = NULL; // So that we only find strings that are == 1696 CFMutableDictionaryRef table = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, &constantStringCallBacks, &constantStringValueCallBacks); 1697 _CFDictionarySetCapacity(table, 2500); // avoid lots of rehashing 1698 __CFSpinLock(&_CFSTRLock); 1699 if (constantStringTable == NULL) constantStringTable = table; 1700 __CFSpinUnlock(&_CFSTRLock); 1701 if (constantStringTable != table) CFRelease(table); 1702 } 1703 1704 __CFSpinLock(&_CFSTRLock); 1705 if ((result = (CFStringRef)CFDictionaryGetValue(constantStringTable, cStr))) { 1706 __CFSpinUnlock(&_CFSTRLock); 1707 } else { 1708 __CFSpinUnlock(&_CFSTRLock); 1709 1710 { 1711 char *key; 1712 Boolean isASCII = true; 1713 // Given this code path is rarer these days, OK to do this extra work to verify the strings 1714 const char *tmp = cStr; 1715 while (*tmp) { 1716 if (*(tmp++) & 0x80) { 1717 isASCII = false; 1718 break; 1719 } 1720 } 1721 if (!isASCII) { 1722 CFMutableStringRef ms = CFStringCreateMutable(kCFAllocatorSystemDefault, 0); 1723 tmp = cStr; 1724 while (*tmp) { 1725 CFStringAppendFormat(ms, NULL, (*tmp & 0x80) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp); 1726 tmp++; 1727 } 1728 CFLog(kCFLogLevelWarning, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. 
Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms); 1729 CFRelease(ms); 1730 } 1731 // Treat non-7 bit chars in CFSTR() as MacOSRoman, for compatibility 1732 result = CFStringCreateWithCString(kCFAllocatorSystemDefault, cStr, kCFStringEncodingMacRoman); 1733 if (result == NULL) { 1734 CFLog(__kCFLogAssertion, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing")); 1735 HALT; 1736 } 1737 if (__CFOASafe) __CFSetLastAllocationEventName((void *)result, "CFString (CFSTR)"); 1738 if (__CFStrIsEightBit(result)) { 1739 key = (char *)__CFStrContents(result) + __CFStrSkipAnyLengthByte(result); 1740 } else { // For some reason the string is not 8-bit! 1741 CFIndex keySize = strlen(cStr) + 1; 1742 key = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, keySize, 0); 1743 if (__CFOASafe) __CFSetLastAllocationEventName((void *)key, "CFString (CFSTR key)"); 1744 strlcpy(key, cStr, keySize); // !!! We will leak this, if the string is removed from the table (or table is freed) 1745 } 1746 1747 { 1748 CFStringRef resultToBeReleased = result; 1749 CFIndex count; 1750 __CFSpinLock(&_CFSTRLock); 1751 count = CFDictionaryGetCount(constantStringTable); 1752 CFDictionaryAddValue(constantStringTable, key, result); 1753 if (CFDictionaryGetCount(constantStringTable) == count) { // add did nothing, someone already put it there 1754 result = (CFStringRef)CFDictionaryGetValue(constantStringTable, key); 1755 } else { 1756#if __LP64__ 1757 ((struct __CFString *)result)->base._rc = 0; 1758#else 1759 ((struct __CFString *)result)->base._cfinfo[CF_RC_BITS] = 0; 1760#endif 1761 } 1762 __CFSpinUnlock(&_CFSTRLock); 1763 // This either eliminates the extra retain on the freshly created string, or frees it, if it was actually not inserted into the table 1764 CFRelease(resultToBeReleased); 1765 } 1766 } 1767 } 1768 return result; 1769} 1770 1771#if defined(DEBUG) 1772static Boolean __CFStrIsConstantString(CFStringRef str) { 1773 Boolean 
found = false; 1774 if (constantStringTable) { 1775 __CFSpinLock(&_CFSTRLock); 1776 found = CFDictionaryContainsValue(constantStringTable, str); 1777 __CFSpinUnlock(&_CFSTRLock); 1778 } 1779 return found; 1780} 1781#endif 1782 1783 1784#if DEPLOYMENT_TARGET_WINDOWS 1785void __CFStringCleanup (void) { 1786 /* in case library is unloaded, release store for the constant string table */ 1787 if (constantStringTable != NULL) { 1788#if defined(DEBUG) 1789 __CFConstantStringTableBeingFreed = true; 1790 CFRelease(constantStringTable); 1791 __CFConstantStringTableBeingFreed = false; 1792#else 1793 CFRelease(constantStringTable); 1794#endif 1795 constantStringTable = NULL; 1796 } 1797} 1798#endif 1799 1800 1801// Can pass in NSString as replacement string 1802// Call with numRanges > 0, and incrementing ranges 1803 1804static void __CFStringReplaceMultiple(CFMutableStringRef str, CFRange *ranges, CFIndex numRanges, CFStringRef replacement) { 1805 int cnt; 1806 CFStringRef copy = NULL; 1807 if (replacement == str) copy = replacement = CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case 1808 CFIndex replacementLength = CFStringGetLength(replacement); 1809 1810 __CFStringChangeSizeMultiple(str, ranges, numRanges, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement)); 1811 1812 if (__CFStrIsUnicode(str)) { 1813 UniChar *contents = (UniChar *)__CFStrContents(str); 1814 UniChar *firstReplacement = contents + ranges[0].location; 1815 // Extract the replacementString into the first location, then copy from there 1816 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), firstReplacement); 1817 for (cnt = 1; cnt < numRanges; cnt++) { 1818 // The ranges are in terms of the original string; so offset by the change in length due to insertion 1819 contents += replacementLength - ranges[cnt - 1].length; 1820 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength * sizeof(UniChar)); 
1821 } 1822 } else { 1823 uint8_t *contents = (uint8_t *)__CFStrContents(str); 1824 uint8_t *firstReplacement = contents + ranges[0].location + __CFStrSkipAnyLengthByte(str); 1825 // Extract the replacementString into the first location, then copy from there 1826 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement, replacementLength, NULL); 1827 contents += __CFStrSkipAnyLengthByte(str); // Now contents will simply track the location to insert next string into 1828 for (cnt = 1; cnt < numRanges; cnt++) { 1829 // The ranges are in terms of the original string; so offset by the change in length due to insertion 1830 contents += replacementLength - ranges[cnt - 1].length; 1831 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength); 1832 } 1833 } 1834 if (copy) CFRelease(copy); 1835} 1836 1837// Can pass in NSString as replacement string 1838 1839CF_INLINE void __CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) { 1840 CFStringRef copy = NULL; 1841 if (replacement == str) copy = replacement = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case 1842 CFIndex replacementLength = CFStringGetLength(replacement); 1843 1844 __CFStringChangeSize(str, range, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement)); 1845 1846 if (__CFStrIsUnicode(str)) { 1847 UniChar *contents = (UniChar *)__CFStrContents(str); 1848 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), contents + range.location); 1849 } else { 1850 uint8_t *contents = (uint8_t *)__CFStrContents(str); 1851 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, contents + range.location + __CFStrSkipAnyLengthByte(str), replacementLength, NULL); 1852 } 1853 1854 if (copy) CFRelease(copy); 1855} 1856 1857/* If client does not provide a 
minimum capacity 1858*/ 1859#define DEFAULTMINCAPACITY 32 1860 1861CF_INLINE CFMutableStringRef __CFStringCreateMutableFunnel(CFAllocatorRef alloc, CFIndex maxLength, UInt32 additionalInfoBits) { 1862 CFMutableStringRef str; 1863 if ((0)) additionalInfoBits |= __kCFHasContentsAllocator; 1864 Boolean hasExternalContentsAllocator = (additionalInfoBits & __kCFHasContentsAllocator) ? true : false; 1865 1866 if (alloc == NULL) alloc = __CFGetDefaultAllocator(); 1867 1868 // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator... 1869 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, sizeof(struct __notInlineMutable) - (hasExternalContentsAllocator ? 0 : sizeof(CFAllocatorRef)), NULL); 1870 if (str) { 1871 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (mutable)"); 1872 1873 __CFStrSetInfoBits(str, __kCFIsMutable | additionalInfoBits); 1874 str->variants.notInlineMutable.buffer = NULL; 1875 __CFStrSetExplicitLength(str, 0); 1876 str->variants.notInlineMutable.hasGap = str->variants.notInlineMutable.isFixedCapacity = str->variants.notInlineMutable.isExternalMutable = str->variants.notInlineMutable.capacityProvidedExternally = 0; 1877 if (maxLength != 0) __CFStrSetIsFixed(str); 1878 __CFStrSetDesiredCapacity(str, (maxLength == 0) ? DEFAULTMINCAPACITY : maxLength); 1879 __CFStrSetCapacity(str, 0); 1880 if (__CFStrHasContentsAllocator(str)) { 1881 // contents allocator starts out as the string's own allocator 1882 __CFStrSetContentsAllocator(str, alloc); 1883 } 1884 } 1885 return str; 1886} 1887 1888CFMutableStringRef CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc, UniChar *chars, CFIndex numChars, CFIndex capacity, CFAllocatorRef externalCharactersAllocator) { 1889 CFOptionFlags contentsAllocationBits = externalCharactersAllocator ? ((externalCharactersAllocator == kCFAllocatorNull) ? 
__kCFNotInlineContentsNoFree : __kCFHasContentsAllocator) : __kCFNotInlineContentsDefaultFree; 1890 CFMutableStringRef string = __CFStringCreateMutableFunnel(alloc, 0, contentsAllocationBits | __kCFIsUnicode); 1891 if (string) { 1892 __CFStrSetIsExternalMutable(string); 1893 if (__CFStrHasContentsAllocator(string)) { 1894 CFAllocatorRef allocator = __CFStrContentsAllocator((CFMutableStringRef)string); 1895 if (!(0 || 0)) CFRelease(allocator); 1896 __CFStrSetContentsAllocator(string, externalCharactersAllocator); 1897 } 1898 CFStringSetExternalCharactersNoCopy(string, chars, numChars, capacity); 1899 } 1900 return string; 1901} 1902 1903CFMutableStringRef CFStringCreateMutable(CFAllocatorRef alloc, CFIndex maxLength) { 1904 return __CFStringCreateMutableFunnel(alloc, maxLength, __kCFNotInlineContentsDefaultFree); 1905} 1906 1907CFMutableStringRef CFStringCreateMutableCopy(CFAllocatorRef alloc, CFIndex maxLength, CFStringRef string) { 1908 CFMutableStringRef newString; 1909 1910 // CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFMutableStringRef, (NSString *)string, mutableCopy); 1911 1912 __CFAssertIsString(string); 1913 1914 newString = CFStringCreateMutable(alloc, maxLength); 1915 __CFStringReplace(newString, CFRangeMake(0, 0), string); 1916 1917 return newString; 1918} 1919 1920 1921CF_PRIVATE void _CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex len) { 1922 __CFAssertIsStringAndMutable(str); 1923 __CFStrSetDesiredCapacity(str, len); 1924} 1925 1926 1927/* This one is for CF 1928*/ 1929CFIndex CFStringGetLength(CFStringRef str) { 1930 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFIndex, (NSString *)str, length); 1931 1932 __CFAssertIsString(str); 1933 return __CFStrLength(str); 1934} 1935 1936/* This one is for NSCFString; it does not ObjC dispatch or assertion check 1937*/ 1938CFIndex _CFStringGetLength2(CFStringRef str) { 1939 return __CFStrLength(str); 1940} 1941 1942 1943/* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. 
Don't call it from elsewhere. 1944*/ 1945CF_INLINE UniChar __CFStringGetCharacterAtIndexGuts(CFStringRef str, CFIndex idx, const uint8_t *contents) { 1946 if (__CFStrIsEightBit(str)) { 1947 contents += __CFStrSkipAnyLengthByte(str); 1948#if defined(DEBUG) 1949 if (!__CFCharToUniCharFunc && (contents[idx] >= 128)) { 1950 // Can't do log here, as it might be too early 1951 fprintf(stderr, "Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n"); 1952 } 1953#endif 1954 return __CFCharToUniCharTable[contents[idx]]; 1955 } 1956 1957 return ((UniChar *)contents)[idx]; 1958} 1959 1960/* This one is for the CF API 1961*/ 1962UniChar CFStringGetCharacterAtIndex(CFStringRef str, CFIndex idx) { 1963 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, UniChar, (NSString *)str, characterAtIndex:(NSUInteger)idx); 1964 1965 __CFAssertIsString(str); 1966 __CFAssertIndexIsInStringBounds(str, idx); 1967 return __CFStringGetCharacterAtIndexGuts(str, idx, (const uint8_t *)__CFStrContents(str)); 1968} 1969 1970/* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check 1971*/ 1972int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str, CFIndex idx, UniChar *ch) { 1973 const uint8_t *contents = (const uint8_t *)__CFStrContents(str); 1974 if (idx >= __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds; 1975 *ch = __CFStringGetCharacterAtIndexGuts(str, idx, contents); 1976 return _CFStringErrNone; 1977} 1978 1979 1980/* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere. 
1981*/ 1982CF_INLINE void __CFStringGetCharactersGuts(CFStringRef str, CFRange range, UniChar *buffer, const uint8_t *contents) { 1983 if (__CFStrIsEightBit(str)) { 1984 __CFStrConvertBytesToUnicode(((uint8_t *)contents) + (range.location + __CFStrSkipAnyLengthByte(str)), buffer, range.length); 1985 } else { 1986 const UniChar *uContents = ((UniChar *)contents) + range.location; 1987 memmove(buffer, uContents, range.length * sizeof(UniChar)); 1988 } 1989} 1990 1991/* This one is for the CF API 1992*/ 1993void CFStringGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) { 1994 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSString *)str, getCharacters:(unichar *)buffer range:NSMakeRange(range.location, range.length)); 1995 1996 __CFAssertIsString(str); 1997 __CFAssertRangeIsInStringBounds(str, range.location, range.length); 1998 __CFStringGetCharactersGuts(str, range, buffer, (const uint8_t *)__CFStrContents(str)); 1999} 2000 2001/* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check 2002*/ 2003int _CFStringCheckAndGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) { 2004 const uint8_t *contents = (const uint8_t *)__CFStrContents(str); 2005 if (range.location + range.length > __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds; 2006 __CFStringGetCharactersGuts(str, range, buffer, contents); 2007 return _CFStringErrNone; 2008} 2009 2010 2011CFIndex CFStringGetBytes(CFStringRef str, CFRange range, CFStringEncoding encoding, uint8_t lossByte, Boolean isExternalRepresentation, uint8_t *buffer, CFIndex maxBufLen, CFIndex *usedBufLen) { 2012 2013 /* No objc dispatch needed here since __CFStringEncodeByteStream works with both CFString and NSString */ 2014 __CFAssertIsNotNegative(maxBufLen); 2015 2016 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { // If we can grope the ivars, let's do it... 
2017 __CFAssertIsString(str); 2018 __CFAssertRangeIsInStringBounds(str, range.location, range.length); 2019 2020 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string 2021 const unsigned char *contents = (const unsigned char *)__CFStrContents(str); 2022 CFIndex cLength = range.length; 2023 2024 if (buffer) { 2025 if (cLength > maxBufLen) cLength = maxBufLen; 2026 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str) + range.location, cLength); 2027 } 2028 if (usedBufLen) *usedBufLen = cLength; 2029 2030 return cLength; 2031 } 2032 } 2033 2034 return __CFStringEncodeByteStream(str, range.location, range.length, isExternalRepresentation, encoding, lossByte, buffer, maxBufLen, usedBufLen); 2035} 2036 2037 2038ConstStringPtr CFStringGetPascalStringPtr (CFStringRef str, CFStringEncoding encoding) { 2039 2040 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */ 2041 __CFAssertIsString(str); 2042 if (__CFStrHasLengthByte(str) && __CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII 2043 const uint8_t *contents = (const uint8_t *)__CFStrContents(str); 2044 if (__CFStrHasExplicitLength(str) && (__CFStrLength2(str, contents) != (SInt32)(*contents))) return NULL; // Invalid length byte 2045 return (ConstStringPtr)contents; 2046 } 2047 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII? 
2048 } 2049 return NULL; 2050} 2051 2052 2053const char * CFStringGetCStringPtr(CFStringRef str, CFStringEncoding encoding) { 2054 2055 if (encoding != __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII != __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding))) return NULL; 2056 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII? 2057 2058 if (str == NULL) return NULL; // Should really just crash, but for compatibility... see <rdar://problem/12340248> 2059 2060 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, const char *, (NSString *)str, _fastCStringContents:true); 2061 2062 __CFAssertIsString(str); 2063 2064 if (__CFStrHasNullByte(str)) { 2065 // Note: this is called a lot, 27000 times to open a small xcode project with one file open. 2066 // Of these uses about 1500 are for cStrings/utf8strings. 2067#if 0 2068 // Only sometimes when the stars are aligned will this call return a gc pointer 2069 // under GC we can only really return a pointer to the start of a GC buffer for cString use 2070 // (Is there a simpler way to ask if contents isGC?) 2071 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? 
__CFStrContentsAllocator(str) : __CFGetAllocator(str); 2072 if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) { 2073 if (__CFStrSkipAnyLengthByte(str) != 0 || !__CFStrIsMutable(str)) { 2074 static int counter = 0; 2075 printf("CFString %dth unsafe safe string %s\n", ++counter, __CFStrContents(str) + __CFStrSkipAnyLengthByte(str)); 2076 return NULL; 2077 } 2078 } 2079#endif 2080 return (const char *)__CFStrContents(str) + __CFStrSkipAnyLengthByte(str); 2081 } else { 2082 return NULL; 2083 } 2084} 2085 2086 2087const UniChar *CFStringGetCharactersPtr(CFStringRef str) { 2088 2089 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, const UniChar *, (NSString *)str, _fastCharacterContents); 2090 2091 __CFAssertIsString(str); 2092 if (__CFStrIsUnicode(str)) return (const UniChar *)__CFStrContents(str); 2093 return NULL; 2094} 2095 2096 2097Boolean CFStringGetPascalString(CFStringRef str, Str255 buffer, CFIndex bufferSize, CFStringEncoding encoding) { 2098 CFIndex length; 2099 CFIndex usedLen; 2100 2101 __CFAssertIsNotNegative(bufferSize); 2102 if (bufferSize < 1) return false; 2103 2104 if (CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? 
Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */ 2105 length = CFStringGetLength(str); 2106 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring 2107 } else { 2108 const uint8_t *contents; 2109 2110 __CFAssertIsString(str); 2111 2112 contents = (const uint8_t *)__CFStrContents(str); 2113 length = __CFStrLength2(str, contents); 2114 2115 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring 2116 2117 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string 2118 if (length >= bufferSize) return false; 2119 memmove((void*)(1 + (const char*)buffer), (__CFStrSkipAnyLengthByte(str) + contents), length); 2120 *buffer = (unsigned char)length; 2121 return true; 2122 } 2123 } 2124 2125 if (__CFStringEncodeByteStream(str, 0, length, false, encoding, false, (UInt8 *)(1 + (uint8_t *)buffer), bufferSize - 1, &usedLen) != length) { 2126 2127#if defined(DEBUG) 2128 if (bufferSize > 0) { 2129 strlcpy((char *)buffer + 1, CONVERSIONFAILURESTR, bufferSize - 1); 2130 buffer[0] = (unsigned char)((CFIndex)sizeof(CONVERSIONFAILURESTR) < (bufferSize - 1) ? 
(CFIndex)sizeof(CONVERSIONFAILURESTR) : (bufferSize - 1)); 2131 } 2132#else 2133 if (bufferSize > 0) buffer[0] = 0; 2134#endif 2135 return false; 2136 } 2137 *buffer = (unsigned char)usedLen; 2138 return true; 2139} 2140 2141Boolean CFStringGetCString(CFStringRef str, char *buffer, CFIndex bufferSize, CFStringEncoding encoding) { 2142 const uint8_t *contents; 2143 CFIndex len; 2144 2145 __CFAssertIsNotNegative(bufferSize); 2146 if (bufferSize < 1) return false; 2147 2148 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, Boolean, (NSString *)str, _getCString:buffer maxLength:(NSUInteger)bufferSize - 1 encoding:encoding); 2149 2150 __CFAssertIsString(str); 2151 2152 contents = (const uint8_t *)__CFStrContents(str); 2153 len = __CFStrLength2(str, contents); 2154 2155 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string 2156 if (len >= bufferSize) return false; 2157 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str), len); 2158 buffer[len] = 0; 2159 return true; 2160 } else { 2161 CFIndex usedLen; 2162 2163 if (__CFStringEncodeByteStream(str, 0, len, false, encoding, false, (unsigned char*) buffer, bufferSize - 1, &usedLen) == len) { 2164 buffer[usedLen] = '\0'; 2165 return true; 2166 } else { 2167#if defined(DEBUG) 2168 strlcpy(buffer, CONVERSIONFAILURESTR, bufferSize); 2169#else 2170 if (bufferSize > 0) buffer[0] = 0; 2171#endif 2172 return false; 2173 } 2174 } 2175} 2176 2177extern Boolean __CFLocaleGetNullLocale(struct __CFLocale *locale); 2178extern void __CFLocaleSetNullLocale(struct __CFLocale *locale); 2179 2180static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale, bool collatorOnly) { 2181 CFStringRef localeID; 2182 const char *langID = NULL; 2183 static const void *lastLocale = NULL; 2184 static const char *lastLangID = NULL; 2185 
static CFSpinLock_t lock = CFSpinLockInit; 2186 2187 if (__CFLocaleGetNullLocale((struct __CFLocale *)locale)) return NULL; 2188 2189 __CFSpinLock(&lock); 2190 if ((NULL != lastLocale) && (lastLocale == locale)) { 2191 __CFSpinUnlock(&lock); 2192 return lastLangID; 2193 } 2194 __CFSpinUnlock(&lock); 2195 2196 localeID = (CFStringRef)CFLocaleGetValue(locale, __kCFLocaleCollatorID); 2197 2198 if (!collatorOnly) { 2199 CFIndex length = __CFStrLength(localeID); 2200 2201 if ((length < 2) || ((4 == length) && CFEqual(localeID, CFSTR("root")))) localeID = (CFStringRef)CFLocaleGetIdentifier(locale); 2202 } 2203 2204 // This is somewhat depending on CFLocale implementation always creating CFString for locale identifer ??? 2205 if (__CFStrLength(localeID) > 1) { 2206 const void *contents = __CFStrContents(localeID); 2207 const char *string; 2208 char buffer[2]; 2209 2210 if (__CFStrIsEightBit(localeID)) { 2211 string = ((const char *)contents) + __CFStrSkipAnyLengthByte(localeID); 2212 } else { 2213 const UTF16Char *characters = (const UTF16Char *)contents; 2214 2215 buffer[0] = (char)*(characters++); 2216 buffer[1] = (char)*characters; 2217 string = buffer; 2218 } 2219 2220 if (!strncmp(string, "az", 2)) { // Azerbaijani 2221 langID = "az"; 2222 } else if (!strncmp(string, "lt", 2)) { // Lithuanian 2223 langID = "lt"; 2224 } else if (!strncmp(string, "tr", 2)) { // Turkish 2225 langID = "tr"; 2226 } else if (!strncmp(string, "nl", 2)) { // Dutch 2227 langID = "nl"; 2228 } else if (!strncmp(string, "el", 2)) { // Greek 2229 langID = "el"; 2230 } 2231 } 2232 2233 if (langID == NULL) __CFLocaleSetNullLocale((struct __CFLocale *)locale); 2234 2235 __CFSpinLock(&lock); 2236 lastLocale = locale; 2237 lastLangID = langID; 2238 __CFSpinUnlock(&lock); 2239 2240 return langID; 2241} 2242 2243CF_INLINE bool _CFCanUseLocale(CFLocaleRef locale) { 2244 if (locale) { 2245 return true; 2246 } 2247 return false; 2248} 2249 2250#define MAX_CASE_MAPPING_BUF (8) 2251#define ZERO_WIDTH_JOINER 
(0x200D)
#define COMBINING_GRAPHEME_JOINER (0x034F)
// Hangul ranges
#define HANGUL_CHOSEONG_START (0x1100)
#define HANGUL_CHOSEONG_END (0x115F)
#define HANGUL_JUNGSEONG_START (0x1160)
#define HANGUL_JUNGSEONG_END (0x11A2)
#define HANGUL_JONGSEONG_START (0x11A8)
#define HANGUL_JONGSEONG_END (0x11F9)

#define HANGUL_SYLLABLE_START (0xAC00)
#define HANGUL_SYLLABLE_END (0xD7AF)


// Returns the length of characters filled into outCharacters. If no change, returns 0. maxBufferLength should be at least 8
//
// Folds the character cluster that starts with `character` (found at `index` of `buffer`)
// according to `flags`, writing the result as UTF-32 into outCharacters:
//   - `character` == 0: nothing is done and 0 is returned.
//   - ASCII (and not 'I' while a langCode is supplied): 'A'..'Z' is lowercased when
//     kCFCompareCaseInsensitive is set; nothing else changes.
//   - otherwise, in this order: width mapping for U+FF00..U+FFEF, surrogate pair
//     combination, canonical decomposition, case folding (with the dotted-I special case
//     for "tr"/"az").
//   - with kCFCompareDiacriticInsensitive or kCFCompareNonliteral the grapheme-extend
//     characters that follow are consumed as well, and the trailing run of such
//     characters in the output is priority-sorted.
// *consumedLength (if non-NULL) receives the number of UTF-16 units consumed from
// `buffer`, but only when the return value is > 0.
static CFIndex __CFStringFoldCharacterClusterAtIndex(UTF32Char character, CFStringInlineBuffer *buffer, CFIndex index, CFOptionFlags flags, const uint8_t *langCode, UTF32Char *outCharacters, CFIndex maxBufferLength, CFIndex *consumedLength) {
    CFIndex filledLength = 0, currentIndex = index;

    if (0 != character) {
        UTF16Char lowSurrogate;
        CFIndex planeNo = (character >> 16);
        bool isTurkikCapitalI = false;
        // Plane-0 bitmaps are looked up once and kept for the life of the process.
        // NOTE(review): this lazy initialization is not synchronized.
        static const uint8_t *decompBMP = NULL;
        static const uint8_t *graphemeBMP = NULL;

        if (NULL == decompBMP) {
            decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
            graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
        }

        ++currentIndex;    // the base character itself has been consumed

        if ((character < 0x0080) && ((NULL == langCode) || (character != 'I'))) { // ASCII
            if ((flags & kCFCompareCaseInsensitive) && (character >= 'A') && (character <= 'Z')) {
                character += ('a' - 'A');
                *outCharacters = character;
                filledLength = 1;
            }
        } else {
            // do width-insensitive mapping
            if ((flags & kCFCompareWidthInsensitive) && (character >= 0xFF00) && (character <= 0xFFEF)) {
                (void)CFUniCharCompatibilityDecompose(&character, 1, 1);
                *outCharacters = character;
                filledLength = 1;
            }

            // map surrogates
            if ((0 == planeNo) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)))) {
                character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
                ++currentIndex;
                planeNo = (character >> 16);
            }

            // decompose
            if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
                if (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, planeNo)))) {
                    UTF32Char original = character;

                    filledLength = CFUniCharDecomposeCharacter(character, outCharacters, maxBufferLength);
                    character = *outCharacters;

                    if ((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) {
                        filledLength = 1; // reset if Roman, Greek, Cyrillic
                    } else if (0 == (flags & kCFCompareNonliteral)) {
                        // diacritic-insensitive only, base outside that range: undo the decomposition
                        character = original;
                        filledLength = 0;
                    }
                }
            }

            // fold case
            if (flags & kCFCompareCaseInsensitive) {
                const uint8_t *nonBaseBitmap;
                bool filterNonBase = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? true : false);
                static const uint8_t *lowerBMP = NULL;
                static const uint8_t *caseFoldBMP = NULL;

                if (NULL == lowerBMP) {
                    lowerBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, 0);
                    caseFoldBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, 0);
                }

                if ((NULL != langCode) && ('I' == character) && ((0 == strcmp((const char *)langCode, "tr")) || (0 == strcmp((const char *)langCode, "az")))) { // do Turkic special-casing
                    // 'I' followed by U+0307 (combining dot above) folds to a plain 'i'.
                    if (filledLength > 1) {
                        // the dot is already in the decomposed output: drop it and close the gap
                        if (0x0307 == outCharacters[1]) {
                            if (--filledLength > 1) memmove((outCharacters + 1), (outCharacters + 2), sizeof(UTF32Char) * (filledLength - 1));
                            character = *outCharacters = 'i';
                            isTurkikCapitalI = true;
                        }
                    } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)) {
                        // the dot is the next character in the source: consume it
                        character = *outCharacters = 'i';
                        filledLength = 1;
                        ++currentIndex;
                        isTurkikCapitalI = true;
                    }
                }
                if (!isTurkikCapitalI && (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? lowerBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, planeNo))) || CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? caseFoldBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, planeNo))))) {
                    UTF16Char caseFoldBuffer[MAX_CASE_MAPPING_BUF];
                    const UTF16Char *bufferP = caseFoldBuffer, *bufferLimit;
                    UTF32Char *outCharactersP = outCharacters;
                    uint32_t bufferLength = CFUniCharMapCaseTo(character, caseFoldBuffer, MAX_CASE_MAPPING_BUF, kCFUniCharCaseFold, 0, langCode);

                    bufferLimit = bufferP + bufferLength;

                    if (filledLength > 0) --filledLength; // decrement filledLength (will add back later)

                    // make space for casefold characters
                    if ((filledLength > 0) && (bufferLength > 1)) {
                        CFIndex totalScalerLength = 0;

                        // count the case-folded result in scalars (a surrogate pair counts once)
                        while (bufferP < bufferLimit) {
                            if (CFUniCharIsSurrogateHighCharacter(*(bufferP++)) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) ++bufferP;
                            ++totalScalerLength;
                        }
                        // shift the characters that followed the base so the folded base fits in front
                        memmove(outCharacters + totalScalerLength, outCharacters + 1, filledLength * sizeof(UTF32Char));
                        bufferP = caseFoldBuffer;
                    }

                    // fill
                    while (bufferP < bufferLimit) {
                        character = *(bufferP++);
                        if (CFUniCharIsSurrogateHighCharacter(character) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) {
                            character = CFUniCharGetLongCharacterForSurrogatePair(character, *(bufferP++));
                            nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
                        } else {
                            nonBaseBitmap = graphemeBMP;
                        }

                        // when filtering, grapheme-extend characters produced by the fold are left out
                        if (!filterNonBase || !CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
                            *(outCharactersP++) = character;
                            ++filledLength;
                        }
                    }
                }
            }
        }

        // collect following combining marks
        if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
            const uint8_t *nonBaseBitmap;
            const uint8_t *decompBitmap;
            // doFill == false: following marks are consumed but not written to the output
            bool doFill = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? false : true);

            if (0 == filledLength) {
                *outCharacters = character; // filledLength will be updated below on demand

                if (doFill) { // check if really needs to fill
                    UTF32Char nonBaseCharacter = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);

                    if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
                        nonBaseCharacter = CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter, lowSurrogate);
                        nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (nonBaseCharacter >> 16));
                        decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (nonBaseCharacter >> 16));
                    } else {
                        nonBaseBitmap = graphemeBMP;
                        decompBitmap = decompBMP;
                    }

                    if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, nonBaseBitmap)) {
                        filledLength = 1; // For the base character

                        if ((0 == (flags & kCFCompareDiacriticInsensitive)) || (nonBaseCharacter > 0x050F)) {
                            if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, decompBitmap)) {
                                filledLength += CFUniCharDecomposeCharacter(nonBaseCharacter, &(outCharacters[filledLength]), maxBufferLength - filledLength);
                            } else {
                                outCharacters[filledLength++] = nonBaseCharacter;
                            }
                        }
                        // nonBaseBitmap is graphemeBMP exactly when no surrogate pair was combined above
                        currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
                    } else {
                        doFill = false;
                    }
                }
            }

            while (filledLength < maxBufferLength) { // do the rest
                character = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);

                if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
                    character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
                    nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
                    decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (character >> 16));
                } else {
                    nonBaseBitmap = graphemeBMP;
                    decompBitmap = decompBMP;
                }
                if (isTurkikCapitalI) {
                    // first pass after the dotted-I case: only the flag is cleared; currentIndex is not advanced
                    isTurkikCapitalI = false;
                } else if (CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
                    if (doFill) {
                        if (CFUniCharIsMemberOfBitmap(character, decompBitmap)) {
                            CFIndex currentLength = CFUniCharDecomposeCharacter(character, &(outCharacters[filledLength]), maxBufferLength - filledLength);

                            if (0 == currentLength) break; // didn't fit

                            filledLength += currentLength;
                        } else {
                            outCharacters[filledLength++] = character;
                        }
                    } else if (0 == filledLength) {
                        filledLength = 1; // For the base character
                    }
                    currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
                } else {
                    break;    // reached a character that is not grapheme-extend: the cluster ends here
                }
            }

            if (filledLength > 1) {
                UTF32Char *sortCharactersLimit = outCharacters + filledLength;
                UTF32Char *sortCharacters = sortCharactersLimit - 1;

                // walk back over the trailing run of grapheme-extend characters
                while ((outCharacters < sortCharacters) && CFUniCharIsMemberOfBitmap(*sortCharacters, ((*sortCharacters < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (*sortCharacters >> 16))))) --sortCharacters;

                if ((sortCharactersLimit - sortCharacters) > 1) CFUniCharPrioritySort(sortCharacters, (sortCharactersLimit - sortCharacters)); // priority sort
            }
        }
    }

    if ((filledLength > 0) && (NULL != consumedLength)) *consumedLength = (currentIndex - index);

    return filledLength;
}

// When kCFCompareIgnoreNonAlphanumeric is set, initializes `buffer` over the set of
// non-alphanumeric characters and returns true; otherwise leaves `buffer` untouched and
// returns false. The set is the inverse of the predefined alphanumeric set, built on
// first use and published with a compare-and-swap (a thread that loses the swap
// releases its own copy).
static bool __CFStringFillCharacterSetInlineBuffer(CFCharacterSetInlineBuffer *buffer, CFStringCompareFlags compareOptions) {
    if (0 != (compareOptions & kCFCompareIgnoreNonAlphanumeric)) {
        static CFCharacterSetRef nonAlnumChars = NULL;

        if (NULL == nonAlnumChars) {
            CFMutableCharacterSetRef cset = CFCharacterSetCreateMutableCopy(kCFAllocatorSystemDefault, CFCharacterSetGetPredefined(kCFCharacterSetAlphaNumeric));
            CFCharacterSetInvert(cset);
            if (!OSAtomicCompareAndSwapPtrBarrier(NULL, cset, (void **)&nonAlnumChars)) CFRelease(cset);
        }

        CFCharacterSetInitInlineBuffer(nonAlnumChars, buffer);

        return true;
    }

    return false;
}

#define kCFStringStackBufferLength (__kCFStringInlineBufferLength)

/* Compares rangeToCompare of `string` against the whole of `string2`.

   compareOptions selects case / diacritic / width insensitivity, non-literal
   comparison, numeric comparison, forced ordering, localized comparison and ignoring
   of non-alphanumeric characters. When kCFCompareLocalized is set and `locale` is NULL
   the current locale is copied for the duration of the call and released before
   returning.

   Outline:
     1. With no locale, no ignored-character set and no numeric option, try a direct
        comparison of the strings' internal storage (eight-bit bytes or UTF-16 units).
     2. Otherwise walk both strings through inline buffers. Characters that differ are
        folded one cluster at a time with __CFStringFoldCharacterClusterAtIndex into
        strBuf1 / strBuf2 and the folded sequences are compared. When a locale is
        present, the decision on a difference is handed to _CFCompareStringsWithLocale.
     3. After the loop, trailing characters that the options make ignorable are skipped
        and the remaining lengths decide the result.

   NOTE(review): several fast paths store a difference of two CFIndex lengths in an
   `int` (cmpResult); presumably harmless for realistic lengths, but the narrowing
   could misreport very large length differences -- confirm.
*/
CFComparisonResult CFStringCompareWithOptionsAndLocale(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions, CFLocaleRef locale) {
    /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
    UTF32Char strBuf1[kCFStringStackBufferLength];    // folded cluster currently being consumed from string
    UTF32Char strBuf2[kCFStringStackBufferLength];    // folded cluster currently being consumed from string2
    CFStringInlineBuffer inlineBuf1, inlineBuf2;
    UTF32Char str1Char, str2Char;
    CFIndex str1UsedLen, str2UsedLen;    // UTF-16 units the current character/cluster occupies in its source
    CFIndex str1Index = 0, str2Index = 0, strBuf1Index = 0, strBuf2Index = 0, strBuf1Len = 0, strBuf2Len = 0;
    CFIndex str1LocalizedIndex = 0, str2LocalizedIndex = 0;    // where a locale-based comparison restarts
    CFIndex forcedIndex1 = 0, forcedIndex2 = 0;    // where the first forced-ordering difference was seen
    CFIndex str2Len = CFStringGetLength(string2);
    bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
    bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
    bool equalityOptions = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive)) ? true : false);
    bool numerically = ((compareOptions & kCFCompareNumerically) ? true : false);
    bool forceOrdering = ((compareOptions & kCFCompareForcedOrdering) ? true : false);
    const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
    const uint8_t *langCode;
    CFComparisonResult compareResult = kCFCompareEqualTo;
    UTF16Char otherChar;
    Boolean freeLocale = false;    // true when `locale` was copied here and must be released
    CFCharacterSetInlineBuffer *ignoredChars = NULL;
    CFCharacterSetInlineBuffer csetBuffer;
    bool numericEquivalence = false;

    if ((compareOptions & kCFCompareLocalized) && (NULL == locale)) {
        locale = CFLocaleCopyCurrent();
        freeLocale = true;
    }

    langCode = ((NULL == locale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale, true));

    if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
        ignoredChars = &csetBuffer;
        equalityOptions = true;
    }

    // Fast paths. `locale` is NULL throughout this block, so the early returns have nothing to release.
    if ((NULL == locale) && (NULL == ignoredChars) && !numerically) { // could do binary comp (be careful when adding new flags)
        CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
        const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
        const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(string2, eightBitEncoding);
        CFIndex factor = sizeof(uint8_t);    // bytes per character for the memcmp below

        if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
            compareOptions &= ~kCFCompareNonliteral; // remove non-literal

            if ((kCFStringEncodingASCII == eightBitEncoding) && (false == forceOrdering)) {
                if (caseInsensitive) {
                    int cmpResult = strncasecmp_l((const char *)str1Bytes + rangeToCompare.location, (const char *)str2Bytes, __CFMin(rangeToCompare.length, str2Len), NULL);

                    if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;

                    return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
                }
            } else if (caseInsensitive || diacriticsInsensitive) {
                CFIndex limitLength = __CFMin(rangeToCompare.length, str2Len);

                str1Bytes += rangeToCompare.location;

                // Byte-wise scan that is valid while both bytes are ASCII; a differing
                // non-ASCII byte abandons the fast path (str1Bytes = NULL) and the general
                // loop below resumes from str1Index / str2Index.
                while (str1Index < limitLength) {
                    str1Char = str1Bytes[str1Index];
                    str2Char = str2Bytes[str1Index];

                    if (str1Char != str2Char) {
                        if ((str1Char < 0x80) && (str2Char < 0x80)) {
                            if (forceOrdering && (kCFCompareEqualTo == compareResult) && (str1Char != str2Char)) compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
                            if (caseInsensitive) {
                                if ((str1Char >= 'A') && (str1Char <= 'Z')) str1Char += ('a' - 'A');
                                if ((str2Char >= 'A') && (str2Char <= 'Z')) str2Char += ('a' - 'A');
                            }

                            if (str1Char != str2Char) return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
                        } else {
                            str1Bytes = NULL;
                            break;
                        }
                    }
                    ++str1Index;
                }

                str2Index = str1Index;

                if (str1Index == limitLength) {
                    int cmpResult = rangeToCompare.length - str2Len;

                    return ((0 == cmpResult) ? compareResult : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
                }
            }
        } else if (!equalityOptions && (NULL == str1Bytes) && (NULL == str2Bytes)) {
            str1Bytes = (const uint8_t *)CFStringGetCharactersPtr(string);
            str2Bytes = (const uint8_t *)CFStringGetCharactersPtr(string2);
            factor = sizeof(UTF16Char);
#if __LITTLE_ENDIAN__
            if ((NULL != str1Bytes) && (NULL != str2Bytes)) { // we cannot use memcmp
                const UTF16Char *str1 = ((const UTF16Char *)str1Bytes) + rangeToCompare.location;
                const UTF16Char *str1Limit = str1 + __CFMin(rangeToCompare.length, str2Len);
                const UTF16Char *str2 = (const UTF16Char *)str2Bytes;
                CFIndex cmpResult = 0;

                while ((0 == cmpResult) && (str1 < str1Limit)) cmpResult = (CFIndex)*(str1++) - (CFIndex)*(str2++);

                if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;

                return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
            }
#endif /* __LITTLE_ENDIAN__ */
        }
        if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
            int cmpResult = memcmp(str1Bytes + (rangeToCompare.location * factor), str2Bytes, __CFMin(rangeToCompare.length, str2Len) * factor);

            if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;

            return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
        }
    }

    // General path: indices below are relative to rangeToCompare for string and to 0 for string2.
    CFStringInitInlineBuffer(string, &inlineBuf1, rangeToCompare);
    CFStringInitInlineBuffer(string2, &inlineBuf2, CFRangeMake(0, str2Len));

    if (NULL != locale) {
        str1LocalizedIndex = str1Index;
        str2LocalizedIndex = str2Index;

        // We temporarily disable kCFCompareDiacriticInsensitive for SL <rdar://problem/6767096>. Should be revisited in NMOS <rdar://problem/7003830>
        if (forceOrdering) {
            diacriticsInsensitive = false;
            compareOptions &= ~kCFCompareDiacriticInsensitive;
        }
    }
    while ((str1Index < rangeToCompare.length) && (str2Index < str2Len)) {
        // Fetch the next character of each side: from the source string when no folded
        // cluster is pending, otherwise from the pending folded cluster.
        if (strBuf1Len == 0) {
            str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
            if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str1Char += ('a' - 'A');
            str1UsedLen = 1;
        } else {
            str1Char = strBuf1[strBuf1Index++];
        }
        if (strBuf2Len == 0) {
            str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
            if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str2Char += ('a' - 'A');
            str2UsedLen = 1;
        } else {
            str2Char = strBuf2[strBuf2Index++];
        }

        if (numerically && ((0 == strBuf1Len) && (str1Char <= '9') && (str1Char >= '0')) && ((0 == strBuf2Len) && (str2Char <= '9') && (str2Char >= '0'))) { // If both are not ASCII digits, then don't do numerical comparison here
            uint64_t intValue1 = 0, intValue2 = 0;    // !!! Doesn't work if numbers are > max uint64_t
            CFIndex str1NumRangeIndex = str1Index;
            CFIndex str2NumRangeIndex = str2Index;

            // accumulate each run of ASCII digits; the indices end up just past the run
            do {
                intValue1 = (intValue1 * 10) + (str1Char - '0');
                str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, ++str1Index);
            } while ((str1Char <= '9') && (str1Char >= '0'));

            do {
                intValue2 = intValue2 * 10 + (str2Char - '0');
                str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, ++str2Index);
            } while ((str2Char <= '9') && (str2Char >= '0'));

            if (intValue1 == intValue2) {
                // equal values written with different digit counts: under forced ordering
                // the shorter run sorts first
                if (forceOrdering && (kCFCompareEqualTo == compareResult) && ((str1Index - str1NumRangeIndex) != (str2Index - str2NumRangeIndex))) {
                    compareResult = (((str1Index - str1NumRangeIndex) < (str2Index - str2NumRangeIndex)) ? kCFCompareLessThan : kCFCompareGreaterThan);
                    numericEquivalence = true;
                    forcedIndex1 = str1NumRangeIndex;
                    forcedIndex2 = str2NumRangeIndex;
                }

                continue;
            } else if (intValue1 < intValue2) {
                if (freeLocale && locale) {
                    CFRelease(locale);
                }
                return kCFCompareLessThan;
            } else {
                if (freeLocale && locale) {
                    CFRelease(locale);
                }
                return kCFCompareGreaterThan;
            }
        }

        if (str1Char != str2Char) {
            if (!equalityOptions) {
                // literal comparison: the first difference decides
                compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale));
                if (freeLocale && locale) {
                    CFRelease(locale);
                }
                return compareResult;
            }

            // under forced ordering, remember the first raw difference as the tie-breaker
            if (forceOrdering && (kCFCompareEqualTo == compareResult)) {
                compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
                forcedIndex1 = str1LocalizedIndex;
                forcedIndex2 = str2LocalizedIndex;
            }

            if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars)) {
                if (NULL != locale) {
                    compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale);
                    if (freeLocale && locale) {
                        CFRelease(locale);
                    }
                    return compareResult;
                } else if (!caseInsensitive) {
                    if (freeLocale && locale) {
                        CFRelease(locale);
                    }
                    return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
                }
            }

            // combine surrogate pairs so the checks below see whole code points
            if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
                str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
                str1UsedLen = 2;
            }

            if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
                str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
                str2UsedLen = 2;
            }

            if (NULL != ignoredChars) {
                // skip an ignorable character on one side and put back the character
                // already taken from the other side's folded buffer, then retry
                if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
                    if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
                    if (strBuf1Len == 0) str1Index += str1UsedLen;
                    if (strBuf2Len > 0) --strBuf2Index;
                    continue;
                }
                if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
                    if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
                    if (strBuf2Len == 0) str2Index += str2UsedLen;
                    if (strBuf1Len > 0) -- strBuf1Index;
                    continue;
                }
            }

            if (diacriticsInsensitive && (str1Index > 0)) {
                bool str1Skip = false;
                bool str2Skip = false;

                // a grapheme-extend character read straight from a source string is
                // treated as matching; if only one side had one, the other side's index
                // is rewound so its character is compared again on the next pass
                if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
                    str1Char = str2Char;
                    str1Skip = true;
                }
                if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
                    str2Char = str1Char;
                    str2Skip = true;
                }

                if (str1Skip != str2Skip) {
                    if (str1Skip) str2Index -= str2UsedLen;
                    if (str2Skip) str1Index -= str1UsedLen;
                }
            }

            if (str1Char != str2Char) {
                // still different: fold the cluster on each side and compare the folded forms
                if (0 == strBuf1Len) {
                    strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
                    if (strBuf1Len > 0) {
                        str1Char = *strBuf1;
                        strBuf1Index = 1;
                    }
                }

                if ((0 == strBuf1Len) && (0 < strBuf2Len)) {
                    compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
                    if (freeLocale && locale) {
                        CFRelease(locale);
                    }
                    return compareResult;
                }

                if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
                    strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
                    if (strBuf2Len > 0) {
                        str2Char = *strBuf2;
                        strBuf2Index = 1;
                    }
                    if ((0 == strBuf2Len) || (str1Char != str2Char)) {
                        compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
                        if (freeLocale && locale) {
                            CFRelease(locale);
                        }
                        return compareResult;
                    }
                }
            }

            // both sides have a folded cluster pending: compare the remainders in lock step
            if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
                while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
                    if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
                    ++strBuf1Index; ++strBuf2Index;
                }
                if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
                    CFComparisonResult res = ((NULL == locale) ? ((strBuf1[strBuf1Index] < strBuf2[strBuf2Index]) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
                    if (freeLocale && locale) {
                        CFRelease(locale);
                    }
                    return res;
                }
            }
        }

        // a fully consumed folded cluster is retired, and the source index moves past it
        if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
        if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;

        if (strBuf1Len == 0) str1Index += str1UsedLen;
        if (strBuf2Len == 0) str2Index += str2UsedLen;
        if ((strBuf1Len == 0) && (strBuf2Len == 0)) {
            str1LocalizedIndex = str1Index;
            str2LocalizedIndex = str2Index;
        }
    }

    // One side is exhausted: skip whatever the options make ignorable at the tail of each
    // string (grapheme-extend characters and/or members of ignoredChars).
    if (diacriticsInsensitive || (NULL != ignoredChars)) {
        while (str1Index < rangeToCompare.length) {
            str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
            if ((str1Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII

            if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);

            if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char))) break;

            str1Index += ((str1Char < 0x10000) ? 1 : 2);
        }

        while (str2Index < str2Len) {
            str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
            if ((str2Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII

            if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);

            if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char))) break;

            str2Index += ((str2Char < 0x10000) ? 1 : 2);
        }
    }
    // Need to recalc localized result here for forced ordering, ICU cannot do numericEquivalence
    if (!numericEquivalence && (NULL != locale) && (kCFCompareEqualTo != compareResult) && (str1Index == rangeToCompare.length) && (str2Index == str2Len)) compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(forcedIndex1, rangeToCompare.length - forcedIndex1), &inlineBuf2, CFRangeMake(forcedIndex2, str2Len - forcedIndex2), compareOptions, locale);

    if (freeLocale && locale) {
        CFRelease(locale);
    }

    // whichever side still has characters left is the greater one; otherwise use the recorded result
    return ((str1Index < rangeToCompare.length) ? kCFCompareGreaterThan : ((str2Index < str2Len) ? kCFCompareLessThan : compareResult));
}


/* Same as CFStringCompareWithOptionsAndLocale with a NULL locale. */
CFComparisonResult CFStringCompareWithOptions(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions) { return CFStringCompareWithOptionsAndLocale(string, string2, rangeToCompare, compareOptions, NULL); }

/* Compares the whole of `string` against the whole of `str2` with the given options. */
CFComparisonResult CFStringCompare(CFStringRef string, CFStringRef str2, CFStringCompareFlags options) {
    return CFStringCompareWithOptions(string, str2, CFRangeMake(0, CFStringGetLength(string)), options);
}

Boolean CFStringFindWithOptionsAndLocale(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFLocaleRef locale, CFRange *result) {
    /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
    CFIndex findStrLen = CFStringGetLength(stringToFind);
    Boolean didFind = false;
    bool lengthVariants = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive)) ?
true : false); 2855 CFCharacterSetInlineBuffer *ignoredChars = NULL; 2856 CFCharacterSetInlineBuffer csetBuffer; 2857 2858 if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) { 2859 ignoredChars = &csetBuffer; 2860 lengthVariants = true; 2861 } 2862 2863 if ((findStrLen > 0) && (rangeToSearch.length > 0) && ((findStrLen <= rangeToSearch.length) || lengthVariants)) { 2864 UTF32Char strBuf1[kCFStringStackBufferLength]; 2865 UTF32Char strBuf2[kCFStringStackBufferLength]; 2866 CFStringInlineBuffer inlineBuf1, inlineBuf2; 2867 UTF32Char str1Char = 0, str2Char = 0; 2868 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding(); 2869 const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding); 2870 const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(stringToFind, eightBitEncoding); 2871 const UTF32Char *characters, *charactersLimit; 2872 const uint8_t *langCode = NULL; 2873 CFIndex fromLoc, toLoc; 2874 CFIndex str1Index, str2Index; 2875 CFIndex strBuf1Len, strBuf2Len; 2876 CFIndex maxStr1Index = (rangeToSearch.location + rangeToSearch.length); 2877 bool equalityOptions = ((lengthVariants || (compareOptions & kCFCompareWidthInsensitive)) ? true : false); 2878 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false); 2879 bool forwardAnchor = ((kCFCompareAnchored == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false); 2880 bool backwardAnchor = (((kCFCompareBackwards|kCFCompareAnchored) == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? 
true : false); 2881 int8_t delta; 2882 2883 if (NULL == locale) { 2884 if (compareOptions & kCFCompareLocalized) { 2885 CFLocaleRef currentLocale = CFLocaleCopyCurrent(); 2886 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(currentLocale, true); 2887 CFRelease(currentLocale); 2888 } 2889 } else { 2890 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale, true); 2891 } 2892 2893 CFStringInitInlineBuffer(string, &inlineBuf1, CFRangeMake(0, rangeToSearch.location + rangeToSearch.length)); 2894 CFStringInitInlineBuffer(stringToFind, &inlineBuf2, CFRangeMake(0, findStrLen)); 2895 2896 if (compareOptions & kCFCompareBackwards) { 2897 fromLoc = rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen); 2898 toLoc = (((compareOptions & kCFCompareAnchored) && !lengthVariants) ? fromLoc : rangeToSearch.location); 2899 } else { 2900 fromLoc = rangeToSearch.location; 2901 toLoc = ((compareOptions & kCFCompareAnchored) ? fromLoc : rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen)); 2902 } 2903 2904 delta = ((fromLoc <= toLoc) ? 
1 : -1); 2905 2906 if ((NULL != str1Bytes) && (NULL != str2Bytes)) { 2907 uint8_t str1Byte, str2Byte; 2908 2909 while (1) { 2910 str1Index = fromLoc; 2911 str2Index = 0; 2912 2913 while ((str1Index < maxStr1Index) && (str2Index < findStrLen)) { 2914 str1Byte = str1Bytes[str1Index]; 2915 str2Byte = str2Bytes[str2Index]; 2916 2917 if (str1Byte != str2Byte) { 2918 if (equalityOptions) { 2919 if ((str1Byte < 0x80) && ((NULL == langCode) || ('I' != str1Byte))) { 2920 if (caseInsensitive && (str1Byte >= 'A') && (str1Byte <= 'Z')) str1Byte += ('a' - 'A'); 2921 *strBuf1 = str1Byte; 2922 strBuf1Len = 1; 2923 } else { 2924 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index); 2925 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL); 2926 if (1 > strBuf1Len) { 2927 *strBuf1 = str1Char; 2928 strBuf1Len = 1; 2929 } 2930 } 2931 2932 if ((NULL != ignoredChars) && (forwardAnchor || (str1Index != fromLoc)) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str1Byte < 0x80) ? str1Byte : str1Char))) { 2933 ++str1Index; 2934 continue; 2935 } 2936 2937 if ((str2Byte < 0x80) && ((NULL == langCode) || ('I' != str2Byte))) { 2938 if (caseInsensitive && (str2Byte >= 'A') && (str2Byte <= 'Z')) str2Byte += ('a' - 'A'); 2939 *strBuf2 = str2Byte; 2940 strBuf2Len = 1; 2941 } else { 2942 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index); 2943 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL); 2944 if (1 > strBuf2Len) { 2945 *strBuf2 = str2Char; 2946 strBuf2Len = 1; 2947 } 2948 } 2949 2950 if ((NULL != ignoredChars) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str2Byte < 0x80) ? 
str2Byte : str2Char))) { 2951 ++str2Index; 2952 continue; 2953 } 2954 2955 if ((1 == strBuf1Len) && (1 == strBuf2Len)) { // normal case 2956 if (*strBuf1 != *strBuf2) break; 2957 } else { 2958 CFIndex delta; 2959 2960 if (!caseInsensitive && (strBuf1Len != strBuf2Len)) break; 2961 if (memcmp(strBuf1, strBuf2, sizeof(UTF32Char) * __CFMin(strBuf1Len, strBuf2Len))) break; 2962 2963 if (strBuf1Len < strBuf2Len) { 2964 delta = strBuf2Len - strBuf1Len; 2965 2966 if ((str1Index + strBuf1Len + delta) > maxStr1Index) break; 2967 2968 characters = &(strBuf2[strBuf1Len]); 2969 charactersLimit = characters + delta; 2970 2971 while (characters < charactersLimit) { 2972 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1), &inlineBuf1, str1Index + 1, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL); 2973 if ((strBuf1Len > 0) || (*characters != *strBuf1)) break; 2974 ++characters; ++str1Index; 2975 } 2976 if (characters < charactersLimit) break; 2977 } else if (strBuf2Len < strBuf1Len) { 2978 delta = strBuf1Len - strBuf2Len; 2979 2980 if ((str2Index + strBuf2Len + delta) > findStrLen) break; 2981 2982 characters = &(strBuf1[strBuf2Len]); 2983 charactersLimit = characters + delta; 2984 2985 while (characters < charactersLimit) { 2986 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str1Index + 1), &inlineBuf2, str2Index + 1, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL); 2987 if ((strBuf2Len > 0) || (*characters != *strBuf2)) break; 2988 ++characters; ++str2Index; 2989 } 2990 if (characters < charactersLimit) break; 2991 } 2992 } 2993 } else { 2994 break; 2995 } 2996 } 2997 ++str1Index; ++str2Index; 2998 } 2999 3000 if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail 3001 while (str2Index < findStrLen) { 3002 str2Char = 
CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index); 3003 3004 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break; 3005 ++str2Index; 3006 } 3007 } 3008 3009 if (str2Index == findStrLen) { 3010 if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail 3011 while (str1Index < maxStr1Index) { 3012 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index); 3013 3014 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break; 3015 ++str1Index; 3016 } 3017 } 3018 3019 if (!backwardAnchor || (str1Index == maxStr1Index)) { 3020 didFind = true; 3021 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc); 3022 } 3023 break; 3024 } 3025 3026 if (fromLoc == toLoc) break; 3027 fromLoc += delta; 3028 } 3029 } else if (equalityOptions) { 3030 UTF16Char otherChar; 3031 CFIndex str1UsedLen, str2UsedLen, strBuf1Index = 0, strBuf2Index = 0; 3032 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? 
true : false); 3033 const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0); 3034 const uint8_t *combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0); 3035 3036 while (1) { 3037 str1Index = fromLoc; 3038 str2Index = 0; 3039 3040 strBuf1Len = strBuf2Len = 0; 3041 3042 while (str2Index < findStrLen) { 3043 if (strBuf1Len == 0) { 3044 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index); 3045 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I'))) str1Char += ('a' - 'A'); 3046 str1UsedLen = 1; 3047 } else { 3048 str1Char = strBuf1[strBuf1Index++]; 3049 } 3050 if (strBuf2Len == 0) { 3051 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index); 3052 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I'))) str2Char += ('a' - 'A'); 3053 str2UsedLen = 1; 3054 } else { 3055 str2Char = strBuf2[strBuf2Index++]; 3056 } 3057 3058 if (str1Char != str2Char) { 3059 if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars) && ((NULL == langCode) || !caseInsensitive)) break; 3060 3061 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) { 3062 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar); 3063 str1UsedLen = 2; 3064 } 3065 3066 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) { 3067 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar); 3068 str2UsedLen = 2; 3069 } 3070 3071 if (NULL != ignoredChars) { 3072 if ((forwardAnchor || (str1Index != fromLoc)) && (str1Index < maxStr1Index) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) { 3073 if 
((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0; 3074 if (strBuf1Len == 0) str1Index += str1UsedLen; 3075 if (strBuf2Len > 0) --strBuf2Index; 3076 continue; 3077 } 3078 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) { 3079 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0; 3080 if (strBuf2Len == 0) str2Index += str2UsedLen; 3081 if (strBuf1Len > 0) -- strBuf1Index; 3082 continue; 3083 } 3084 } 3085 3086 if (diacriticsInsensitive && (str1Index > fromLoc)) { 3087 bool str1Skip = false; 3088 bool str2Skip = false; 3089 3090 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) { 3091 str1Char = str2Char; 3092 str1Skip = true; 3093 } 3094 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) { 3095 str2Char = str1Char; 3096 str2Skip = true; 3097 } 3098 3099 if (str1Skip != str2Skip) { 3100 if (str1Skip) str2Index -= str2UsedLen; 3101 if (str2Skip) str1Index -= str1UsedLen; 3102 } 3103 } 3104 3105 if (str1Char != str2Char) { 3106 if (0 == strBuf1Len) { 3107 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen); 3108 if (strBuf1Len > 0) { 3109 str1Char = *strBuf1; 3110 strBuf1Index = 1; 3111 } 3112 } 3113 3114 if ((0 == strBuf1Len) && (0 < strBuf2Len)) break; 3115 3116 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) { 3117 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen); 3118 if ((0 == strBuf2Len) || (str1Char != *strBuf2)) break; 3119 strBuf2Index = 1; 3120 } 3121 } 3122 3123 if ((strBuf1Len > 0) && 
(strBuf2Len > 0)) { 3124 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) { 3125 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break; 3126 ++strBuf1Index; ++strBuf2Index; 3127 } 3128 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) break; 3129 } 3130 } 3131 3132 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0; 3133 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0; 3134 3135 if (strBuf1Len == 0) str1Index += str1UsedLen; 3136 if (strBuf2Len == 0) str2Index += str2UsedLen; 3137 } 3138 3139 if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail 3140 while (str2Index < findStrLen) { 3141 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index); 3142 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) { 3143 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar); 3144 } 3145 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break; 3146 str2Index += ((str2Char < 0x10000) ? 1 : 2); 3147 } 3148 } 3149 3150 if (str2Index == findStrLen) { 3151 bool match = true; 3152 3153 if (strBuf1Len > 0) { 3154 match = false; 3155 3156 if (diacriticsInsensitive && (strBuf1[0] < 0x0510)) { 3157 while (strBuf1Index < strBuf1Len) { 3158 if (!CFUniCharIsMemberOfBitmap(strBuf1[strBuf1Index], ((strBuf1[strBuf1Index] < 0x10000) ? 
graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (strBuf1[strBuf1Index] >> 16))))) break; 3159 ++strBuf1Index; 3160 } 3161 3162 if (strBuf1Index == strBuf1Len) { 3163 str1Index += str1UsedLen; 3164 match = true; 3165 } 3166 } 3167 } 3168 3169 if (match && (compareOptions & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) && (str1Index < maxStr1Index)) { 3170 const uint8_t *nonBaseBitmap; 3171 3172 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index); 3173 3174 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) { 3175 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar); 3176 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16)); 3177 } else { 3178 nonBaseBitmap = graphemeBMP; 3179 } 3180 3181 if (CFUniCharIsMemberOfBitmap(str1Char, nonBaseBitmap)) { 3182 if (diacriticsInsensitive) { 3183 if (str1Char < 0x10000) { 3184 CFIndex index = str1Index; 3185 3186 do { 3187 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, --index); 3188 } while (CFUniCharIsMemberOfBitmap(str1Char, graphemeBMP), (rangeToSearch.location < index)); 3189 3190 if (str1Char < 0x0510) { 3191 while (++str1Index < maxStr1Index) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index), graphemeBMP)) break; 3192 } 3193 } 3194 } else { 3195 match = false; 3196 } 3197 } else if (!diacriticsInsensitive) { 3198 otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index - 1); 3199 3200 // this is assuming viramas are only in BMP ??? 
3201 if ((str1Char == COMBINING_GRAPHEME_JOINER) || (otherChar == COMBINING_GRAPHEME_JOINER) || (otherChar == ZERO_WIDTH_JOINER) || ((otherChar >= HANGUL_CHOSEONG_START) && (otherChar <= HANGUL_JONGSEONG_END)) || (CFUniCharGetCombiningPropertyForCharacter(otherChar, combClassBMP) == 9)) { 3202 CFRange clusterRange = CFStringGetRangeOfCharacterClusterAtIndex(string, str1Index - 1, kCFStringGraphemeCluster); 3203 3204 if (str1Index < (clusterRange.location + clusterRange.length)) match = false; 3205 } 3206 } 3207 } 3208 3209 if (match) { 3210 if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail 3211 while (str1Index < maxStr1Index) { 3212 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index); 3213 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) { 3214 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar); 3215 } 3216 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break; 3217 str1Index += ((str1Char < 0x10000) ? 
1 : 2); 3218 } 3219 } 3220 3221 if (!backwardAnchor || (str1Index == maxStr1Index)) { 3222 didFind = true; 3223 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc); 3224 } 3225 break; 3226 } 3227 } 3228 3229 if (fromLoc == toLoc) break; 3230 fromLoc += delta; 3231 } 3232 } else { 3233 while (1) { 3234 str1Index = fromLoc; 3235 str2Index = 0; 3236 3237 while (str2Index < findStrLen) { 3238 if (CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index) != CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index)) break; 3239 3240 ++str1Index; ++str2Index; 3241 } 3242 3243 if (str2Index == findStrLen) { 3244 didFind = true; 3245 if (NULL != result) *result = CFRangeMake(fromLoc, findStrLen); 3246 break; 3247 } 3248 3249 if (fromLoc == toLoc) break; 3250 fromLoc += delta; 3251 } 3252 } 3253 } 3254 3255 return didFind; 3256} 3257 3258 3259Boolean CFStringFindWithOptions(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFRange *result) { return CFStringFindWithOptionsAndLocale(string, stringToFind, rangeToSearch, compareOptions, NULL, result); } 3260 3261// Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults() 3262 3263static const void *__rangeRetain(CFAllocatorRef allocator, const void *ptr) { 3264 CFRetain(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange))); 3265 return ptr; 3266} 3267 3268static void __rangeRelease(CFAllocatorRef allocator, const void *ptr) { 3269 CFRelease(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange))); 3270} 3271 3272static CFStringRef __rangeCopyDescription(const void *ptr) { 3273 CFRange range = *(CFRange *)ptr; 3274 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("{%ld, %ld}"), (long)range.location, (long)range.length); 3275} 3276 3277static Boolean __rangeEqual(const void *ptr1, const void *ptr2) { 3278 CFRange range1 = *(CFRange *)ptr1; 3279 CFRange range2 = *(CFRange *)ptr2; 3280 
return (range1.location == range2.location) && (range1.length == range2.length); 3281} 3282 3283 3284CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions) { 3285 CFRange foundRange; 3286 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0); 3287 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length; 3288 CFMutableDataRef rangeStorage = NULL; // Basically an array of CFRange, CFDataRef (packed) 3289 uint8_t *rangeStorageBytes = NULL; 3290 CFIndex foundCount = 0; 3291 CFIndex capacity = 0; // Number of CFRange, CFDataRef element slots in rangeStorage 3292 3293 if (alloc == NULL) alloc = __CFGetDefaultAllocator(); 3294 3295 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) { 3296 // Determine the next range 3297 if (backwards) { 3298 rangeToSearch.length = foundRange.location - rangeToSearch.location; 3299 } else { 3300 rangeToSearch.location = foundRange.location + foundRange.length; 3301 rangeToSearch.length = endIndex - rangeToSearch.location; 3302 } 3303 3304 // If necessary, grow the data and squirrel away the found range 3305 if (foundCount >= capacity) { 3306 if (rangeStorage == NULL) rangeStorage = CFDataCreateMutable(alloc, 0); 3307 capacity = (capacity + 4) * 2; 3308 CFDataSetLength(rangeStorage, capacity * (sizeof(CFRange) + sizeof(CFDataRef))); 3309 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage) + foundCount * (sizeof(CFRange) + sizeof(CFDataRef)); 3310 } 3311 memmove(rangeStorageBytes, &foundRange, sizeof(CFRange)); // The range 3312 memmove(rangeStorageBytes + sizeof(CFRange), &rangeStorage, sizeof(CFDataRef)); // The data 3313 rangeStorageBytes += (sizeof(CFRange) + sizeof(CFDataRef)); 3314 foundCount++; 3315 } 3316 3317 if (foundCount > 0) { 3318 CFIndex cnt; 3319 CFMutableArrayRef array; 3320 const 
CFArrayCallBacks callbacks = {0, __rangeRetain, __rangeRelease, __rangeCopyDescription, __rangeEqual}; 3321 3322 CFDataSetLength(rangeStorage, foundCount * (sizeof(CFRange) + sizeof(CFDataRef))); // Tighten storage up 3323 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage); 3324 3325 array = CFArrayCreateMutable(alloc, foundCount * sizeof(CFRange *), &callbacks); 3326 for (cnt = 0; cnt < foundCount; cnt++) { 3327 // Each element points to the appropriate CFRange in the CFData 3328 CFArrayAppendValue(array, rangeStorageBytes + cnt * (sizeof(CFRange) + sizeof(CFDataRef))); 3329 } 3330 CFRelease(rangeStorage); // We want the data to go away when all CFRanges inside it are released... 3331 return array; 3332 } else { 3333 return NULL; 3334 } 3335} 3336 3337 3338CFRange CFStringFind(CFStringRef string, CFStringRef stringToFind, CFStringCompareFlags compareOptions) { 3339 CFRange foundRange; 3340 3341 if (CFStringFindWithOptions(string, stringToFind, CFRangeMake(0, CFStringGetLength(string)), compareOptions, &foundRange)) { 3342 return foundRange; 3343 } else { 3344 return CFRangeMake(kCFNotFound, 0); 3345 } 3346} 3347 3348Boolean CFStringHasPrefix(CFStringRef string, CFStringRef prefix) { 3349 return CFStringFindWithOptions(string, prefix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored, NULL); 3350} 3351 3352Boolean CFStringHasSuffix(CFStringRef string, CFStringRef suffix) { 3353 return CFStringFindWithOptions(string, suffix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored|kCFCompareBackwards, NULL); 3354} 3355 3356#define MAX_TRANSCODING_LENGTH 4 3357 3358#define HANGUL_JONGSEONG_COUNT (28) 3359 3360CF_INLINE bool _CFStringIsHangulLVT(UTF32Char character) { 3361 return (((character - HANGUL_SYLLABLE_START) % HANGUL_JONGSEONG_COUNT) ? 
true : false); 3362} 3363 3364static uint8_t __CFTranscodingHintLength[] = { 3365 2, 3, 4, 4, 4, 4, 4, 2, 2, 2, 2, 4, 0, 0, 0, 0 3366}; 3367 3368enum { 3369 kCFStringHangulStateL, 3370 kCFStringHangulStateV, 3371 kCFStringHangulStateT, 3372 kCFStringHangulStateLV, 3373 kCFStringHangulStateLVT, 3374 kCFStringHangulStateBreak 3375}; 3376 3377static CFRange _CFStringInlineBufferGetComposedRange(CFStringInlineBuffer *buffer, CFIndex start, CFStringCharacterClusterType type, const uint8_t *bmpBitmap, CFIndex csetType) { 3378 CFIndex end = start + 1; 3379 const uint8_t *bitmap = bmpBitmap; 3380 UTF32Char character; 3381 UTF16Char otherSurrogate; 3382 uint8_t step; 3383 3384 character = CFStringGetCharacterFromInlineBuffer(buffer, start); 3385 3386 // We don't combine characters in Armenian ~ Limbu range for backward deletion 3387 if ((type != kCFStringBackwardDeletionCluster) || (character < 0x0530) || (character > 0x194F)) { 3388 // Check if the current is surrogate 3389 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start + 1)))) { 3390 ++end; 3391 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate); 3392 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16)); 3393 } 3394 3395 // Extend backward 3396 while (start > 0) { 3397 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break; 3398 3399 if (character < 0x10000) { // the first round could be already be non-BMP 3400 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)))) { 3401 character = CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate, character); 3402 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16)); 3403 if (--start == 0) break; // starting with non-BMP combining mark 3404 } else { 3405 bitmap = 
bmpBitmap; 3406 } 3407 } 3408 3409 if (!CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break; 3410 3411 --start; 3412 3413 character = CFStringGetCharacterFromInlineBuffer(buffer, start); 3414 } 3415 } 3416 3417 // Hangul 3418 if (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END))) { 3419 uint8_t state; 3420 uint8_t initialState; 3421 3422 if (character < HANGUL_JUNGSEONG_START) { 3423 state = kCFStringHangulStateL; 3424 } else if (character < HANGUL_JONGSEONG_START) { 3425 state = kCFStringHangulStateV; 3426 } else if (character < HANGUL_SYLLABLE_START) { 3427 state = kCFStringHangulStateT; 3428 } else { 3429 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV); 3430 } 3431 initialState = state; 3432 3433 // Extend backward 3434 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)) >= HANGUL_CHOSEONG_START) && (character <= HANGUL_SYLLABLE_END) && ((character <= HANGUL_JONGSEONG_END) || (character >= HANGUL_SYLLABLE_START))) { 3435 switch (state) { 3436 case kCFStringHangulStateV: 3437 if (character <= HANGUL_CHOSEONG_END) { 3438 state = kCFStringHangulStateL; 3439 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END) && !_CFStringIsHangulLVT(character)) { 3440 state = kCFStringHangulStateLV; 3441 } else if (character > HANGUL_JUNGSEONG_END) { 3442 state = kCFStringHangulStateBreak; 3443 } 3444 break; 3445 3446 case kCFStringHangulStateT: 3447 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JUNGSEONG_END)) { 3448 state = kCFStringHangulStateV; 3449 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)) { 3450 state = (_CFStringIsHangulLVT(character) ? 
kCFStringHangulStateLVT : kCFStringHangulStateLV); 3451 } else if (character < HANGUL_JUNGSEONG_START) { 3452 state = kCFStringHangulStateBreak; 3453 } 3454 break; 3455 3456 default: 3457 state = ((character < HANGUL_JUNGSEONG_START) ? kCFStringHangulStateL : kCFStringHangulStateBreak); 3458 break; 3459 } 3460 3461 if (state == kCFStringHangulStateBreak) break; 3462 --start; 3463 } 3464 3465 // Extend forward 3466 state = initialState; 3467 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) && (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)))) { 3468 switch (state) { 3469 case kCFStringHangulStateLV: 3470 case kCFStringHangulStateV: 3471 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) { 3472 state = ((character < HANGUL_JONGSEONG_START) ? kCFStringHangulStateV : kCFStringHangulStateT); 3473 } else { 3474 state = kCFStringHangulStateBreak; 3475 } 3476 break; 3477 3478 case kCFStringHangulStateLVT: 3479 case kCFStringHangulStateT: 3480 state = (((character >= HANGUL_JONGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) ? kCFStringHangulStateT : kCFStringHangulStateBreak); 3481 break; 3482 3483 default: 3484 if (character < HANGUL_JUNGSEONG_START) { 3485 state = kCFStringHangulStateL; 3486 } else if (character < HANGUL_JONGSEONG_START) { 3487 state = kCFStringHangulStateV; 3488 } else if (character >= HANGUL_SYLLABLE_START) { 3489 state = (_CFStringIsHangulLVT(character) ? 
kCFStringHangulStateLVT : kCFStringHangulStateLV);
                    } else {
                        state = kCFStringHangulStateBreak;
                    }
                    break;
            }

            if (state == kCFStringHangulStateBreak) break;
            ++end;
        }
    }

    // Extend forward
    while ((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) {
        if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;

        if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, end + 1)))) {
            character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
            bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
            step = 2;
        } else {
            bitmap = bmpBitmap;
            step = 1;
        }

        if (!CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;

        end += step;
    }

    return CFRangeMake(start, end - start);
}

/* Returns true when `character` is COMBINING_GRAPHEME_JOINER or when its combining
   property equals 9 (presumably the Unicode canonical combining class used for
   viramas -- confirm against the property tables).
   combClassBMP is the combining-property data for plane 0; for characters at or above
   0x10000 the data for the character's own plane is looked up instead.
*/
CF_INLINE bool _CFStringIsVirama(UTF32Char character, const uint8_t *combClassBMP) {
    return ((character == COMBINING_GRAPHEME_JOINER) || (CFUniCharGetCombiningPropertyForCharacter(character, (const uint8_t *)((character < 0x10000) ? combClassBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (character >> 16)))) == 9) ? true : false);
}

/* Returns the range of the character cluster of the given `type` that contains charIndex.

   Steps, in order:
     1. charIndex at or past the string length yields {kCFNotFound, 0}.
     2. Native (non-ObjC) eight-bit strings short-circuit to a one-character range.
     3. The composed character sequence around charIndex is computed.
     4. For cluster types below kCFStringCursorMovementCluster, the range is widened
        backward and then forward across letters joined by viramas / grapheme joiners / ZWJ.
     5. Finally the range is widened if it lies inside a run introduced by a transcoding
        hint character (0xF860..0xF86F).
*/
CFRange CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string, CFIndex charIndex, CFStringCharacterClusterType type) {
    CFRange range;
    CFIndex currentIndex;
    CFIndex length = CFStringGetLength(string);
    CFIndex csetType = ((kCFStringGraphemeCluster == type) ? kCFUniCharGraphemeExtendCharacterSet : kCFUniCharNonBaseCharacterSet);
    CFStringInlineBuffer stringBuffer;
    const uint8_t *bmpBitmap;
    const uint8_t *letterBMP;
    static const uint8_t *combClassBMP = NULL;  // looked up once and cached across calls
    UTF32Char character;
    UTF16Char otherSurrogate;

    if (charIndex >= length) return CFRangeMake(kCFNotFound, 0);

    /* Fast case.  If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII.  Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters
    */
    if (!CF_IS_OBJC(__kCFStringTypeID, string) && __CFStrIsEightBit(string)) return CFRangeMake(charIndex, 1);

    bmpBitmap = CFUniCharGetBitmapPtrForPlane(csetType, 0);
    letterBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, 0);
    if (NULL == combClassBMP) combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);

    CFStringInitInlineBuffer(string, &stringBuffer, CFRangeMake(0, length));

    // Get composed character sequence first
    range = _CFStringInlineBufferGetComposedRange(&stringBuffer, charIndex, type, bmpBitmap, csetType);

    // Do grapheme joiners
    if (type < kCFStringCursorMovementCluster) {
        const uint8_t *letter = letterBMP;

        // Check to see if we have a letter at the beginning of initial cluster
        character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location);

        if ((range.length > 1) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location + 1)))) {
            character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
            letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
        }

        if ((character == ZERO_WIDTH_JOINER) || CFUniCharIsMemberOfBitmap(character, letter)) {
            CFRange otherRange;

            // Check if preceded by grapheme joiners (U034F and viramas)
            // otherRange.location remembers the original start so the length can be fixed up after the loop
            otherRange.location = currentIndex = range.location;

            while (currentIndex > 1) {
                character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex);

                // ??? We're assuming viramas only in BMP
                // Note the second --currentIndex is a side effect that only runs when the
                // first test fails and the character is a ZWJ (short-circuit evaluation).
                if ((_CFStringIsVirama(character, combClassBMP) || ((character == ZERO_WIDTH_JOINER) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex), combClassBMP))) && (currentIndex > 0)) {
                    --currentIndex;
                } else {
                    break;
                }

                // Jump to the start of the composed sequence that precedes the joiner
                currentIndex = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType).location;

                character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);

                // NOTE(review): here the pair helper is called with (low, high), whereas every
                // other call in this function passes (high, low) -- confirm the argument order.
                if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1)))) {
                    character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
                    letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
                    --currentIndex;
                } else {
                    letter = letterBMP;
                }

                if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
                range.location = currentIndex;
            }

            // Grow the length by however far the start moved backward
            range.length += otherRange.location - range.location;

            // Check if followed by grapheme joiners
            if ((range.length > 1) && ((range.location + range.length) < length)) {
                otherRange = range;
                currentIndex = otherRange.location + otherRange.length;

                do {
                    // Last character of the cluster so far must be a joiner for the cluster to continue
                    character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1);

                    // ??? We're assuming viramas only in BMP
                    if ((character != ZERO_WIDTH_JOINER) && !_CFStringIsVirama(character, combClassBMP)) break;

                    character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);

                    if (character == ZERO_WIDTH_JOINER) character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, ++currentIndex);

                    if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex + 1)))) {
                        character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
                        letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
                    } else {
                        letter = letterBMP;
                    }

                    // We only conjoin letters
                    if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
                    otherRange = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType);
                    currentIndex = otherRange.location + otherRange.length;
                } while ((otherRange.location + otherRange.length) < length);
                range.length = currentIndex - range.location;
            }
        }
    }

    // Check if we're part of prefix transcoding hints
    CFIndex otherIndex;

    // Start looking far enough back that a hint could still cover the end of the range
    currentIndex = (range.location + range.length) - (MAX_TRANSCODING_LENGTH + 1);
    if (currentIndex < 0) currentIndex = 0;

    while (currentIndex <= range.location) {
        character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);

        if ((character & 0x1FFFF0) == 0xF860) { // transcoding hint
            // End (exclusive) of the run covered by this hint: the hint itself plus its table length
            otherIndex = currentIndex + __CFTranscodingHintLength[(character - 0xF860)] + 1;
            if (otherIndex >= (range.location + range.length)) {
                if (otherIndex <= length) {
                    range.location = currentIndex;
                    range.length = otherIndex - currentIndex;
                }
                break;
            }
        }
        ++currentIndex;
    }

    return range;
}

CFRange
CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) { 3657 return CFStringGetRangeOfCharacterClusterAtIndex(theString, theIndex, kCFStringComposedCharacterCluster); 3658} 3659 3660/*! 3661 @function CFStringFindCharacterFromSet 3662 Query the range of characters contained in the specified character set. 3663 @param theString The CFString which is to be searched. If this 3664 parameter is not a valid CFString, the behavior is 3665 undefined. 3666 @param theSet The CFCharacterSet against which the membership 3667 of characters is checked. If this parameter is not a valid 3668 CFCharacterSet, the behavior is undefined. 3669 @param range The range of characters within the string to search. If 3670 the range location or end point (defined by the location 3671 plus length minus 1) are outside the index space of the 3672 string (0 to N-1 inclusive, where N is the length of the 3673 string), the behavior is undefined. If the range length is 3674 negative, the behavior is undefined. The range may be empty 3675 (length 0), in which case no search is performed. 3676 @param searchOptions The bitwise-or'ed option flags to control 3677 the search behavior. The supported options are 3678 kCFCompareBackwards andkCFCompareAnchored. 3679 If other option flags are specified, the behavior 3680 is undefined. 3681 @param result The pointer to a CFRange supplied by the caller in 3682 which the search result is stored. If a pointer to an invalid 3683 memory is specified, the behavior is undefined. 3684 @result true, if at least a character which is a member of the character 3685 set is found and result is filled, otherwise, false. 
3686*/ 3687#define SURROGATE_START 0xD800 3688#define SURROGATE_END 0xDFFF 3689 3690CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFStringCompareFlags searchOptions, CFRange *result) { 3691 CFStringInlineBuffer stringBuffer; 3692 CFCharacterSetInlineBuffer csetBuffer; 3693 UniChar ch; 3694 CFIndex step; 3695 CFIndex fromLoc, toLoc, cnt; // fromLoc and toLoc are inclusive 3696 Boolean found = false; 3697 Boolean done = false; 3698 3699//#warning FIX ME !! Should support kCFCompareNonliteral 3700 3701 if ((rangeToSearch.location + rangeToSearch.length > CFStringGetLength(theString)) || (rangeToSearch.length == 0)) return false; 3702 3703 if (searchOptions & kCFCompareBackwards) { 3704 fromLoc = rangeToSearch.location + rangeToSearch.length - 1; 3705 toLoc = rangeToSearch.location; 3706 } else { 3707 fromLoc = rangeToSearch.location; 3708 toLoc = rangeToSearch.location + rangeToSearch.length - 1; 3709 } 3710 if (searchOptions & kCFCompareAnchored) { 3711 toLoc = fromLoc; 3712 } 3713 3714 step = (fromLoc <= toLoc) ? 
1 : -1; 3715 cnt = fromLoc; 3716 3717 CFStringInitInlineBuffer(theString, &stringBuffer, rangeToSearch); 3718 CFCharacterSetInitInlineBuffer(theSet, &csetBuffer); 3719 3720 do { 3721 ch = CFStringGetCharacterFromInlineBuffer(&stringBuffer, cnt - rangeToSearch.location); 3722 if ((ch >= SURROGATE_START) && (ch <= SURROGATE_END)) { 3723 int otherCharIndex = cnt + step; 3724 3725 if (((step < 0) && (otherCharIndex < toLoc)) || ((step > 0) && (otherCharIndex > toLoc))) { 3726 done = true; 3727 } else { 3728 UniChar highChar; 3729 UniChar lowChar = CFStringGetCharacterFromInlineBuffer(&stringBuffer, otherCharIndex - rangeToSearch.location); 3730 3731 if (cnt < otherCharIndex) { 3732 highChar = ch; 3733 } else { 3734 highChar = lowChar; 3735 lowChar = ch; 3736 } 3737 3738 if (CFUniCharIsSurrogateHighCharacter(highChar) && CFUniCharIsSurrogateLowCharacter(lowChar) && CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, CFUniCharGetLongCharacterForSurrogatePair(highChar, lowChar))) { 3739 if (result) *result = CFRangeMake((cnt < otherCharIndex ? 
cnt : otherCharIndex), 2); 3740 return true; 3741 } else if (otherCharIndex == toLoc) { 3742 done = true; 3743 } else { 3744 cnt = otherCharIndex + step; 3745 } 3746 } 3747 } else if (CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, ch)) { 3748 done = found = true; 3749 } else if (cnt == toLoc) { 3750 done = true; 3751 } else { 3752 cnt += step; 3753 } 3754 } while (!done); 3755 3756 if (found && result) *result = CFRangeMake(cnt, 1); 3757 return found; 3758} 3759 3760/* Line range code */ 3761 3762#define CarriageReturn '\r' /* 0x0d */ 3763#define NewLine '\n' /* 0x0a */ 3764#define NextLine 0x0085 3765#define LineSeparator 0x2028 3766#define ParaSeparator 0x2029 3767 3768CF_INLINE Boolean isALineSeparatorTypeCharacter(UniChar ch, Boolean includeLineEndings) { 3769 if (ch > CarriageReturn && ch < NextLine) return false; /* Quick test to cover most chars */ 3770 return (ch == NewLine || ch == CarriageReturn || ch == ParaSeparator || (includeLineEndings && (ch == NextLine || ch == LineSeparator))) ? 
true : false; 3771} 3772 3773static void __CFStringGetLineOrParagraphBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex, Boolean includeLineEndings) { 3774 CFIndex len; 3775 CFStringInlineBuffer buf; 3776 UniChar ch; 3777 3778 __CFAssertIsString(string); 3779 __CFAssertRangeIsInStringBounds(string, range.location, range.length); 3780 3781 len = __CFStrLength(string); 3782 3783 if (lineBeginIndex) { 3784 CFIndex start; 3785 if (range.location == 0) { 3786 start = 0; 3787 } else { 3788 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len)); 3789 CFIndex buf_idx = range.location; 3790 3791 /* Take care of the special case where start happens to fall right between \r and \n */ 3792 ch = CFStringGetCharacterFromInlineBuffer(&buf, buf_idx); 3793 buf_idx--; 3794 if ((ch == NewLine) && (CFStringGetCharacterFromInlineBuffer(&buf, buf_idx) == CarriageReturn)) { 3795 buf_idx--; 3796 } 3797 while (1) { 3798 if (buf_idx < 0) { 3799 start = 0; 3800 break; 3801 } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx), includeLineEndings)) { 3802 start = buf_idx + 1; 3803 break; 3804 } else { 3805 buf_idx--; 3806 } 3807 } 3808 } 3809 *lineBeginIndex = start; 3810 } 3811 3812 /* Now find the ending point */ 3813 if (lineEndIndex || contentsEndIndex) { 3814 CFIndex endOfContents, lineSeparatorLength = 1; /* 1 by default */ 3815 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len)); 3816 CFIndex buf_idx = range.location + range.length - (range.length ? 1 : 0); 3817 /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... 
*/ 3818 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx); 3819 if (ch == NewLine) { 3820 endOfContents = buf_idx; 3821 buf_idx--; 3822 if (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == CarriageReturn) { 3823 lineSeparatorLength = 2; 3824 endOfContents--; 3825 } 3826 } else { 3827 while (1) { 3828 if (isALineSeparatorTypeCharacter(ch, includeLineEndings)) { 3829 endOfContents = buf_idx; /* This is actually end of contentsRange */ 3830 buf_idx++; /* OK for this to go past the end */ 3831 if ((ch == CarriageReturn) && (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == NewLine)) { 3832 lineSeparatorLength = 2; 3833 } 3834 break; 3835 } else if (buf_idx >= len) { 3836 endOfContents = len; 3837 lineSeparatorLength = 0; 3838 break; 3839 } else { 3840 buf_idx++; 3841 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx); 3842 } 3843 } 3844 } 3845 if (contentsEndIndex) *contentsEndIndex = endOfContents; 3846 if (lineEndIndex) *lineEndIndex = endOfContents + lineSeparatorLength; 3847 } 3848} 3849 3850void CFStringGetLineBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex) { 3851 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSString *)string, getLineStart:(NSUInteger *)lineBeginIndex end:(NSUInteger *)lineEndIndex contentsEnd:(NSUInteger *)contentsEndIndex forRange:NSMakeRange(range.location, range.length)); 3852 __CFStringGetLineOrParagraphBounds(string, range, lineBeginIndex, lineEndIndex, contentsEndIndex, true); 3853} 3854 3855void CFStringGetParagraphBounds(CFStringRef string, CFRange range, CFIndex *parBeginIndex, CFIndex *parEndIndex, CFIndex *contentsEndIndex) { 3856 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSString *)string, getParagraphStart:(NSUInteger *)parBeginIndex end:(NSUInteger *)parEndIndex contentsEnd:(NSUInteger *)contentsEndIndex forRange:NSMakeRange(range.location, range.length)); 3857 __CFStringGetLineOrParagraphBounds(string, range, 
parBeginIndex, parEndIndex, contentsEndIndex, false); 3858} 3859 3860 3861CFStringRef CFStringCreateByCombiningStrings(CFAllocatorRef alloc, CFArrayRef array, CFStringRef separatorString) { 3862 CFIndex numChars; 3863 CFIndex separatorNumByte; 3864 CFIndex stringCount = CFArrayGetCount(array); 3865 Boolean isSepCFString = !CF_IS_OBJC(__kCFStringTypeID, separatorString); 3866 Boolean canBeEightbit = isSepCFString && __CFStrIsEightBit(separatorString); 3867 CFIndex idx; 3868 CFStringRef otherString; 3869 void *buffer; 3870 uint8_t *bufPtr; 3871 const void *separatorContents = NULL; 3872 3873 if (stringCount == 0) { 3874 return CFStringCreateWithCharacters(alloc, NULL, 0); 3875 } else if (stringCount == 1) { 3876 return (CFStringRef)CFStringCreateCopy(alloc, (CFStringRef)CFArrayGetValueAtIndex(array, 0)); 3877 } 3878 3879 if (alloc == NULL) alloc = __CFGetDefaultAllocator(); 3880 3881 numChars = CFStringGetLength(separatorString) * (stringCount - 1); 3882 for (idx = 0; idx < stringCount; idx++) { 3883 otherString = (CFStringRef)CFArrayGetValueAtIndex(array, idx); 3884 numChars += CFStringGetLength(otherString); 3885 // canBeEightbit is already false if the separator is an NSString... 3886 if (CF_IS_OBJC(__kCFStringTypeID, otherString) || ! __CFStrIsEightBit(otherString)) canBeEightbit = false; 3887 } 3888 3889 buffer = (uint8_t *)CFAllocatorAllocate(alloc, canBeEightbit ? ((numChars + 1) * sizeof(uint8_t)) : (numChars * sizeof(UniChar)), 0); 3890 bufPtr = (uint8_t *)buffer; 3891 if (__CFOASafe) __CFSetLastAllocationEventName(buffer, "CFString (store)"); 3892 separatorNumByte = CFStringGetLength(separatorString) * (canBeEightbit ? 
sizeof(uint8_t) : sizeof(UniChar)); 3893 3894 for (idx = 0; idx < stringCount; idx++) { 3895 if (idx) { // add separator here unless first string 3896 if (separatorContents) { 3897 memmove(bufPtr, separatorContents, separatorNumByte); 3898 } else { 3899 if (!isSepCFString) { // NSString 3900 CFStringGetCharacters(separatorString, CFRangeMake(0, CFStringGetLength(separatorString)), (UniChar *)bufPtr); 3901 } else if (canBeEightbit) { 3902 memmove(bufPtr, (const uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), separatorNumByte); 3903 } else { 3904 __CFStrConvertBytesToUnicode((uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), (UniChar *)bufPtr, __CFStrLength(separatorString)); 3905 } 3906 separatorContents = bufPtr; 3907 } 3908 bufPtr += separatorNumByte; 3909 } 3910 3911 otherString = (CFStringRef )CFArrayGetValueAtIndex(array, idx); 3912 if (CF_IS_OBJC(__kCFStringTypeID, otherString)) { 3913 CFIndex otherLength = CFStringGetLength(otherString); 3914 CFStringGetCharacters(otherString, CFRangeMake(0, otherLength), (UniChar *)bufPtr); 3915 bufPtr += otherLength * sizeof(UniChar); 3916 } else { 3917 const uint8_t * otherContents = (const uint8_t *)__CFStrContents(otherString); 3918 CFIndex otherNumByte = __CFStrLength2(otherString, otherContents) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar)); 3919 3920 if (canBeEightbit || __CFStrIsUnicode(otherString)) { 3921 memmove(bufPtr, otherContents + __CFStrSkipAnyLengthByte(otherString), otherNumByte); 3922 } else { 3923 __CFStrConvertBytesToUnicode(otherContents + __CFStrSkipAnyLengthByte(otherString), (UniChar *)bufPtr, __CFStrLength2(otherString, otherContents)); 3924 } 3925 bufPtr += otherNumByte; 3926 } 3927 } 3928 if (canBeEightbit) *bufPtr = 0; // NULL byte; 3929 3930 return canBeEightbit ? 
3931 CFStringCreateWithCStringNoCopy(alloc, (const char*)buffer, __CFStringGetEightBitStringEncoding(), alloc) : 3932 CFStringCreateWithCharactersNoCopy(alloc, (UniChar *)buffer, numChars, alloc); 3933} 3934 3935 3936CFArrayRef CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc, CFStringRef string, CFStringRef separatorString) { 3937 CFArrayRef separatorRanges; 3938 CFIndex length = CFStringGetLength(string); 3939 /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */ 3940 if (!(separatorRanges = CFStringCreateArrayWithFindResults(alloc, string, separatorString, CFRangeMake(0, length), 0))) { 3941 return CFArrayCreate(alloc, (const void **)&string, 1, & kCFTypeArrayCallBacks); 3942 } else { 3943 CFIndex idx; 3944 CFIndex count = CFArrayGetCount(separatorRanges); 3945 CFIndex startIndex = 0; 3946 CFIndex numChars; 3947 CFMutableArrayRef array = CFArrayCreateMutable(alloc, count + 2, & kCFTypeArrayCallBacks); 3948 const CFRange *currentRange; 3949 CFStringRef substring; 3950 3951 for (idx = 0;idx < count;idx++) { 3952 currentRange = (const CFRange *)CFArrayGetValueAtIndex(separatorRanges, idx); 3953 numChars = currentRange->location - startIndex; 3954 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, numChars)); 3955 CFArrayAppendValue(array, substring); 3956 CFRelease(substring); 3957 startIndex = currentRange->location + currentRange->length; 3958 } 3959 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, length - startIndex)); 3960 CFArrayAppendValue(array, substring); 3961 CFRelease(substring); 3962 3963 CFRelease(separatorRanges); 3964 3965 return array; 3966 } 3967} 3968 3969CFStringRef CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc, CFDataRef data, CFStringEncoding encoding) { 3970 return CFStringCreateWithBytes(alloc, CFDataGetBytePtr(data), CFDataGetLength(data), encoding, true); 3971} 3972 3973 3974CFDataRef 
CFStringCreateExternalRepresentation(CFAllocatorRef alloc, CFStringRef string, CFStringEncoding encoding, uint8_t lossByte) { 3975 CFIndex length; 3976 CFIndex guessedByteLength; 3977 uint8_t *bytes; 3978 CFIndex usedLength; 3979 SInt32 result; 3980 3981 if (CF_IS_OBJC(__kCFStringTypeID, string)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */ 3982 length = CFStringGetLength(string); 3983 } else { 3984 __CFAssertIsString(string); 3985 length = __CFStrLength(string); 3986 if (__CFStrIsEightBit(string) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string 3987 return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string)); 3988 } 3989 } 3990 3991 if (alloc == NULL) alloc = __CFGetDefaultAllocator(); 3992 3993 if (((encoding & 0x0FFF) == kCFStringEncodingUnicode) && ((encoding == kCFStringEncodingUnicode) || ((encoding > kCFStringEncodingUTF8) && (encoding <= kCFStringEncodingUTF32LE)))) { 3994 guessedByteLength = (length + 1) * ((((encoding >> 26) & 2) == 0) ? 
sizeof(UTF16Char) : sizeof(UTF32Char)); // UTF32 format has the bit set 3995 } else if (((guessedByteLength = CFStringGetMaximumSizeForEncoding(length, encoding)) > length) && !CF_IS_OBJC(__kCFStringTypeID, string)) { // Multi byte encoding 3996#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD 3997 if (__CFStrIsUnicode(string)) { 3998 CFIndex aLength = CFStringEncodingByteLengthForCharacters(encoding, kCFStringEncodingPrependBOM, __CFStrContents(string), __CFStrLength(string)); 3999 if (aLength > 0) guessedByteLength = aLength; 4000 } else { 4001#endif 4002 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, NULL, LONG_MAX, &guessedByteLength); 4003 // if result == length, we always succeed 4004 // otherwise, if result == 0, we fail 4005 // otherwise, if there was a lossByte but still result != length, we fail 4006 if ((result != length) && (!result || !lossByte)) return NULL; 4007 if (guessedByteLength == length && __CFStrIsEightBit(string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // It's all ASCII !! 
4008 return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string)); 4009 } 4010#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD 4011 } 4012#endif 4013 } 4014 bytes = (uint8_t *)CFAllocatorAllocate(alloc, guessedByteLength, 0); 4015 if (__CFOASafe) __CFSetLastAllocationEventName(bytes, "CFData (store)"); 4016 4017 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, bytes, guessedByteLength, &usedLength); 4018 4019 if ((result != length) && (!result || !lossByte)) { // see comment above about what this means 4020 CFAllocatorDeallocate(alloc, bytes); 4021 return NULL; 4022 } 4023 4024 return CFDataCreateWithBytesNoCopy(alloc, (uint8_t *)bytes, usedLength, alloc); 4025} 4026 4027 4028CFStringEncoding CFStringGetSmallestEncoding(CFStringRef str) { 4029 CFIndex len; 4030 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringEncoding, (NSString *)str, _smallestEncodingInCFStringEncoding); 4031 __CFAssertIsString(str); 4032 4033 if (__CFStrIsEightBit(str)) return __CFStringGetEightBitStringEncoding(); 4034 len = __CFStrLength(str); 4035 if (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetEightBitStringEncoding(), 0, NULL, LONG_MAX, NULL) == len) return __CFStringGetEightBitStringEncoding(); 4036 if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetSystemEncoding(), 0, NULL, LONG_MAX, NULL) == len)) return __CFStringGetSystemEncoding(); 4037 return kCFStringEncodingUnicode; /* ??? */ 4038} 4039 4040 4041CFStringEncoding CFStringGetFastestEncoding(CFStringRef str) { 4042 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringEncoding, (NSString *)str, _fastestEncodingInCFStringEncoding); 4043 __CFAssertIsString(str); 4044 return __CFStrIsEightBit(str) ? 
__CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode; /* ??? */ 4045} 4046 4047 4048SInt32 CFStringGetIntValue(CFStringRef str) { 4049 Boolean success; 4050 SInt32 result; 4051 SInt32 idx = 0; 4052 CFStringInlineBuffer buf; 4053 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str))); 4054 success = __CFStringScanInteger(&buf, NULL, &idx, false, &result); 4055 return success ? result : 0; 4056} 4057 4058 4059double CFStringGetDoubleValue(CFStringRef str) { 4060 Boolean success; 4061 double result; 4062 SInt32 idx = 0; 4063 CFStringInlineBuffer buf; 4064 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str))); 4065 success = __CFStringScanDouble(&buf, NULL, &idx, &result); 4066 return success ? result : 0.0; 4067} 4068 4069 4070/*** Mutable functions... ***/ 4071 4072void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string, UniChar *chars, CFIndex length, CFIndex capacity) { 4073 __CFAssertIsNotNegative(length); 4074 __CFAssertIsStringAndExternalMutable(string); 4075 CFAssert4((length <= capacity) && ((capacity == 0) || ((capacity > 0) && chars)), __kCFLogAssertion, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__, chars, length, capacity); 4076 __CFStrSetContentPtr(string, chars); 4077 __CFStrSetExplicitLength(string, length); 4078 __CFStrSetCapacity(string, capacity * sizeof(UniChar)); 4079 __CFStrSetCapacityProvidedExternally(string); 4080} 4081 4082 4083 4084void CFStringInsert(CFMutableStringRef str, CFIndex idx, CFStringRef insertedStr) { 4085 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, insertString:(NSString *)insertedStr atIndex:(NSUInteger)idx); 4086 __CFAssertIsStringAndMutable(str); 4087 CFAssert3(idx >= 0 && idx <= __CFStrLength(str), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(str)); 4088 __CFStringReplace(str, CFRangeMake(idx, 0), insertedStr); 4089} 4090 4091 4092void 
CFStringDelete(CFMutableStringRef str, CFRange range) { 4093 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, deleteCharactersInRange:NSMakeRange(range.location, range.length)); 4094 __CFAssertIsStringAndMutable(str); 4095 __CFAssertRangeIsInStringBounds(str, range.location, range.length); 4096 __CFStringChangeSize(str, range, 0, false); 4097} 4098 4099 4100void CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) { 4101 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, replaceCharactersInRange:NSMakeRange(range.location, range.length) withString:(NSString *)replacement); 4102 __CFAssertIsStringAndMutable(str); 4103 __CFAssertRangeIsInStringBounds(str, range.location, range.length); 4104 __CFStringReplace(str, range, replacement); 4105} 4106 4107 4108void CFStringReplaceAll(CFMutableStringRef str, CFStringRef replacement) { 4109 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, setString:(NSString *)replacement); 4110 __CFAssertIsStringAndMutable(str); 4111 __CFStringReplace(str, CFRangeMake(0, __CFStrLength(str)), replacement); 4112} 4113 4114 4115void CFStringAppend(CFMutableStringRef str, CFStringRef appended) { 4116 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, appendString:(NSString *)appended); 4117 __CFAssertIsStringAndMutable(str); 4118 __CFStringReplace(str, CFRangeMake(__CFStrLength(str), 0), appended); 4119} 4120 4121 4122void CFStringAppendCharacters(CFMutableStringRef str, const UniChar *chars, CFIndex appendedLength) { 4123 CFIndex strLength, idx; 4124 4125 __CFAssertIsNotNegative(appendedLength); 4126 4127 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, appendCharacters:chars length:(NSUInteger)appendedLength); 4128 4129 __CFAssertIsStringAndMutable(str); 4130 4131 strLength = __CFStrLength(str); 4132 if (__CFStrIsUnicode(str)) { 4133 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, true); 4134 
memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar)); 4135 } else { 4136 uint8_t *contents; 4137 bool isASCII = true; 4138 for (idx = 0; isASCII && idx < appendedLength; idx++) isASCII = (chars[idx] < 0x80); 4139 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, !isASCII); 4140 if (!isASCII) { 4141 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar)); 4142 } else { 4143 contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str); 4144 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx]; 4145 } 4146 } 4147} 4148 4149 4150void __CFStringAppendBytes(CFMutableStringRef str, const char *cStr, CFIndex appendedLength, CFStringEncoding encoding) { 4151 Boolean appendedIsUnicode = false; 4152 Boolean freeCStrWhenDone = false; 4153 Boolean demoteAppendedUnicode = false; 4154 CFVarWidthCharBuffer vBuf; 4155 4156 __CFAssertIsNotNegative(appendedLength); 4157 4158 if (encoding == kCFStringEncodingASCII || encoding == __CFStringGetEightBitStringEncoding()) { 4159 // appendedLength now denotes length in UniChars 4160 } else if (encoding == kCFStringEncodingUnicode) { 4161 UniChar *chars = (UniChar *)cStr; 4162 CFIndex idx, length = appendedLength / sizeof(UniChar); 4163 bool isASCII = true; 4164 for (idx = 0; isASCII && idx < length; idx++) isASCII = (chars[idx] < 0x80); 4165 if (!isASCII) { 4166 appendedIsUnicode = true; 4167 } else { 4168 demoteAppendedUnicode = true; 4169 } 4170 appendedLength = length; 4171 } else { 4172 Boolean usingPassedInMemory = false; 4173 4174 vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff 4175 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary 4176 4177 if (!__CFStringDecodeByteStream3((const uint8_t *)cStr, appendedLength, encoding, __CFStrIsUnicode(str), &vBuf, &usingPassedInMemory, 0)) { 4178 
CFAssert1(0, __kCFLogAssertion, "Supplied bytes could not be converted specified encoding %d", encoding); 4179 return; 4180 } 4181 4182 // If not ASCII, appendedLength now denotes length in UniChars 4183 appendedLength = vBuf.numChars; 4184 appendedIsUnicode = !vBuf.isASCII; 4185 cStr = (const char *)vBuf.chars.ascii; 4186 freeCStrWhenDone = !usingPassedInMemory && vBuf.shouldFreeChars; 4187 } 4188 4189 if (CF_IS_OBJC(__kCFStringTypeID, str)) { 4190 if (!appendedIsUnicode && !demoteAppendedUnicode) { 4191 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, _cfAppendCString:(const unsigned char *)cStr length:(NSInteger)appendedLength); 4192 } else { 4193 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, appendCharacters:(const unichar *)cStr length:(NSUInteger)appendedLength); 4194 } 4195 } else { 4196 CFIndex strLength; 4197 __CFAssertIsStringAndMutable(str); 4198 strLength = __CFStrLength(str); 4199 4200 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, appendedIsUnicode || __CFStrIsUnicode(str)); 4201 4202 if (__CFStrIsUnicode(str)) { 4203 UniChar *contents = (UniChar *)__CFStrContents(str); 4204 if (appendedIsUnicode) { 4205 memmove(contents + strLength, cStr, appendedLength * sizeof(UniChar)); 4206 } else { 4207 __CFStrConvertBytesToUnicode((const uint8_t *)cStr, contents + strLength, appendedLength); 4208 } 4209 } else { 4210 if (demoteAppendedUnicode) { 4211 UniChar *chars = (UniChar *)cStr; 4212 CFIndex idx; 4213 uint8_t *contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str); 4214 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx]; 4215 } else { 4216 uint8_t *contents = (uint8_t *)__CFStrContents(str); 4217 memmove(contents + strLength + __CFStrSkipAnyLengthByte(str), cStr, appendedLength); 4218 } 4219 } 4220 } 4221 4222 if (freeCStrWhenDone) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr); 4223} 4224 4225void 
CFStringAppendPascalString(CFMutableStringRef str, ConstStringPtr pStr, CFStringEncoding encoding) { 4226 __CFStringAppendBytes(str, (const char *)(pStr + 1), (CFIndex)*pStr, encoding); 4227} 4228 4229void CFStringAppendCString(CFMutableStringRef str, const char *cStr, CFStringEncoding encoding) { 4230 __CFStringAppendBytes(str, cStr, strlen(cStr), encoding); 4231} 4232 4233 4234void CFStringAppendFormat(CFMutableStringRef str, CFDictionaryRef formatOptions, CFStringRef format, ...) { 4235 va_list argList; 4236 4237 va_start(argList, format); 4238 CFStringAppendFormatAndArguments(str, formatOptions, format, argList); 4239 va_end(argList); 4240} 4241 4242 4243CFIndex CFStringFindAndReplace(CFMutableStringRef string, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFStringCompareFlags compareOptions) { 4244 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFIndex, (NSMutableString *)string, replaceOccurrencesOfString:(NSString *)stringToFind withString:(NSString *)replacementString options:(NSStringCompareOptions)compareOptions range:NSMakeRange(rangeToSearch.location, rangeToSearch.length)); 4245 CFRange foundRange; 4246 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0); 4247 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length; 4248#define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange)) 4249 CFRange rangeBuffer[MAX_RANGES_ON_STACK]; // Used to avoid allocating memory 4250 CFRange *ranges = rangeBuffer; 4251 CFIndex foundCount = 0; 4252 CFIndex capacity = MAX_RANGES_ON_STACK; 4253 4254 __CFAssertIsStringAndMutable(string); 4255 __CFAssertRangeIsInStringBounds(string, rangeToSearch.location, rangeToSearch.length); 4256 4257 // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults(). 
4258 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) { 4259 // Determine the next range 4260 if (backwards) { 4261 rangeToSearch.length = foundRange.location - rangeToSearch.location; 4262 } else { 4263 rangeToSearch.location = foundRange.location + foundRange.length; 4264 rangeToSearch.length = endIndex - rangeToSearch.location; 4265 } 4266 4267 // If necessary, grow the array 4268 if (foundCount >= capacity) { 4269 bool firstAlloc = (ranges == rangeBuffer) ? true : false; 4270 capacity = (capacity + 4) * 2; 4271 // Note that reallocate with NULL previous pointer is same as allocate 4272 ranges = (CFRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, firstAlloc ? NULL : ranges, capacity * sizeof(CFRange), 0); 4273 if (firstAlloc) memmove(ranges, rangeBuffer, MAX_RANGES_ON_STACK * sizeof(CFRange)); 4274 } 4275 ranges[foundCount] = foundRange; 4276 foundCount++; 4277 } 4278 4279 if (foundCount > 0) { 4280 if (backwards) { // Reorder the ranges to be incrementing (better to do this here, then to check other places) 4281 int head = 0; 4282 int tail = foundCount - 1; 4283 while (head < tail) { 4284 CFRange temp = ranges[head]; 4285 ranges[head] = ranges[tail]; 4286 ranges[tail] = temp; 4287 head++; 4288 tail--; 4289 } 4290 } 4291 __CFStringReplaceMultiple(string, ranges, foundCount, replacementString); 4292 if (ranges != rangeBuffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, ranges); 4293 } 4294 4295 return foundCount; 4296} 4297 4298 4299// This function is here for NSString purposes 4300// It allows checking for mutability before mutating; this allows NSString to catch invalid mutations 4301 4302int __CFStringCheckAndReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) { 4303 if (!__CFStrIsMutable(str)) return _CFStringErrNotMutable; // These three ifs are always here, for NSString usage 4304 if (!replacement && __CFStringNoteErrors()) return 
_CFStringErrNilArg; 4305 // This attempts to catch bad ranges including those described in 3375535 (-1,1) 4306 unsigned long endOfRange = (unsigned long)(range.location) + (unsigned long)(range.length); // NSRange uses unsigned quantities, hence the casting 4307 if (((endOfRange > (unsigned long)__CFStrLength(str)) || (endOfRange < (unsigned long)(range.location))) && __CFStringNoteErrors()) return _CFStringErrBounds; 4308 4309 __CFAssertIsStringAndMutable(str); 4310 __CFAssertRangeIsInStringBounds(str, range.location, range.length); 4311 __CFStringReplace(str, range, replacement); 4312 return _CFStringErrNone; 4313} 4314 4315// This function determines whether errors which would cause string exceptions should 4316// be ignored or not 4317 4318Boolean __CFStringNoteErrors(void) { 4319 return true; 4320} 4321 4322 4323 4324void CFStringPad(CFMutableStringRef string, CFStringRef padString, CFIndex length, CFIndex indexIntoPad) { 4325 CFIndex originalLength; 4326 4327 __CFAssertIsNotNegative(length); 4328 __CFAssertIsNotNegative(indexIntoPad); 4329 4330 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfPad:padString length:(uint32_t)length padIndex:(uint32_t)indexIntoPad); 4331 4332 __CFAssertIsStringAndMutable(string); 4333 4334 originalLength = __CFStrLength(string); 4335 if (length < originalLength) { 4336 __CFStringChangeSize(string, CFRangeMake(length, originalLength - length), 0, false); 4337 } else if (originalLength < length) { 4338 uint8_t *contents; 4339 Boolean isUnicode; 4340 CFIndex charSize; 4341 CFIndex padStringLength; 4342 CFIndex padLength; 4343 CFIndex padRemaining = length - originalLength; 4344 4345 if (CF_IS_OBJC(__kCFStringTypeID, padString)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */ 4346 padStringLength = CFStringGetLength(padString); 4347 isUnicode = true; /* !!! 
Bad for now */ 4348 } else { 4349 __CFAssertIsString(padString); 4350 padStringLength = __CFStrLength(padString); 4351 isUnicode = __CFStrIsUnicode(string) || __CFStrIsUnicode(padString); 4352 } 4353 4354 charSize = isUnicode ? sizeof(UniChar) : sizeof(uint8_t); 4355 4356 __CFStringChangeSize(string, CFRangeMake(originalLength, 0), padRemaining, isUnicode); 4357 4358 contents = (uint8_t *)__CFStrContents(string) + charSize * originalLength + __CFStrSkipAnyLengthByte(string); 4359 padLength = padStringLength - indexIntoPad; 4360 padLength = padRemaining < padLength ? padRemaining : padLength; 4361 4362 while (padRemaining > 0) { 4363 if (isUnicode) { 4364 CFStringGetCharacters(padString, CFRangeMake(indexIntoPad, padLength), (UniChar *)contents); 4365 } else { 4366 CFStringGetBytes(padString, CFRangeMake(indexIntoPad, padLength), __CFStringGetEightBitStringEncoding(), 0, false, contents, padRemaining * charSize, NULL); 4367 } 4368 contents += padLength * charSize; 4369 padRemaining -= padLength; 4370 indexIntoPad = 0; 4371 padLength = padRemaining < padLength ? padRemaining : padStringLength; 4372 } 4373 } 4374} 4375 4376void CFStringTrim(CFMutableStringRef string, CFStringRef trimString) { 4377 CFRange range; 4378 CFIndex newStartIndex; 4379 CFIndex length; 4380 4381 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfTrim:trimString); 4382 4383 __CFAssertIsStringAndMutable(string); 4384 __CFAssertIsString(trimString); 4385 4386 newStartIndex = 0; 4387 length = __CFStrLength(string); 4388 4389 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length - newStartIndex), kCFCompareAnchored, &range)) { 4390 newStartIndex = range.location + range.length; 4391 } 4392 4393 if (newStartIndex < length) { 4394 CFIndex charSize = __CFStrIsUnicode(string) ? 
sizeof(UniChar) : sizeof(uint8_t); 4395 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string); 4396 4397 length -= newStartIndex; 4398 if (__CFStrLength(trimString) < length) { 4399 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length), kCFCompareAnchored|kCFCompareBackwards, &range)) { 4400 length = range.location - newStartIndex; 4401 } 4402 } 4403 memmove(contents, contents + newStartIndex * charSize, length * charSize); 4404 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false); 4405 } else { // Only trimString in string, trim all 4406 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false); 4407 } 4408} 4409 4410void CFStringTrimWhitespace(CFMutableStringRef string) { 4411 CFIndex newStartIndex; 4412 CFIndex length; 4413 CFStringInlineBuffer buffer; 4414 4415 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfTrimWS); 4416 4417 __CFAssertIsStringAndMutable(string); 4418 4419 newStartIndex = 0; 4420 length = __CFStrLength(string); 4421 4422 CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length)); 4423 CFIndex buffer_idx = 0; 4424 4425 while (buffer_idx < length && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet)) 4426 buffer_idx++; 4427 newStartIndex = buffer_idx; 4428 4429 if (newStartIndex < length) { 4430 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string); 4431 CFIndex charSize = (__CFStrIsUnicode(string) ? 
sizeof(UniChar) : sizeof(uint8_t)); 4432 4433 buffer_idx = length - 1; 4434 while (0 <= buffer_idx && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet)) 4435 buffer_idx--; 4436 length = buffer_idx - newStartIndex + 1; 4437 4438 memmove(contents, contents + newStartIndex * charSize, length * charSize); 4439 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false); 4440 } else { // Whitespace only string 4441 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false); 4442 } 4443} 4444 4445void CFStringLowercase(CFMutableStringRef string, CFLocaleRef locale) { 4446 CFIndex currentIndex = 0; 4447 CFIndex length; 4448 const uint8_t *langCode; 4449 Boolean isEightBit = __CFStrIsEightBit(string); 4450 4451 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfLowercase:(const void *)locale); 4452 4453 __CFAssertIsStringAndMutable(string); 4454 4455 length = __CFStrLength(string); 4456 4457 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? 
_CFStrGetLanguageIdentifierForLocale(locale, false) : NULL);

    // Fast path: with no language-specific rules and eight-bit storage, lowercase A-Z in place
    // and stop at the first byte above 127, which is left for the general loop below.
    if (!langCode && isEightBit) {
        uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
        for (;currentIndex < length;currentIndex++) {
            if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
                contents[currentIndex] += 'a' - 'A';
            } else if (contents[currentIndex] > 127) {
                break;
            }
        }
    }

    // General path: continues from wherever the fast path stopped (or from 0 if it did not run)
    if (currentIndex < length) {
        UTF16Char *contents;
        UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
        CFIndex mappedLength;
        UTF32Char currentChar;
        UInt32 flags = 0;

        // Zero-length change with the last argument true; the contents are read as UniChars
        // afterwards, so this presumably switches the backing store to Unicode.
        if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);

        contents = (UniChar *)__CFStrContents(string);

        for (;currentIndex < length;currentIndex++) {

            // Combine a valid surrogate pair into a single UTF-32 character
            if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
                currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
            } else {
                currentChar = contents[currentIndex];
            }
            // Conditional-mapping flags are only computed when a language code is set or the
            // character is U+03A3 (Greek capital sigma)
            flags = ((langCode || (currentChar == 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToLowercase, langCode, flags) : 0);

            mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, flags, langCode);
            if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;

            // The mapping may have a different number of UTF-16 units than the source character.
            // Each resize is followed by a re-fetch of 'contents' and an adjustment of 'length'.
            if (currentChar > 0xFFFF) { // Non-BMP char
                switch (mappedLength) {
                    case 0:
                        // Mapped to nothing: remove both units of the pair
                        __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
                        contents = (UniChar *)__CFStrContents(string);
                        length -= 2;
                        break;

                    case 1:
                        // Mapped to one unit (already stored): remove the second unit of the pair
                        __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
                        contents = (UniChar *)__CFStrContents(string);
                        --length;
                        break;

                    case 2:
                        // Same size: overwrite the second unit and step past it
                        contents[++currentIndex] = mappedCharacters[1];
                        break;

                    default:
                        // Longer than the pair: make room after the first unit and copy the rest
                        --mappedLength; // Skip the current char
                        __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
                        contents = (UniChar *)__CFStrContents(string);
                        memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
                        length += (mappedLength - 1);
                        currentIndex += mappedLength;
                        break;
                }
            } else if (mappedLength == 0) {
                // Single unit mapped to nothing: delete it
                __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
                contents = (UniChar *)__CFStrContents(string);
                --length;
            } else if (mappedLength > 1) {
                // Single unit mapped to several: insert the extra units after it
                --mappedLength; // Skip the current char
                __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
                contents = (UniChar *)__CFStrContents(string);
                memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
                length += mappedLength;
                currentIndex += mappedLength;
            }
        }
    }
}

void CFStringUppercase(CFMutableStringRef string, CFLocaleRef locale) {
    CFIndex currentIndex = 0;
    CFIndex length;
    const uint8_t *langCode;
    Boolean isEightBit = __CFStrIsEightBit(string);

    CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfUppercase:(const void *)locale);

    __CFAssertIsStringAndMutable(string);

    length = __CFStrLength(string);

    // langCode stays NULL unless _CFCanUseLocale() accepts the locale
    langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale, false) : NULL);

    // Fast path: with no language-specific rules and eight-bit storage, uppercase a-z in place
    // and stop at the first byte above 127, which is left for the general loop below.
    if (!langCode && isEightBit) {
        uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
        for (;currentIndex < length;currentIndex++) {
            if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
                contents[currentIndex] -= 'a' - 'A';
            } else if (contents[currentIndex] > 127) {
                break;
            }
        }
    }

    // General path: continues from wherever the fast path stopped (or from 0 if it did not run)
    if (currentIndex < length) {
        UniChar *contents;
        UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
        CFIndex mappedLength;
        UTF32Char currentChar;
        UInt32 flags = 0;

        // Zero-length change with the last argument true; the contents are read as UniChars
        // afterwards, so this presumably switches the backing store to Unicode.
        if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);

        contents = (UniChar *)__CFStrContents(string);

        for (;currentIndex < length;currentIndex++) {
            // Combine a valid surrogate pair into a single UTF-32 character
            if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
                currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
            } else {
                currentChar = contents[currentIndex];
            }

            // Conditional-mapping flags are only computed when a language code is set
            flags = (langCode ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToUppercase, langCode, flags) : 0);

            mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToUppercase, flags, langCode);
            if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;

            // The mapping may have a different number of UTF-16 units than the source character.
            // Each resize is followed by a re-fetch of 'contents' and an adjustment of 'length'.
            if (currentChar > 0xFFFF) { // Non-BMP char
                switch (mappedLength) {
                    case 0:
                        // Mapped to nothing: remove both units of the pair
                        __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
                        contents = (UniChar *)__CFStrContents(string);
                        length -= 2;
                        break;

                    case 1:
                        // Mapped to one unit (already stored): remove the second unit of the pair
                        __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
                        contents = (UniChar *)__CFStrContents(string);
                        --length;
                        break;

                    case 2:
                        // Same size: overwrite the second unit and step past it
                        contents[++currentIndex] = mappedCharacters[1];
                        break;

                    default:
                        // Longer than the pair: make room after the first unit and copy the rest
                        --mappedLength; // Skip the current char
                        __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
                        contents = (UniChar *)__CFStrContents(string);
                        memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
                        length += (mappedLength - 1);
                        currentIndex += mappedLength;
                        break;
                }
            } else if (mappedLength == 0) {
                // Single unit mapped to nothing: delete it
                __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
                contents = (UniChar *)__CFStrContents(string);
                --length;
            } else if (mappedLength > 1) {
                // Single unit mapped to several: insert the extra units after it
                --mappedLength; // Skip the current char
                __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
                contents = (UniChar *)__CFStrContents(string);
                memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
                length += mappedLength;
                currentIndex += mappedLength;
            }
        }
    }
}


void CFStringCapitalize(CFMutableStringRef string, CFLocaleRef locale) {
    CFIndex currentIndex = 0;
    CFIndex length;
    const uint8_t
*langCode;
    Boolean isEightBit = __CFStrIsEightBit(string);
    Boolean isLastCased = false;	// true while the previous significant character was a cased letter
    const uint8_t *caseIgnorableForBMP;

    CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfCapitalize:(const void *)locale);

    __CFAssertIsStringAndMutable(string);

    length = __CFStrLength(string);

    // Plane 0 bitmap of case-ignorable characters, fetched once for the loops below
    caseIgnorableForBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet, 0);

    langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale, false) : NULL);

    // Fast path: with no language-specific rules and eight-bit storage, handle A-Z / a-z in place.
    // A letter following a cased letter is lowercased, any other letter is uppercased.
    // Stops at the first byte above 127, which is left for the general loop below.
    if (!langCode && isEightBit) {
        uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
        for (;currentIndex < length;currentIndex++) {
            if (contents[currentIndex] > 127) {
                break;
            } else if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
                contents[currentIndex] += (isLastCased ? 'a' - 'A' : 0);
                isLastCased = true;
            } else if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
                contents[currentIndex] -= (!isLastCased ? 'a' - 'A' : 0);
                isLastCased = true;
            } else if (!CFUniCharIsMemberOfBitmap(contents[currentIndex], caseIgnorableForBMP)) {
                // A character that is neither a letter nor case-ignorable ends the current word
                isLastCased = false;
            }
        }
    }

    // General path: continues from wherever the fast path stopped (or from 0 if it did not run)
    if (currentIndex < length) {
        UniChar *contents;
        UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
        CFIndex mappedLength;
        UTF32Char currentChar;
        UInt32 flags = 0;

        // Zero-length change with the last argument true; the contents are read as UniChars
        // afterwards, so this presumably switches the backing store to Unicode.
        if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);

        contents = (UniChar *)__CFStrContents(string);

        for (;currentIndex < length;currentIndex++) {
            // Combine a valid surrogate pair into a single UTF-32 character
            if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
                currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
            } else {
                currentChar = contents[currentIndex];
            }
            // Lowercase after a cased letter, titlecase otherwise. Conditional flags are computed
            // when a language code is set, or for U+03A3 (Greek capital sigma) after a cased letter.
            flags = ((langCode || ((currentChar == 0x03A3) && isLastCased)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), langCode, flags) : 0);

            mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), flags, langCode);
            if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;

            // The mapping may have a different number of UTF-16 units than the source character.
            // Each resize is followed by a re-fetch of 'contents' and an adjustment of 'length'.
            if (currentChar > 0xFFFF) { // Non-BMP char
                switch (mappedLength) {
                    case 0:
                        // Mapped to nothing: remove both units of the pair
                        __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
                        contents = (UniChar *)__CFStrContents(string);
                        length -= 2;
                        break;

                    case 1:
                        // Mapped to one unit (already stored): remove the second unit of the pair
                        __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
                        contents = (UniChar *)__CFStrContents(string);
                        --length;
                        break;

                    case 2:
                        // Same size: overwrite the second unit and step past it
                        contents[++currentIndex] = mappedCharacters[1];
                        break;

                    default:
                        // Longer than the pair: make room after the first unit and copy the rest
                        --mappedLength; // Skip the current char
                        __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
                        contents = (UniChar *)__CFStrContents(string);
                        memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
                        length += (mappedLength - 1);
                        currentIndex += mappedLength;
                        break;
                }
            } else if (mappedLength == 0) {
                // Single unit mapped to nothing: delete it
                __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
                contents = (UniChar *)__CFStrContents(string);
                --length;
            } else if (mappedLength > 1) {
                // Single unit mapped to several: insert the extra units after it
                --mappedLength; // Skip the current char
                __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
                contents = (UniChar *)__CFStrContents(string);
                memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
                length += mappedLength;
                currentIndex += mappedLength;
            }

            // Case-ignorable characters leave isLastCased untouched; anything else updates it
            // according to whether the ORIGINAL character was an upper- or lowercase letter.
            if (!((currentChar > 0xFFFF) ? CFUniCharIsMemberOf(currentChar, kCFUniCharCaseIgnorableCharacterSet) : CFUniCharIsMemberOfBitmap(currentChar, caseIgnorableForBMP))) { // We have non-caseignorable here
                isLastCased = ((CFUniCharIsMemberOf(currentChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(currentChar, kCFUniCharLowercaseLetterCharacterSet)) ?
true : false); 4727 } 4728 } 4729 } 4730} 4731 4732 4733#define MAX_DECOMP_BUF 64 4734 4735#define HANGUL_SBASE 0xAC00 4736#define HANGUL_LBASE 0x1100 4737#define HANGUL_VBASE 0x1161 4738#define HANGUL_TBASE 0x11A7 4739#define HANGUL_SCOUNT 11172 4740#define HANGUL_LCOUNT 19 4741#define HANGUL_VCOUNT 21 4742#define HANGUL_TCOUNT 28 4743#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT) 4744 4745CF_INLINE uint32_t __CFGetUTF16Length(const UTF32Char *characters, uint32_t utf32Length) { 4746 const UTF32Char *limit = characters + utf32Length; 4747 uint32_t length = 0; 4748 4749 while (characters < limit) length += (*(characters++) > 0xFFFF ? 2 : 1); 4750 4751 return length; 4752} 4753 4754CF_INLINE void __CFFillInUTF16(const UTF32Char *characters, UTF16Char *dst, uint32_t utf32Length) { 4755 const UTF32Char *limit = characters + utf32Length; 4756 UTF32Char currentChar; 4757 4758 while (characters < limit) { 4759 currentChar = *(characters++); 4760 if (currentChar > 0xFFFF) { 4761 currentChar -= 0x10000; 4762 *(dst++) = (UTF16Char)((currentChar >> 10) + 0xD800UL); 4763 *(dst++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL); 4764 } else { 4765 *(dst++) = currentChar; 4766 } 4767 } 4768} 4769 4770void CFStringNormalize(CFMutableStringRef string, CFStringNormalizationForm theForm) { 4771 CFIndex currentIndex = 0; 4772 CFIndex length; 4773 bool needToReorder = true; 4774 4775 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfNormalize:theForm); 4776 4777 __CFAssertIsStringAndMutable(string); 4778 4779 length = __CFStrLength(string); 4780 4781 if (__CFStrIsEightBit(string)) { 4782 uint8_t *contents; 4783 4784 if (theForm == kCFStringNormalizationFormC) return; // 8bit form has no decomposition 4785 4786 contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string); 4787 4788 for (;currentIndex < length;currentIndex++) { 4789 if (contents[currentIndex] > 127) { 4790 __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); // 
need to do harm way 4791 needToReorder = false; 4792 break; 4793 } 4794 } 4795 } 4796 4797 if (currentIndex < length) { 4798 UTF16Char *limit = (UTF16Char *)__CFStrContents(string) + length; 4799 UTF16Char *contents = (UTF16Char *)__CFStrContents(string) + currentIndex; 4800 UTF32Char buffer[MAX_DECOMP_BUF]; 4801 UTF32Char *mappedCharacters = buffer; 4802 CFIndex allocatedLength = MAX_DECOMP_BUF; 4803 CFIndex mappedLength; 4804 CFIndex currentLength; 4805 UTF32Char currentChar; 4806 const uint8_t *decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0); 4807 const uint8_t *nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0); 4808 const uint8_t *combiningBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0); 4809 4810 while (contents < limit) { 4811 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) { 4812 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1)); 4813 currentLength = 2; 4814 contents += 2; 4815 } else { 4816 currentChar = *(contents++); 4817 currentLength = 1; 4818 } 4819 4820 mappedLength = 0; 4821 4822 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16)))) && (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, ((currentChar < 0x10000) ? 
combiningBMP : (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16)))))) { 4823 if ((theForm & kCFStringNormalizationFormC) == 0 || currentChar < HANGUL_SBASE || currentChar > (HANGUL_SBASE + HANGUL_SCOUNT)) { // We don't have to decompose Hangul Syllables if we're precomposing again 4824 mappedLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters, MAX_DECOMP_BUF); 4825 } 4826 } 4827 4828 if ((needToReorder || (theForm & kCFStringNormalizationFormC)) && ((contents < limit) || (mappedLength == 0))) { 4829 if (mappedLength > 0) { 4830 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) { 4831 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1)); 4832 } else { 4833 currentChar = *contents; 4834 } 4835 } 4836 4837 if (0 != CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) { 4838 uint32_t decompLength; 4839 4840 if (mappedLength == 0) { 4841 contents -= (currentChar & 0xFFFF0000 ? 2 : 1); 4842 if (currentIndex > 0) { 4843 if (CFUniCharIsSurrogateLowCharacter(*(contents - 1)) && (currentIndex > 1) && CFUniCharIsSurrogateHighCharacter(*(contents - 2))) { 4844 *mappedCharacters = CFUniCharGetLongCharacterForSurrogatePair(*(contents - 2), *(contents - 1)); 4845 currentIndex -= 2; 4846 currentLength += 2; 4847 } else { 4848 *mappedCharacters = *(contents - 1); 4849 --currentIndex; 4850 ++currentLength; 4851 } 4852 mappedLength = 1; 4853 } 4854 } else { 4855 currentLength += (currentChar & 0xFFFF0000 ? 2 : 1); 4856 } 4857 contents += (currentChar & 0xFFFF0000 ? 2 : 1); 4858 4859 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? 
decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc. 4860 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength); 4861 mappedLength += decompLength; 4862 } else { 4863 mappedCharacters[mappedLength++] = currentChar; 4864 } 4865 4866 while (contents < limit) { 4867 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) { 4868 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1)); 4869 } else { 4870 currentChar = *contents; 4871 } 4872 if (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) break; 4873 if (currentChar & 0xFFFF0000) { 4874 contents += 2; 4875 currentLength += 2; 4876 } else { 4877 ++contents; 4878 ++currentLength; 4879 } 4880 if (mappedLength == allocatedLength) { 4881 allocatedLength += MAX_DECOMP_BUF; 4882 if (mappedCharacters == buffer) { 4883 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0); 4884 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char)); 4885 } else { 4886 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0); 4887 } 4888 } 4889 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc. 
4890 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength); 4891 mappedLength += decompLength; 4892 } else { 4893 mappedCharacters[mappedLength++] = currentChar; 4894 } 4895 } 4896 } 4897 if (needToReorder && mappedLength > 1) CFUniCharPrioritySort(mappedCharacters, mappedLength); 4898 } 4899 4900 if (theForm & kCFStringNormalizationFormKD) { 4901 CFIndex newLength = 0; 4902 4903 if (mappedLength == 0 && CFUniCharIsMemberOf(currentChar, kCFUniCharCompatibilityDecomposableCharacterSet)) { 4904 mappedCharacters[mappedLength++] = currentChar; 4905 } 4906 while (newLength < mappedLength) { 4907 newLength = CFUniCharCompatibilityDecompose(mappedCharacters, mappedLength, allocatedLength); 4908 if (newLength == 0) { 4909 allocatedLength += MAX_DECOMP_BUF; 4910 if (mappedCharacters == buffer) { 4911 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0); 4912 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char)); 4913 } else { 4914 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0); 4915 } 4916 } 4917 } 4918 mappedLength = newLength; 4919 } 4920 4921 if (theForm & kCFStringNormalizationFormC) { 4922 UTF32Char nextChar; 4923 4924 if (mappedLength > 1) { 4925 CFIndex consumedLength = 1; 4926 UTF32Char *currentBase = mappedCharacters; 4927 uint8_t currentClass, lastClass = 0; 4928 bool didCombine = false; 4929 4930 currentChar = *mappedCharacters; 4931 4932 while (consumedLength < mappedLength) { 4933 nextChar = mappedCharacters[consumedLength]; 4934 currentClass = CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)((nextChar < 0x10000) ? 
combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16)))); 4935 4936 if (theForm & kCFStringNormalizationFormKD) { 4937 if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { 4938 SInt8 lIndex = currentChar - HANGUL_LBASE; 4939 4940 if ((0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) { 4941 SInt16 vIndex = nextChar - HANGUL_VBASE; 4942 4943 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) { 4944 SInt16 tIndex = 0; 4945 CFIndex usedLength = mappedLength; 4946 4947 mappedCharacters[consumedLength++] = 0xFFFD; 4948 4949 if (consumedLength < mappedLength) { 4950 tIndex = mappedCharacters[consumedLength] - HANGUL_TBASE; 4951 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) { 4952 tIndex = 0; 4953 } else { 4954 mappedCharacters[consumedLength++] = 0xFFFD; 4955 } 4956 } 4957 *currentBase = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE; 4958 4959 while (--usedLength > 0) { 4960 if (mappedCharacters[usedLength] == 0xFFFD) { 4961 --mappedLength; 4962 --consumedLength; 4963 memmove(mappedCharacters + usedLength, mappedCharacters + usedLength + 1, (mappedLength - usedLength) * sizeof(UTF32Char)); 4964 } 4965 } 4966 currentBase = mappedCharacters + consumedLength; 4967 currentChar = *currentBase; 4968 ++consumedLength; 4969 4970 continue; 4971 } 4972 } 4973 } 4974 if (!CFUniCharIsMemberOfBitmap(nextChar, ((nextChar < 0x10000) ? 
nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))))) { 4975 *currentBase = currentChar; 4976 currentBase = mappedCharacters + consumedLength; 4977 currentChar = nextChar; 4978 ++consumedLength; 4979 continue; 4980 } 4981 } 4982 4983 if ((lastClass == 0) || (currentClass > lastClass)) { 4984 nextChar = CFUniCharPrecomposeCharacter(currentChar, nextChar); 4985 if (nextChar == 0xFFFD) { 4986 lastClass = currentClass; 4987 } else { 4988 mappedCharacters[consumedLength] = 0xFFFD; 4989 didCombine = true; 4990 currentChar = nextChar; 4991 } 4992 } 4993 ++consumedLength; 4994 } 4995 4996 *currentBase = currentChar; 4997 if (didCombine) { 4998 consumedLength = mappedLength; 4999 while (--consumedLength > 0) { 5000 if (mappedCharacters[consumedLength] == 0xFFFD) { 5001 --mappedLength; 5002 memmove(mappedCharacters + consumedLength, mappedCharacters + consumedLength + 1, (mappedLength - consumedLength) * sizeof(UTF32Char)); 5003 } 5004 } 5005 } 5006 } else if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo 5007 SInt8 lIndex = currentChar - HANGUL_LBASE; 5008 5009 if ((contents < limit) && (0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) { 5010 SInt16 vIndex = *contents - HANGUL_VBASE; 5011 5012 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) { 5013 SInt16 tIndex = 0; 5014 5015 ++contents; ++currentLength; 5016 5017 if (contents < limit) { 5018 tIndex = *contents - HANGUL_TBASE; 5019 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) { 5020 tIndex = 0; 5021 } else { 5022 ++contents; ++currentLength; 5023 } 5024 } 5025 *mappedCharacters = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE; 5026 mappedLength = 1; 5027 } 5028 } 5029 } else { // collect class 0 non-base characters 5030 while (contents < limit) { 5031 nextChar = *contents; 5032 if (CFUniCharIsSurrogateHighCharacter(nextChar) && ((contents + 1) < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) { 5033 
nextChar = CFUniCharGetLongCharacterForSurrogatePair(nextChar, *(contents + 1)); 5034 if (!CFUniCharIsMemberOfBitmap(nextChar, (const uint8_t *)CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))))) break; 5035 } else { 5036 if (!CFUniCharIsMemberOfBitmap(nextChar, nonBaseBMP) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, combiningBMP))) break; 5037 } 5038 currentChar = CFUniCharPrecomposeCharacter(currentChar, nextChar); 5039 if (0xFFFD == currentChar) break; 5040 5041 if (nextChar < 0x10000) { 5042 ++contents; ++currentLength; 5043 } else { 5044 contents += 2; 5045 currentLength += 2; 5046 } 5047 5048 *mappedCharacters = currentChar; 5049 mappedLength = 1; 5050 } 5051 } 5052 } 5053 5054 if (mappedLength > 0) { 5055 CFIndex utf16Length = __CFGetUTF16Length(mappedCharacters, mappedLength); 5056 5057 if (utf16Length != currentLength) { 5058 __CFStringChangeSize(string, CFRangeMake(currentIndex, currentLength), utf16Length, true); 5059 currentLength = utf16Length; 5060 } 5061 contents = (UTF16Char *)__CFStrContents(string); 5062 limit = contents + __CFStrLength(string); 5063 contents += currentIndex; 5064 __CFFillInUTF16(mappedCharacters, contents, mappedLength); 5065 contents += utf16Length; 5066 } 5067 currentIndex += currentLength; 5068 } 5069 5070 if (mappedCharacters != buffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, mappedCharacters); 5071 } 5072} 5073 5074void CFStringFold(CFMutableStringRef theString, CFStringCompareFlags theFlags, CFLocaleRef locale) { 5075 CFStringInlineBuffer stringBuffer; 5076 CFIndex length = CFStringGetLength(theString); 5077 CFIndex currentIndex = 0; 5078 CFIndex bufferLength = 0; 5079 UTF32Char buffer[kCFStringStackBufferLength]; 5080 const uint8_t *cString; 5081 const uint8_t *langCode; 5082 CFStringEncoding 
eightBitEncoding; 5083 bool caseInsensitive = ((theFlags & kCFCompareCaseInsensitive) ? true : false); 5084 bool isObjc = CF_IS_OBJC(__kCFStringTypeID, theString); 5085 CFLocaleRef theLocale = locale; 5086 5087 if ((theFlags & kCFCompareLocalized) && (NULL == locale)) { 5088 theLocale = CFLocaleCopyCurrent(); 5089 } 5090 5091 theFlags &= (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive); 5092 5093 if ((0 == theFlags) || (0 == length)) goto bail; // nothing to do 5094 5095 langCode = ((NULL == theLocale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(theLocale, true)); 5096 5097 eightBitEncoding = __CFStringGetEightBitStringEncoding(); 5098 cString = (const uint8_t *)CFStringGetCStringPtr(theString, eightBitEncoding); 5099 5100 if ((NULL != cString) && !caseInsensitive && (kCFStringEncodingASCII == eightBitEncoding)) goto bail; // All ASCII 5101 5102 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length)); 5103 5104 if ((NULL != cString) && (theFlags & (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive))) { 5105 const uint8_t *cStringPtr = cString; 5106 const uint8_t *cStringLimit = cString + length; 5107 uint8_t *cStringContents = (isObjc ? 
NULL : (uint8_t *)__CFStrContents(theString) + __CFStrSkipAnyLengthByte(theString)); 5108 5109 while (cStringPtr < cStringLimit) { 5110 if ((*cStringPtr < 0x80) && (NULL == langCode)) { 5111 if (caseInsensitive && (*cStringPtr >= 'A') && (*cStringPtr <= 'Z')) { 5112 if (NULL == cStringContents) { 5113 break; 5114 } else { 5115 cStringContents[cStringPtr - cString] += ('a' - 'A'); 5116 } 5117 } 5118 } else { 5119 if ((bufferLength = __CFStringFoldCharacterClusterAtIndex((UTF32Char)__CFCharToUniCharTable[*cStringPtr], &stringBuffer, cStringPtr - cString, theFlags, langCode, buffer, kCFStringStackBufferLength, NULL)) > 0) { 5120 if ((*buffer > 0x7F) || (bufferLength > 1) || (NULL == cStringContents)) break; 5121 cStringContents[cStringPtr - cString] = *buffer; 5122 } 5123 } 5124 ++cStringPtr; 5125 } 5126 5127 currentIndex = cStringPtr - cString; 5128 } 5129 5130 if (currentIndex < length) { 5131 UTF16Char *contents; 5132 5133 if (isObjc) { 5134 CFMutableStringRef cfString; 5135 CFRange range = CFRangeMake(currentIndex, length - currentIndex); 5136 5137 contents = (UTF16Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * range.length, 0); 5138 5139 CFStringGetCharacters(theString, range, contents); 5140 5141 cfString = CFStringCreateMutableWithExternalCharactersNoCopy(kCFAllocatorSystemDefault, contents, range.length, range.length, NULL); 5142 5143 CFStringFold(cfString, theFlags, theLocale); 5144 5145 CFStringReplace(theString, range, cfString); 5146 5147 CFRelease(cfString); 5148 } else { 5149 const UTF32Char *characters; 5150 const UTF32Char *charactersLimit; 5151 UTF32Char character; 5152 CFIndex consumedLength; 5153 5154 contents = NULL; 5155 5156 if (bufferLength > 0) { 5157 __CFStringChangeSize(theString, CFRangeMake(currentIndex + 1, 0), bufferLength - 1, true); 5158 length = __CFStrLength(theString); 5159 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length)); 5160 5161 contents = (UTF16Char 
*)__CFStrContents(theString) + currentIndex; 5162 characters = buffer; 5163 charactersLimit = characters + bufferLength; 5164 while (characters < charactersLimit) *(contents++) = (UTF16Char)*(characters++); 5165 ++currentIndex; 5166 } 5167 5168 while (currentIndex < length) { 5169 character = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex); 5170 5171 consumedLength = 0; 5172 5173 if ((NULL == langCode) && (character < 0x80) && (0 == (theFlags & kCFCompareDiacriticInsensitive))) { 5174 if (caseInsensitive && (character >= 'A') && (character <= 'Z')) { 5175 consumedLength = 1; 5176 bufferLength = 1; 5177 *buffer = character + ('a' - 'A'); 5178 } 5179 } else { 5180 if (CFUniCharIsSurrogateHighCharacter(character) && ((currentIndex + 1) < length)) { 5181 UTF16Char lowSurrogate = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex + 1); 5182 if (CFUniCharIsSurrogateLowCharacter(lowSurrogate)) character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate); 5183 } 5184 5185 bufferLength = __CFStringFoldCharacterClusterAtIndex(character, &stringBuffer, currentIndex, theFlags, langCode, buffer, kCFStringStackBufferLength, &consumedLength); 5186 } 5187 5188 if (consumedLength > 0) { 5189 CFIndex utf16Length = bufferLength; 5190 5191 characters = buffer; 5192 charactersLimit = characters + bufferLength; 5193 5194 while (characters < charactersLimit) if (*(characters++) > 0xFFFF) ++utf16Length; // Extend bufferLength to the UTF-16 length 5195 5196 if ((utf16Length != consumedLength) || __CFStrIsEightBit(theString)) { 5197 CFRange range; 5198 CFIndex insertLength; 5199 5200 if (consumedLength < utf16Length) { // Need to expand 5201 range = CFRangeMake(currentIndex + consumedLength, 0); 5202 insertLength = utf16Length - consumedLength; 5203 } else { 5204 range = CFRangeMake(currentIndex + utf16Length, consumedLength - utf16Length); 5205 insertLength = 0; 5206 } 5207 __CFStringChangeSize(theString, range, insertLength, 
true); 5208 length = __CFStrLength(theString); 5209 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length)); 5210 } 5211 5212 (void)CFUniCharFromUTF32(buffer, bufferLength, (UTF16Char *)__CFStrContents(theString) + currentIndex, true, __CF_BIG_ENDIAN__); 5213 5214 currentIndex += utf16Length; 5215 } else { 5216 ++currentIndex; 5217 } 5218 } 5219 } 5220 } 5221 5222 bail: 5223 if (NULL == locale && theLocale) { 5224 CFRelease(theLocale); 5225 } 5226} 5227 5228enum { 5229 kCFStringFormatZeroFlag = (1 << 0), // if not, padding is space char 5230 kCFStringFormatMinusFlag = (1 << 1), // if not, no flag implied 5231 kCFStringFormatPlusFlag = (1 << 2), // if not, no flag implied, overrides space 5232 kCFStringFormatSpaceFlag = (1 << 3), // if not, no flag implied 5233 kCFStringFormatExternalSpecFlag = (1 << 4), // using config dict 5234 kCFStringFormatLocalizable = (1 << 5) // explicitly mark the specs we can localize 5235}; 5236 5237typedef struct { 5238 int16_t size; 5239 int16_t type; 5240 SInt32 loc; 5241 SInt32 len; 5242 SInt32 widthArg; 5243 SInt32 precArg; 5244 uint32_t flags; 5245 int8_t mainArgNum; 5246 int8_t precArgNum; 5247 int8_t widthArgNum; 5248 int8_t configDictIndex; 5249 int8_t numericFormatStyle; // Only set for localizable numeric quantities 5250} CFFormatSpec; 5251 5252typedef struct { 5253 int16_t type; 5254 int16_t size; 5255 union { 5256 int64_t int64Value; 5257 double doubleValue; 5258#if LONG_DOUBLE_SUPPORT 5259 long double longDoubleValue; 5260#endif 5261 void *pointerValue; 5262 } value; 5263} CFPrintValue; 5264 5265enum { 5266 CFFormatDefaultSize = 0, 5267 CFFormatSize1 = 1, 5268 CFFormatSize2 = 2, 5269 CFFormatSize4 = 3, 5270 CFFormatSize8 = 4, 5271 CFFormatSize16 = 5, 5272#if __LP64__ 5273 CFFormatSizeLong = CFFormatSize8, 5274 CFFormatSizePointer = CFFormatSize8 5275#else 5276 CFFormatSizeLong = CFFormatSize4, 5277 CFFormatSizePointer = CFFormatSize4 5278#endif 5279}; 5280 5281enum { 5282 CFFormatStyleDecimal = (1 << 0), 
5283 CFFormatStyleScientific = (1 << 1), 5284 CFFormatStyleDecimalOrScientific = CFFormatStyleDecimal|CFFormatStyleScientific, 5285 CFFormatStyleUnsigned = (1 << 2) 5286}; 5287 5288enum { 5289 CFFormatLiteralType = 32, 5290 CFFormatLongType = 33, 5291 CFFormatDoubleType = 34, 5292 CFFormatPointerType = 35, 5293 CFFormatObjectType = 36, /* handled specially */ /* ??? not used anymore, can be removed? */ 5294 CFFormatCFType = 37, /* handled specially */ 5295 CFFormatUnicharsType = 38, /* handled specially */ 5296 CFFormatCharsType = 39, /* handled specially */ 5297 CFFormatPascalCharsType = 40, /* handled specially */ 5298 CFFormatSingleUnicharType = 41, /* handled specially */ 5299 CFFormatDummyPointerType = 42 /* special case for %n */ 5300}; 5301 5302#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS 5303/* Only come in here if spec->type is CFFormatLongType or CFFormatDoubleType. Pass in 0 for width or precision if not specified. Returns false if couldn't do the format (with the assumption the caller falls back to unlocalized). 5304*/ 5305static Boolean __CFStringFormatLocalizedNumber(CFMutableStringRef output, CFLocaleRef locale, const CFPrintValue *values, const CFFormatSpec *spec, SInt32 width, SInt32 precision, Boolean hasPrecision) { 5306 static CFSpinLock_t formatterLock = CFSpinLockInit; 5307 // These formatters are recached if the locale argument is different 5308 static CFNumberFormatterRef decimalFormatter = NULL; 5309 static CFNumberFormatterRef scientificFormatter = NULL; 5310 static CFNumberFormatterRef gFormatter = NULL; // for %g 5311 static SInt32 groupingSize = 0; 5312 static SInt32 secondaryGroupingSize = 0; 5313 5314 // !!! This code should be removed before shipping 5315 static char disableLocalizedFormatting = -1; 5316 if (disableLocalizedFormatting == -1) disableLocalizedFormatting = (getenv("CFStringDisableLocalizedNumberFormatting") != NULL) ? 
1 : 0; 5317 if (disableLocalizedFormatting) return false; 5318 5319 CFNumberFormatterRef formatter; 5320 5321 __CFSpinLock(&formatterLock); // We use the formatter from one thread at one time; if this proves to be a bottleneck we need to get fancier 5322 5323 switch (spec->numericFormatStyle) { 5324 case CFFormatStyleUnsigned: 5325 case CFFormatStyleDecimal: 5326 if (!decimalFormatter || !CFEqual(CFNumberFormatterGetLocale(decimalFormatter), locale)) { // cache or recache if the locale is different 5327 if (decimalFormatter) CFRelease(decimalFormatter); 5328 decimalFormatter = CFNumberFormatterCreate(NULL, locale, kCFNumberFormatterDecimalStyle); // since this is shared, remember to reset all its properties! 5329 } 5330 formatter = decimalFormatter; 5331 break; 5332 case CFFormatStyleScientific: 5333 if (!scientificFormatter || !CFEqual(CFNumberFormatterGetLocale(scientificFormatter), locale)) { // cache or recache if the locale is different 5334 if (scientificFormatter) CFRelease(scientificFormatter); 5335 scientificFormatter = CFNumberFormatterCreate(NULL, locale, kCFNumberFormatterScientificStyle); 5336 CFStringRef pattern = CFSTR("#E+00"); // the default pattern does not have the sign if the exponent is positive and it is single digit 5337 CFNumberFormatterSetFormat(scientificFormatter, pattern); 5338 CFNumberFormatterSetProperty(scientificFormatter, kCFNumberFormatterUseSignificantDigitsKey, kCFBooleanTrue); 5339 } 5340 formatter = scientificFormatter; 5341 break; 5342 case CFFormatStyleDecimalOrScientific: 5343 if (!gFormatter || !CFEqual(CFNumberFormatterGetLocale(gFormatter), locale)) { // cache or recache if the locale is different 5344 if (gFormatter) CFRelease(gFormatter); 5345 gFormatter = CFNumberFormatterCreate(NULL, locale, kCFNumberFormatterDecimalStyle); 5346 // when we update the locale in gFormatter, we also need to update the two grouping sizes 5347 CFNumberRef num = (CFNumberRef) CFNumberFormatterCopyProperty(gFormatter, 
kCFNumberFormatterGroupingSizeKey); 5348 CFNumberGetValue(num, kCFNumberSInt32Type, &groupingSize); 5349 CFRelease(num); 5350 num = (CFNumberRef) CFNumberFormatterCopyProperty(gFormatter, kCFNumberFormatterSecondaryGroupingSizeKey); 5351 CFNumberGetValue(num, kCFNumberSInt32Type, &secondaryGroupingSize); 5352 CFRelease(num); 5353 } 5354 formatter = gFormatter; 5355 break; 5356 } 5357 5358 SInt32 prec = hasPrecision ? precision : ((spec->type == CFFormatLongType) ? 0 : 6); // default precision of printf is 6 5359 5360 // pattern must be set before setting width and padding 5361 // otherwise, the pattern will take over those settings 5362 if (spec->numericFormatStyle == CFFormatStyleDecimalOrScientific) { 5363 if (prec == 0) prec = 1; // at least one sig fig 5364 CFMutableStringRef pattern = CFStringCreateMutable(NULL, 0); 5365 // use significant digits pattern 5366 CFStringAppendCString(pattern, "@", kCFStringEncodingASCII); 5367 CFStringPad(pattern, CFSTR("#"), prec, 0); 5368 double targetValue = values[spec->mainArgNum].value.doubleValue; 5369#if LONG_DOUBLE_SUPPORT 5370 if (CFFormatSize16 == values[spec->mainArgNum].size) { 5371 targetValue = values[spec->mainArgNum].value.longDoubleValue; // losing precision 5372 } 5373#endif 5374 double max = pow(10.0, (double)prec); // if the value requires more digits than the number of sig figs, we need to use scientific format 5375 double min = 0.0001; // if the value is less than 10E-4, scientific format is the shorter form 5376 if (((targetValue > 0 && (targetValue > max || targetValue < min)) || (targetValue < 0 && (targetValue < -max || targetValue > -min)))){ 5377 CFStringAppendCString(pattern, "E+00", kCFStringEncodingASCII); 5378 } else if (prec > groupingSize && groupingSize != 0) { 5379 CFStringInsert(pattern, prec-groupingSize, CFSTR(",")); // if we are not using scientific format, we need to set the pattern to use grouping separator 5380 if (secondaryGroupingSize != 0 && prec > (groupingSize + 
secondaryGroupingSize)) CFStringInsert(pattern, prec-groupingSize-secondaryGroupingSize, CFSTR(",")); 5381 } 5382 CFNumberFormatterSetFormat(formatter, pattern); 5383 CFRelease(pattern); 5384 } 5385 5386 // clear the padding, we will add it later if we need it 5387 const SInt32 z = 0; 5388 CFNumberRef zero = CFNumberCreate(NULL, kCFNumberSInt32Type, &z); 5389 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterFormatWidthKey, zero); 5390 5391 CFNumberRef tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &prec); 5392 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMaxFractionDigitsKey, tmp); 5393 if (spec->type == CFFormatDoubleType) { 5394 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMinFractionDigitsKey, tmp); 5395 } else { 5396 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMinFractionDigitsKey, zero); 5397 } 5398 CFRelease(tmp); 5399 CFRelease(zero); 5400 5401 5402 // ??? use the right zero here for Arabic 5403 Boolean padZero = spec->flags & kCFStringFormatZeroFlag; 5404 if (hasPrecision && spec->type == CFFormatLongType) { // if we have precision and %d or %u, we pad 0 5405 padZero = true; 5406 } 5407 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterPaddingCharacterKey, padZero ? CFSTR("0") : CFSTR(" ")); 5408 5409 5410 // Left (default) or right padding 5411 SInt32 p = (spec->flags & kCFStringFormatMinusFlag) ? kCFNumberFormatterPadAfterSuffix : (padZero ? 
kCFNumberFormatterPadAfterPrefix : kCFNumberFormatterPadBeforePrefix); 5412 if (hasPrecision && spec->type == CFFormatLongType) { 5413 SInt32 tmpP = kCFNumberFormatterPadAfterPrefix; 5414 tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &tmpP); 5415 } else { 5416 tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &p); 5417 } 5418 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterPaddingPositionKey, tmp); 5419 CFRelease(tmp); 5420 5421 Boolean isNegative = false; 5422 switch (values[spec->mainArgNum].type) { 5423 case CFFormatLongType: 5424 if (values[spec->mainArgNum].value.int64Value < 0) isNegative = true; 5425 break; 5426 case CFFormatDoubleType: 5427#if LONG_DOUBLE_SUPPORT 5428 if ((CFFormatSize16 == values[spec->mainArgNum].size) && (values[spec->mainArgNum].value.longDoubleValue < 0)) isNegative = true; 5429 else 5430#endif 5431 if (values[spec->mainArgNum].value.doubleValue < 0) isNegative = true; 5432 break; 5433 } 5434 5435 CFStringRef pattern = CFNumberFormatterGetFormat(formatter); 5436 if ((spec->flags & kCFStringFormatPlusFlag) && !isNegative) { 5437 if (CFStringGetCharacterAtIndex(pattern, 0) != '+') { 5438 CFMutableStringRef newPattern = CFStringCreateMutableCopy(NULL, 0, CFSTR("+")); 5439 CFStringAppend(newPattern, pattern); 5440 CFNumberFormatterSetFormat(formatter, newPattern); 5441 CFRelease(newPattern); 5442 } 5443 } else { 5444 if (CFStringGetCharacterAtIndex(pattern, 0) == '+') { 5445 CFStringRef newPattern = CFStringCreateWithSubstring(NULL, pattern, CFRangeMake(1, CFStringGetLength(pattern)-1)); 5446 CFNumberFormatterSetFormat(formatter, newPattern); 5447 CFRelease(newPattern); 5448 } 5449 } 5450 5451 // width == 0 seems to be CFNumberFormatter's default setting 5452 if (hasPrecision && spec->type == CFFormatLongType) { // if we have precision and %d or %u, we pad 0 according to precision first 5453 tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &prec); 5454 } else { 5455 tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &width); 5456 } 
5457 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterFormatWidthKey, tmp); 5458 CFRelease(tmp); 5459 5460 if (spec->numericFormatStyle == CFFormatStyleScientific) { 5461 prec++; // for %e, precision+1 is the number of sig fig 5462 tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &prec); 5463 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMinSignificantDigitsKey, tmp); 5464 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMaxSignificantDigitsKey, tmp); 5465 CFRelease(tmp); 5466 } 5467 5468 CFStringRef localizedNumberString = NULL; 5469 switch (spec->type) { 5470 case CFFormatLongType: 5471 // ??? Need to do unsigned 5472 localizedNumberString = CFNumberFormatterCreateStringWithValue(NULL, formatter, kCFNumberSInt64Type, &(values[spec->mainArgNum].value.int64Value)); 5473 break; 5474 case CFFormatDoubleType: { 5475#if LONG_DOUBLE_SUPPORT 5476 if (CFFormatSize16 == values[spec->mainArgNum].size) { 5477 double doubleValue = values[spec->mainArgNum].value.longDoubleValue; // losing precision 5478 localizedNumberString = CFNumberFormatterCreateStringWithValue(NULL, formatter, kCFNumberDoubleType, &doubleValue); 5479 } else 5480#endif 5481 { 5482 localizedNumberString = CFNumberFormatterCreateStringWithValue(NULL, formatter, kCFNumberDoubleType, &(values[spec->mainArgNum].value.doubleValue)); 5483 } 5484 break; 5485 } 5486 } 5487 __CFSpinUnlock(&formatterLock); 5488 5489 if (localizedNumberString) { 5490 // we need to pad space if we have %d or %u 5491 if (spec->type == CFFormatLongType && hasPrecision && CFStringGetLength(localizedNumberString) < width) { 5492 CFMutableStringRef finalStr = NULL; 5493 if (p == kCFNumberFormatterPadAfterSuffix) { 5494 finalStr = CFStringCreateMutableCopy(NULL, 0, localizedNumberString); 5495 CFStringPad(finalStr, CFSTR(" "), width, 0); 5496 } else { 5497 finalStr = CFStringCreateMutable(NULL, 0); 5498 CFStringPad(finalStr, CFSTR(" "), width - CFStringGetLength(localizedNumberString), 0); 5499 
CFStringAppend(finalStr, localizedNumberString); 5500 } 5501 CFRelease(localizedNumberString); 5502 localizedNumberString = finalStr; 5503 } 5504 CFStringAppend(output, localizedNumberString); 5505 CFRelease(localizedNumberString); 5506 return true; 5507 } 5508 return false; 5509} 5510#endif 5511 5512CF_INLINE void __CFParseFormatSpec(const UniChar *uformat, const uint8_t *cformat, SInt32 *fmtIdx, SInt32 fmtLen, CFFormatSpec *spec, CFStringRef *configKeyPointer) { 5513 Boolean seenDot = false; 5514 Boolean seenSharp = false; 5515 CFIndex keyIndex = kCFNotFound; 5516 5517 for (;;) { 5518 UniChar ch; 5519 if (fmtLen <= *fmtIdx) return; /* no type */ 5520 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++]; 5521 5522 if (keyIndex >= 0) { 5523 if ((ch < '0') || ((ch > '9') && (ch < 'A')) || ((ch > 'Z') && (ch < 'a') && (ch != '_')) || (ch > 'z')) { 5524 if (ch == '@') { // found the key 5525 CFIndex length = (*fmtIdx) - 1 - keyIndex; 5526 5527 spec->flags |= kCFStringFormatExternalSpecFlag; 5528 spec->type = CFFormatCFType; 5529 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64 5530 5531 if ((NULL != configKeyPointer) && (length > 0)) { 5532 if (cformat) { 5533 *configKeyPointer = CFStringCreateWithBytes(NULL, cformat + keyIndex, length, __CFStringGetEightBitStringEncoding(), FALSE); 5534 } else { 5535 *configKeyPointer = CFStringCreateWithCharactersNoCopy(NULL, uformat + keyIndex, length, kCFAllocatorNull); 5536 } 5537 } 5538 return; 5539 } 5540 keyIndex = kCFNotFound; 5541 } 5542 continue; 5543 } 5544 5545reswtch:switch (ch) { 5546 case '#': // ignored for now 5547 seenSharp = true; 5548 break; 5549 case 0x20: 5550 if (!(spec->flags & kCFStringFormatPlusFlag)) spec->flags |= kCFStringFormatSpaceFlag; 5551 break; 5552 case '-': 5553 spec->flags |= kCFStringFormatMinusFlag; 5554 spec->flags &= ~kCFStringFormatZeroFlag; // remove zero flag 5555 break; 5556 case '+': 5557 spec->flags |= kCFStringFormatPlusFlag; 5558 spec->flags &= 
~kCFStringFormatSpaceFlag; // remove space flag 5559 break; 5560 case '0': 5561 if (seenDot) { // after we see '.' and then we see '0', it is 0 precision. We should not see '.' after '0' if '0' is the zero padding flag 5562 spec->precArg = 0; 5563 break; 5564 } 5565 if (!(spec->flags & kCFStringFormatMinusFlag)) spec->flags |= kCFStringFormatZeroFlag; 5566 break; 5567 case 'h': 5568 if (*fmtIdx < fmtLen) { 5569 // fetch next character, don't increment fmtIdx 5570 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)]; 5571 if ('h' == ch) { // 'hh' for char, like 'c' 5572 (*fmtIdx)++; 5573 spec->size = CFFormatSize1; 5574 break; 5575 } 5576 } 5577 spec->size = CFFormatSize2; 5578 break; 5579 case 'l': 5580 if (*fmtIdx < fmtLen) { 5581 // fetch next character, don't increment fmtIdx 5582 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)]; 5583 if ('l' == ch) { // 'll' for long long, like 'q' 5584 (*fmtIdx)++; 5585 spec->size = CFFormatSize8; 5586 break; 5587 } 5588 } 5589 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64 5590 break; 5591#if LONG_DOUBLE_SUPPORT 5592 case 'L': 5593 spec->size = CFFormatSize16; 5594 break; 5595#endif 5596 case 'q': 5597 spec->size = CFFormatSize8; 5598 break; 5599 case 't': case 'z': 5600 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64 5601 break; 5602 case 'j': 5603 spec->size = CFFormatSize8; 5604 break; 5605 case 'c': 5606 spec->type = CFFormatLongType; 5607 spec->size = CFFormatSize1; 5608 return; 5609 case 'D': case 'd': case 'i': case 'U': case 'u': 5610 // we can localize all but octal or hex 5611 if (_CFExecutableLinkedOnOrAfter(CFSystemVersionMountainLion)) spec->flags |= kCFStringFormatLocalizable; 5612 spec->numericFormatStyle = CFFormatStyleDecimal; 5613 if (ch == 'u' || ch == 'U') spec->numericFormatStyle = CFFormatStyleUnsigned; 5614 // fall thru 5615 case 'O': case 'o': case 'x': case 'X': 5616 spec->type = CFFormatLongType; 5617 // Seems like if spec->size 
== 0, we should spec->size = CFFormatSize4. However, 0 is handled correctly. 5618 return; 5619 case 'f': case 'F': case 'g': case 'G': case 'e': case 'E': { 5620 // we can localize all but hex float output 5621 if (_CFExecutableLinkedOnOrAfter(CFSystemVersionMountainLion)) spec->flags |= kCFStringFormatLocalizable; 5622 char lch = (ch >= 'A' && ch <= 'Z') ? (ch - 'A' + 'a') : ch; 5623 spec->numericFormatStyle = ((lch == 'e' || lch == 'g') ? CFFormatStyleScientific : 0) | ((lch == 'f' || lch == 'g') ? CFFormatStyleDecimal : 0); 5624 if (seenDot && spec->precArg == -1 && spec->precArgNum == -1) { // for the cases that we have '.' but no precision followed, not even '*' 5625 spec->precArg = 0; 5626 } 5627 } 5628 // fall thru 5629 case 'a': case 'A': 5630 spec->type = CFFormatDoubleType; 5631 if (spec->size != CFFormatSize16) spec->size = CFFormatSize8; 5632 return; 5633 case 'n': /* %n is not handled correctly; for Leopard or newer apps, we disable it further */ 5634 spec->type = 1 ? CFFormatDummyPointerType : CFFormatPointerType; 5635 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64 5636 return; 5637 case 'p': 5638 spec->type = CFFormatPointerType; 5639 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64 5640 return; 5641 case 's': 5642 spec->type = CFFormatCharsType; 5643 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64 5644 return; 5645 case 'S': 5646 spec->type = CFFormatUnicharsType; 5647 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64 5648 return; 5649 case 'C': 5650 spec->type = CFFormatSingleUnicharType; 5651 spec->size = CFFormatSize2; 5652 return; 5653 case 'P': 5654 spec->type = CFFormatPascalCharsType; 5655 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64 5656 return; 5657 case '@': 5658 if (seenSharp) { 5659 seenSharp = false; 5660 keyIndex = *fmtIdx; 5661 break; 5662 } else { 5663 spec->type = CFFormatCFType; 5664 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64 5665 return; 5666 
} 5667 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { 5668 int64_t number = 0; 5669 do { 5670 number = 10 * number + (ch - '0'); 5671 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++]; 5672 } while ((UInt32)(ch - '0') <= 9); 5673 if ('$' == ch) { 5674 if (-2 == spec->precArgNum) { 5675 spec->precArgNum = (int8_t)number - 1; // Arg numbers start from 1 5676 } else if (-2 == spec->widthArgNum) { 5677 spec->widthArgNum = (int8_t)number - 1; // Arg numbers start from 1 5678 } else { 5679 spec->mainArgNum = (int8_t)number - 1; // Arg numbers start from 1 5680 } 5681 break; 5682 } else if (seenDot) { /* else it's either precision or width */ 5683 spec->precArg = (SInt32)number; 5684 } else { 5685 spec->widthArg = (SInt32)number; 5686 } 5687 goto reswtch; 5688 } 5689 case '*': 5690 spec->widthArgNum = -2; 5691 break; 5692 case '.': 5693 seenDot = true; 5694 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++]; 5695 if ('*' == ch) { 5696 spec->precArgNum = -2; 5697 break; 5698 } 5699 goto reswtch; 5700 default: 5701 spec->type = CFFormatLiteralType; 5702 return; 5703 } 5704 } 5705} 5706 5707/* ??? 
%s depends on handling of encodings by __CFStringAppendBytes 5708*/ 5709void CFStringAppendFormatAndArguments(CFMutableStringRef outputString, CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) { 5710 __CFStringAppendFormatCore(outputString, NULL, formatOptions, NULL, formatString, 0, NULL, 0, args); 5711} 5712 5713// Length of the buffer to call sprintf() with 5714#define BUFFER_LEN 512 5715 5716#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI 5717#define SNPRINTF(TYPE, WHAT) { \ 5718 TYPE value = (TYPE) WHAT; \ 5719 if (-1 != specs[curSpec].widthArgNum) { \ 5720 if (-1 != specs[curSpec].precArgNum) { \ 5721 snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, width, precision, value); \ 5722 } else { \ 5723 snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, width, value); \ 5724 } \ 5725 } else { \ 5726 if (-1 != specs[curSpec].precArgNum) { \ 5727 snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, precision, value); \ 5728 } else { \ 5729 snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, value); \ 5730 } \ 5731 }} 5732#else 5733#define SNPRINTF(TYPE, WHAT) { \ 5734 TYPE value = (TYPE) WHAT; \ 5735 if (-1 != specs[curSpec].widthArgNum) { \ 5736 if (-1 != specs[curSpec].precArgNum) { \ 5737 sprintf(buffer, formatBuffer, width, precision, value); \ 5738 } else { \ 5739 sprintf(buffer, formatBuffer, width, value); \ 5740 } \ 5741 } else { \ 5742 if (-1 != specs[curSpec].precArgNum) { \ 5743 sprintf(buffer, formatBuffer, precision, value); \ 5744 } else { \ 5745 sprintf(buffer, formatBuffer, value); \ 5746 } \ 5747 }} 5748#endif 5749 5750void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) { __CFStringAppendFormatCore(outputString, copyDescFunc, formatOptions, NULL, formatString, 0, NULL, 0, args); } 5751 5752static void 
__CFStringAppendFormatCore(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFDictionaryRef stringsDictConfig, CFStringRef formatString, CFIndex initialArgPosition, const void *origValues, CFIndex originalValuesSize, va_list args) { 5753 SInt32 numSpecs, sizeSpecs, sizeArgNum, formatIdx, curSpec, argNum; 5754 CFIndex formatLen; 5755#define FORMAT_BUFFER_LEN 400 5756 const uint8_t *cformat = NULL; 5757 const UniChar *uformat = NULL; 5758 UniChar *formatChars = NULL; 5759 UniChar localFormatBuffer[FORMAT_BUFFER_LEN]; 5760 5761#define VPRINTF_BUFFER_LEN 61 5762 CFFormatSpec localSpecsBuffer[VPRINTF_BUFFER_LEN]; 5763 CFFormatSpec *specs; 5764 CFPrintValue localValuesBuffer[VPRINTF_BUFFER_LEN]; 5765 CFPrintValue *values; 5766 const CFPrintValue *originalValues = (const CFPrintValue *)origValues; 5767 CFDictionaryRef localConfigs[VPRINTF_BUFFER_LEN]; 5768 CFDictionaryRef *configs; 5769 CFIndex numConfigs; 5770 CFAllocatorRef tmpAlloc = NULL; 5771 intmax_t dummyLocation; // A place for %n to do its thing in; should be the widest possible int value 5772 5773 numSpecs = 0; 5774 sizeSpecs = 0; 5775 sizeArgNum = 0; 5776 numConfigs = 0; 5777 specs = NULL; 5778 values = NULL; 5779 configs = NULL; 5780 5781 5782 formatLen = CFStringGetLength(formatString); 5783 if (!CF_IS_OBJC(__kCFStringTypeID, formatString)) { 5784 __CFAssertIsString(formatString); 5785 if (!__CFStrIsUnicode(formatString)) { 5786 cformat = (const uint8_t *)__CFStrContents(formatString); 5787 if (cformat) cformat += __CFStrSkipAnyLengthByte(formatString); 5788 } else { 5789 uformat = (const UniChar *)__CFStrContents(formatString); 5790 } 5791 } 5792 if (!cformat && !uformat) { 5793 formatChars = (formatLen > FORMAT_BUFFER_LEN) ? 
(UniChar *)CFAllocatorAllocate(tmpAlloc = __CFGetDefaultAllocator(), formatLen * sizeof(UniChar), 0) : localFormatBuffer; 5794 if (formatChars != localFormatBuffer && __CFOASafe) __CFSetLastAllocationEventName(formatChars, "CFString (temp)"); 5795 CFStringGetCharacters(formatString, CFRangeMake(0, formatLen), formatChars); 5796 uformat = formatChars; 5797 } 5798 5799 /* Compute an upper bound for the number of format specifications */ 5800 if (cformat) { 5801 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == cformat[formatIdx]) sizeSpecs++; 5802 } else { 5803 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == uformat[formatIdx]) sizeSpecs++; 5804 } 5805 tmpAlloc = __CFGetDefaultAllocator(); 5806 specs = ((2 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? (CFFormatSpec *)CFAllocatorAllocate(tmpAlloc, (2 * sizeSpecs + 1) * sizeof(CFFormatSpec), 0) : localSpecsBuffer; 5807 if (specs != localSpecsBuffer && __CFOASafe) __CFSetLastAllocationEventName(specs, "CFString (temp)"); 5808 5809 configs = ((sizeSpecs < VPRINTF_BUFFER_LEN) ? 
localConfigs : (CFDictionaryRef *)CFAllocatorAllocate(tmpAlloc, sizeof(CFStringRef) * sizeSpecs, 0)); 5810 5811 /* Collect format specification information from the format string */ 5812 for (curSpec = 0, formatIdx = 0; formatIdx < formatLen; curSpec++) { 5813 SInt32 newFmtIdx; 5814 specs[curSpec].loc = formatIdx; 5815 specs[curSpec].len = 0; 5816 specs[curSpec].size = 0; 5817 specs[curSpec].type = 0; 5818 specs[curSpec].flags = 0; 5819 specs[curSpec].widthArg = -1; 5820 specs[curSpec].precArg = -1; 5821 specs[curSpec].mainArgNum = -1; 5822 specs[curSpec].precArgNum = -1; 5823 specs[curSpec].widthArgNum = -1; 5824 specs[curSpec].configDictIndex = -1; 5825 if (cformat) { 5826 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != cformat[newFmtIdx]; newFmtIdx++); 5827 } else { 5828 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != uformat[newFmtIdx]; newFmtIdx++); 5829 } 5830 if (newFmtIdx != formatIdx) { /* Literal chunk */ 5831 specs[curSpec].type = CFFormatLiteralType; 5832 specs[curSpec].len = newFmtIdx - formatIdx; 5833 } else { 5834 CFStringRef configKey = NULL; 5835 newFmtIdx++; /* Skip % */ 5836 __CFParseFormatSpec(uformat, cformat, &newFmtIdx, formatLen, &(specs[curSpec]), &configKey); 5837 if (CFFormatLiteralType == specs[curSpec].type) { 5838 specs[curSpec].loc = formatIdx + 1; 5839 specs[curSpec].len = 1; 5840 } else { 5841 specs[curSpec].len = newFmtIdx - formatIdx; 5842 } 5843 } 5844 formatIdx = newFmtIdx; 5845 5846// fprintf(stderr, "specs[%d] = {\n size = %d,\n type = %d,\n loc = %d,\n len = %d,\n mainArgNum = %d,\n precArgNum = %d,\n widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum); 5847 5848 } 5849 numSpecs = curSpec; 5850 5851 // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value 5852 sizeArgNum = ((NULL == originalValues) ? 
(3 * sizeSpecs + 1) : originalValuesSize); 5853 5854 values = (sizeArgNum > VPRINTF_BUFFER_LEN) ? (CFPrintValue *)CFAllocatorAllocate(tmpAlloc, sizeArgNum * sizeof(CFPrintValue), 0) : localValuesBuffer; 5855 if (values != localValuesBuffer && __CFOASafe) __CFSetLastAllocationEventName(values, "CFString (temp)"); 5856 memset(values, 0, sizeArgNum * sizeof(CFPrintValue)); 5857 5858#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD 5859 // va_copy is a C99 extension. No support on Windows 5860 va_list copiedArgs; 5861 if (numConfigs > 0) va_copy(copiedArgs, args); // we need to preserve the original state for passing down 5862#endif 5863 5864 /* Compute values array */ 5865 argNum = initialArgPosition; 5866 for (curSpec = 0; curSpec < numSpecs; curSpec++) { 5867 SInt32 newMaxArgNum; 5868 if (0 == specs[curSpec].type) continue; 5869 if (CFFormatLiteralType == specs[curSpec].type) continue; 5870 newMaxArgNum = sizeArgNum; 5871 if (newMaxArgNum < specs[curSpec].mainArgNum) { 5872 newMaxArgNum = specs[curSpec].mainArgNum; 5873 } 5874 if (newMaxArgNum < specs[curSpec].precArgNum) { 5875 newMaxArgNum = specs[curSpec].precArgNum; 5876 } 5877 if (newMaxArgNum < specs[curSpec].widthArgNum) { 5878 newMaxArgNum = specs[curSpec].widthArgNum; 5879 } 5880 if (sizeArgNum < newMaxArgNum) { 5881 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs); 5882 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values); 5883 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars); 5884 return; // more args than we expected! 
5885 } 5886 /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */ 5887 if (-2 == specs[curSpec].widthArgNum) { 5888 specs[curSpec].widthArgNum = argNum++; 5889 } 5890 if (-2 == specs[curSpec].precArgNum) { 5891 specs[curSpec].precArgNum = argNum++; 5892 } 5893 if (-1 == specs[curSpec].mainArgNum) { 5894 specs[curSpec].mainArgNum = argNum++; 5895 } 5896 5897 values[specs[curSpec].mainArgNum].size = specs[curSpec].size; 5898 values[specs[curSpec].mainArgNum].type = specs[curSpec].type; 5899 5900 5901 if (-1 != specs[curSpec].widthArgNum) { 5902 values[specs[curSpec].widthArgNum].size = 0; 5903 values[specs[curSpec].widthArgNum].type = CFFormatLongType; 5904 } 5905 if (-1 != specs[curSpec].precArgNum) { 5906 values[specs[curSpec].precArgNum].size = 0; 5907 values[specs[curSpec].precArgNum].type = CFFormatLongType; 5908 } 5909 } 5910 5911 /* Collect the arguments in correct type from vararg list */ 5912 for (argNum = 0; argNum < sizeArgNum; argNum++) { 5913 if ((NULL != originalValues) && (0 == values[argNum].type)) values[argNum] = originalValues[argNum]; 5914 switch (values[argNum].type) { 5915 case 0: 5916 case CFFormatLiteralType: 5917 break; 5918 case CFFormatLongType: 5919 case CFFormatSingleUnicharType: 5920 if (CFFormatSize1 == values[argNum].size) { 5921 values[argNum].value.int64Value = (int64_t)(int8_t)va_arg(args, int); 5922 } else if (CFFormatSize2 == values[argNum].size) { 5923 values[argNum].value.int64Value = (int64_t)(int16_t)va_arg(args, int); 5924 } else if (CFFormatSize4 == values[argNum].size) { 5925 values[argNum].value.int64Value = (int64_t)va_arg(args, int32_t); 5926 } else if (CFFormatSize8 == values[argNum].size) { 5927 values[argNum].value.int64Value = (int64_t)va_arg(args, int64_t); 5928 } else { 5929 values[argNum].value.int64Value = (int64_t)va_arg(args, int); 5930 } 5931 break; 5932 case CFFormatDoubleType: 5933#if LONG_DOUBLE_SUPPORT 5934 if (CFFormatSize16 == values[argNum].size) { 5935 
values[argNum].value.longDoubleValue = va_arg(args, long double); 5936 } else 5937#endif 5938 { 5939 values[argNum].value.doubleValue = va_arg(args, double); 5940 } 5941 break; 5942 case CFFormatPointerType: 5943 case CFFormatObjectType: 5944 case CFFormatCFType: 5945 case CFFormatUnicharsType: 5946 case CFFormatCharsType: 5947 case CFFormatPascalCharsType: 5948 values[argNum].value.pointerValue = va_arg(args, void *); 5949 break; 5950 case CFFormatDummyPointerType: 5951 (void)va_arg(args, void *); // Skip the provided argument 5952 values[argNum].value.pointerValue = &dummyLocation; 5953 break; 5954 } 5955 } 5956 va_end(args); 5957 5958 /* Format the pieces together */ 5959 5960 if (NULL == originalValues) { 5961 originalValues = values; 5962 originalValuesSize = sizeArgNum; 5963 } 5964 5965 for (curSpec = 0; curSpec < numSpecs; curSpec++) { 5966 SInt32 width = 0, precision = 0; 5967 UniChar *up, ch; 5968 Boolean hasWidth = false, hasPrecision = false; 5969 5970 // widthArgNum and widthArg are never set at the same time; same for precArg* 5971 if (-1 != specs[curSpec].widthArgNum) { 5972 width = (SInt32)values[specs[curSpec].widthArgNum].value.int64Value; 5973 hasWidth = true; 5974 } 5975 if (-1 != specs[curSpec].precArgNum) { 5976 precision = (SInt32)values[specs[curSpec].precArgNum].value.int64Value; 5977 hasPrecision = true; 5978 } 5979 if (-1 != specs[curSpec].widthArg) { 5980 width = specs[curSpec].widthArg; 5981 hasWidth = true; 5982 } 5983 if (-1 != specs[curSpec].precArg) { 5984 precision = specs[curSpec].precArg; 5985 hasPrecision = true; 5986 } 5987 5988 switch (specs[curSpec].type) { 5989 case CFFormatLongType: 5990 case CFFormatDoubleType: 5991#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS 5992 if (formatOptions && (specs[curSpec].flags & kCFStringFormatLocalizable) && (CFGetTypeID(formatOptions) == CFLocaleGetTypeID())) { // We have a locale, so we do localized formatting 5993 if 
(__CFStringFormatLocalizedNumber(outputString, (CFLocaleRef)formatOptions, values, &specs[curSpec], width, precision, hasPrecision)) break; 5994 } 5995 /* Otherwise fall-thru to the next case! */ 5996#endif 5997 case CFFormatPointerType: { 5998 char formatBuffer[128]; 5999#if defined(__GNUC__) 6000 char buffer[BUFFER_LEN + width + precision]; 6001#else 6002 char stackBuffer[BUFFER_LEN]; 6003 char *dynamicBuffer = NULL; 6004 char *buffer = stackBuffer; 6005 if (256+width+precision > BUFFER_LEN) { 6006 dynamicBuffer = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, 256+width+precision, 0); 6007 buffer = dynamicBuffer; 6008 } 6009#endif 6010 SInt32 cidx, idx, loc; 6011 Boolean appended = false; 6012 loc = specs[curSpec].loc; 6013 // In preparation to call snprintf(), copy the format string out 6014 if (cformat) { 6015 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) { 6016 if ('$' == cformat[loc + cidx]) { 6017 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--); 6018 } else { 6019 formatBuffer[idx] = cformat[loc + cidx]; 6020 } 6021 } 6022 } else { 6023 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) { 6024 if ('$' == uformat[loc + cidx]) { 6025 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--); 6026 } else { 6027 formatBuffer[idx] = (int8_t)uformat[loc + cidx]; 6028 } 6029 } 6030 } 6031 formatBuffer[idx] = '\0'; 6032 // Should modify format buffer here if necessary; for example, to translate %qd to 6033 // the equivalent, on architectures which do not have %q. 
6034 buffer[sizeof(buffer) - 1] = '\0'; 6035 switch (specs[curSpec].type) { 6036 case CFFormatLongType: 6037 if (CFFormatSize8 == specs[curSpec].size) { 6038 SNPRINTF(int64_t, values[specs[curSpec].mainArgNum].value.int64Value) 6039 } else { 6040 SNPRINTF(SInt32, values[specs[curSpec].mainArgNum].value.int64Value) 6041 } 6042 break; 6043 case CFFormatPointerType: 6044 case CFFormatDummyPointerType: 6045 SNPRINTF(void *, values[specs[curSpec].mainArgNum].value.pointerValue) 6046 break; 6047 6048 case CFFormatDoubleType: 6049#if LONG_DOUBLE_SUPPORT 6050 if (CFFormatSize16 == specs[curSpec].size) { 6051 SNPRINTF(long double, values[specs[curSpec].mainArgNum].value.longDoubleValue) 6052 } else 6053#endif 6054 { 6055 SNPRINTF(double, values[specs[curSpec].mainArgNum].value.doubleValue) 6056 } 6057 // See if we need to localize the decimal point 6058 if (formatOptions) { // We have localization info 6059#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 6060 CFStringRef decimalSeparator = (CFGetTypeID(formatOptions) == CFLocaleGetTypeID()) ? 
(CFStringRef)CFLocaleGetValue((CFLocaleRef)formatOptions, kCFLocaleDecimalSeparatorKey) : (CFStringRef)CFDictionaryGetValue(formatOptions, CFSTR("NSDecimalSeparator")); 6061#else 6062 CFStringRef decimalSeparator = CFSTR("."); 6063#endif 6064 if (decimalSeparator != NULL) { // We have a decimal separator in there 6065 CFIndex decimalPointLoc = 0; 6066 while (buffer[decimalPointLoc] != 0 && buffer[decimalPointLoc] != '.') decimalPointLoc++; 6067 if (buffer[decimalPointLoc] == '.') { // And we have a decimal point in the formatted string 6068 buffer[decimalPointLoc] = 0; 6069 CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding()); 6070 CFStringAppend(outputString, decimalSeparator); 6071 CFStringAppendCString(outputString, (const char *)(buffer + decimalPointLoc + 1), __CFStringGetEightBitStringEncoding()); 6072 appended = true; 6073 } 6074 } 6075 } 6076 break; 6077 } 6078 if (!appended) CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding()); 6079#if !defined(__GNUC__) 6080 if (dynamicBuffer) { 6081 CFAllocatorDeallocate(kCFAllocatorSystemDefault, dynamicBuffer); 6082 } 6083#endif 6084 } 6085 break; 6086 case CFFormatLiteralType: 6087 if (cformat) { 6088 __CFStringAppendBytes(outputString, (const char *)(cformat+specs[curSpec].loc), specs[curSpec].len, __CFStringGetEightBitStringEncoding()); 6089 } else { 6090 CFStringAppendCharacters(outputString, uformat+specs[curSpec].loc, specs[curSpec].len); 6091 } 6092 break; 6093 case CFFormatPascalCharsType: 6094 case CFFormatCharsType: 6095 if (values[specs[curSpec].mainArgNum].value.pointerValue == NULL) { 6096 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII); 6097 } else { 6098 int len; 6099 const char *str = (const char *)values[specs[curSpec].mainArgNum].value.pointerValue; 6100 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case 6101 len = ((unsigned char *)str)[0]; 6102 str++; 6103 if 
(hasPrecision && precision < len) len = precision; 6104 } else { // C-string case 6105 if (!hasPrecision) { // No precision, so rely on the terminating null character 6106 len = strlen(str); 6107 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988) 6108 const char *terminatingNull = (const char *)memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str 6109 if (terminatingNull) { // There was a null in the first precision characters 6110 len = terminatingNull - str; 6111 } else { 6112 len = precision; 6113 } 6114 } 6115 } 6116 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for 6117 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone 6118 // to ignore those flags (and, say, never pad with '0' instead of space). 6119 if (specs[curSpec].flags & kCFStringFormatMinusFlag) { 6120 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding()); 6121 if (hasWidth && width > len) { 6122 int w = width - len; // We need this many spaces; do it ten at a time 6123 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0); 6124 } 6125 } else { 6126 if (hasWidth && width > len) { 6127 int w = width - len; // We need this many spaces; do it ten at a time 6128 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0); 6129 } 6130 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding()); 6131 } 6132 } 6133 break; 6134 case CFFormatSingleUnicharType: 6135 ch = (UniChar)values[specs[curSpec].mainArgNum].value.int64Value; 6136 CFStringAppendCharacters(outputString, &ch, 1); 6137 break; 6138 case CFFormatUnicharsType: 6139 //??? 
need to handle width, precision, and padding arguments 6140 up = (UniChar *)values[specs[curSpec].mainArgNum].value.pointerValue; 6141 if (NULL == up) { 6142 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII); 6143 } else { 6144 int len; 6145 for (len = 0; 0 != up[len]; len++); 6146 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for 6147 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone 6148 // to ignore those flags (and, say, never pad with '0' instead of space). 6149 if (hasPrecision && precision < len) len = precision; 6150 if (specs[curSpec].flags & kCFStringFormatMinusFlag) { 6151 CFStringAppendCharacters(outputString, up, len); 6152 if (hasWidth && width > len) { 6153 int w = width - len; // We need this many spaces; do it ten at a time 6154 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0); 6155 } 6156 } else { 6157 if (hasWidth && width > len) { 6158 int w = width - len; // We need this many spaces; do it ten at a time 6159 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 
10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0); 6160 } 6161 CFStringAppendCharacters(outputString, up, len); 6162 } 6163 } 6164 break; 6165 case CFFormatCFType: 6166 case CFFormatObjectType: 6167 if (specs[curSpec].configDictIndex != -1) { // config dict 6168 CFTypeRef object = NULL; 6169 CFStringRef innerFormat = NULL; 6170 6171 switch (values[specs[curSpec].mainArgNum].type) { 6172 case CFFormatLongType: 6173 object = CFNumberCreate(tmpAlloc, kCFNumberSInt64Type, &(values[specs[curSpec].mainArgNum].value.int64Value)); 6174 break; 6175 6176 case CFFormatDoubleType: 6177#if LONG_DOUBLE_SUPPORT 6178 if (CFFormatSize16 == values[specs[curSpec].mainArgNum].size) { 6179 double aValue = values[specs[curSpec].mainArgNum].value.longDoubleValue; // losing precision 6180 6181 object = CFNumberCreate(tmpAlloc, kCFNumberDoubleType, &aValue); 6182 } else 6183#endif 6184 { 6185 object = CFNumberCreate(tmpAlloc, kCFNumberDoubleType, &(values[specs[curSpec].mainArgNum].value.doubleValue)); 6186 } 6187 break; 6188 6189 case CFFormatPointerType: 6190 object = CFNumberCreate(tmpAlloc, kCFNumberCFIndexType, &(values[specs[curSpec].mainArgNum].value.pointerValue)); 6191 break; 6192 6193 case CFFormatPascalCharsType: 6194 case CFFormatCharsType: 6195 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) { 6196 CFMutableStringRef aString = CFStringCreateMutable(tmpAlloc, 0); 6197 int len; 6198 const char *str = (const char *)values[specs[curSpec].mainArgNum].value.pointerValue; 6199 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case 6200 len = ((unsigned char *)str)[0]; 6201 str++; 6202 if (hasPrecision && precision < len) len = precision; 6203 } else { // C-string case 6204 if (!hasPrecision) { // No precision, so rely on the terminating null character 6205 len = strlen(str); 6206 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988) 6207 const char *terminatingNull = (const 
char *)memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str 6208 if (terminatingNull) { // There was a null in the first precision characters 6209 len = terminatingNull - str; 6210 } else { 6211 len = precision; 6212 } 6213 } 6214 } 6215 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for 6216 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone 6217 // to ignore those flags (and, say, never pad with '0' instead of space). 6218 if (specs[curSpec].flags & kCFStringFormatMinusFlag) { 6219 __CFStringAppendBytes(aString, str, len, __CFStringGetSystemEncoding()); 6220 if (hasWidth && width > len) { 6221 int w = width - len; // We need this many spaces; do it ten at a time 6222 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0); 6223 } 6224 } else { 6225 if (hasWidth && width > len) { 6226 int w = width - len; // We need this many spaces; do it ten at a time 6227 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0); 6228 } 6229 __CFStringAppendBytes(aString, str, len, __CFStringGetSystemEncoding()); 6230 } 6231 6232 object = aString; 6233 } 6234 break; 6235 6236 case CFFormatSingleUnicharType: 6237 ch = (UniChar)values[specs[curSpec].mainArgNum].value.int64Value; 6238 object = CFStringCreateWithCharactersNoCopy(tmpAlloc, &ch, 1, kCFAllocatorNull); 6239 break; 6240 6241 case CFFormatUnicharsType: 6242 //??? 
need to handle width, precision, and padding arguments 6243 up = (UniChar *)values[specs[curSpec].mainArgNum].value.pointerValue; 6244 if (NULL != up) { 6245 CFMutableStringRef aString = CFStringCreateMutable(tmpAlloc, 0); 6246 int len; 6247 for (len = 0; 0 != up[len]; len++); 6248 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for 6249 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone 6250 // to ignore those flags (and, say, never pad with '0' instead of space). 6251 if (hasPrecision && precision < len) len = precision; 6252 if (specs[curSpec].flags & kCFStringFormatMinusFlag) { 6253 CFStringAppendCharacters(aString, up, len); 6254 if (hasWidth && width > len) { 6255 int w = width - len; // We need this many spaces; do it ten at a time 6256 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0); 6257 } 6258 } else { 6259 if (hasWidth && width > len) { 6260 int w = width - len; // We need this many spaces; do it ten at a time 6261 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 
10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0); 6262 } 6263 CFStringAppendCharacters(aString, up, len); 6264 } 6265 object = aString; 6266 } 6267 break; 6268 6269 case CFFormatCFType: 6270 case CFFormatObjectType: 6271 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) object = CFRetain(values[specs[curSpec].mainArgNum].value.pointerValue); 6272 break; 6273 } 6274 6275 if (NULL != object) CFRelease(object); 6276 6277 } else if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) { 6278 CFStringRef str = NULL; 6279 if (copyDescFunc) { 6280 str = copyDescFunc(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions); 6281 } else { 6282 str = __CFCopyFormattingDescription(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions); 6283 if (NULL == str) { 6284 str = CFCopyDescription(values[specs[curSpec].mainArgNum].value.pointerValue); 6285 } 6286 } 6287 if (str) { 6288 CFStringAppend(outputString, str); 6289 CFRelease(str); 6290 } else { 6291 CFStringAppendCString(outputString, "(null description)", kCFStringEncodingASCII); 6292 } 6293 } else { 6294 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII); 6295 } 6296 break; 6297 } 6298 } 6299 6300#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD 6301 // va_copy is a C99 extension. 
No support on Windows 6302 if (numConfigs > 0) va_end(copiedArgs); 6303#endif 6304 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs); 6305 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values); 6306 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars); 6307 if (configs != localConfigs) CFAllocatorDeallocate(tmpAlloc, configs); 6308} 6309 6310#undef SNPRINTF 6311 6312void CFShowStr(CFStringRef str) { 6313 CFAllocatorRef alloc; 6314 6315 if (!str) { 6316 fprintf(stdout, "(null)\n"); 6317 return; 6318 } 6319 6320 if (CF_IS_OBJC(__kCFStringTypeID, str)) { 6321 fprintf(stdout, "This is an NSString, not CFString\n"); 6322 return; 6323 } 6324 6325 alloc = CFGetAllocator(str); 6326 6327 fprintf(stdout, "\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str), __CFStrIsEightBit(str)); 6328 fprintf(stdout, "HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n", 6329 __CFStrHasLengthByte(str), __CFStrHasNullByte(str), __CFStrIsInline(str)); 6330 6331 fprintf(stdout, "Allocator "); 6332 if (alloc != kCFAllocatorSystemDefault) { 6333 fprintf(stdout, "%p\n", (void *)alloc); 6334 } else { 6335 fprintf(stdout, "SystemDefault\n"); 6336 } 6337 fprintf(stdout, "Mutable %d\n", __CFStrIsMutable(str)); 6338 if (!__CFStrIsMutable(str) && __CFStrHasContentsDeallocator(str)) { 6339 if (__CFStrContentsDeallocator(str)) fprintf(stdout, "ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str)); 6340 else fprintf(stdout, "ContentsDeallocatorFunc None\n"); 6341 } else if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str)) { 6342 fprintf(stdout, "ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef)str)); 6343 } 6344 6345 if (__CFStrIsMutable(str)) { 6346 fprintf(stdout, "CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str), __CFStrIsFixed(str) ? 
"Fixed" : "Desired", (int)__CFStrDesiredCapacity(str)); 6347 } 6348 fprintf(stdout, "Contents %p\n", (void *)__CFStrContents(str)); 6349} 6350 6351 6352 6353#undef HANGUL_SBASE 6354#undef HANGUL_LBASE 6355#undef HANGUL_VBASE 6356#undef HANGUL_TBASE 6357#undef HANGUL_SCOUNT 6358#undef HANGUL_LCOUNT 6359#undef HANGUL_VCOUNT 6360#undef HANGUL_TCOUNT 6361#undef HANGUL_NCOUNT 6362 6363