1/* 2 * Copyright (c) 2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. Please obtain a copy of the License at 10 * http://www.opensource.apple.com/apsl/ and read it before using this 11 * file. 12 * 13 * The Original Code and all software distributed under the License are 14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 18 * Please see the License for the specific language governing rights and 19 * limitations under the License. 20 * 21 * @APPLE_LICENSE_HEADER_END@ 22 */ 23 24/* CFCharacterSet.h 25 Copyright (c) 1999-2013, Apple Inc. All rights reserved. 26*/ 27 28/*! 29 @header CFCharacterSet 30 CFCharacterSet represents a set, or a bag, of Unicode characters. 31 The API consists of 3 groups: 32 1) creation/manipulation of CFCharacterSet instances, 33 2) query of a single Unicode character membership, 34 and 3) bitmap representation related (reading/writing). 35 Conceptually, CFCharacterSet is a 136K byte bitmap array of 36 which each bit represents a Unicode code point. It could 37 contain the Unicode characters in ISO 10646 Basic Multilingual 38 Plane (BMP) and characters in Plane 1 through Plane 16 39 accessible via surrogate paris in the Unicode Transformation 40 Format, 16-bit encoding form (UTF-16). In other words, it can 41 store values from 0x00000 to 0x10FFFF in the Unicode 42 Transformation Format, 32-bit encoding form (UTF-32). However, 43 in general, how CFCharacterSet stores the information is an 44 implementation detail. Note even CFData used for the external 45 bitmap representation rarely has 136K byte. For detailed 46 discussion of the external bitmap representation, refer to the 47 comments for CFCharacterSetCreateWithBitmapRepresentation below. 48 Note that the existance of non-BMP characters in a character set 49 does not imply the membership of the corresponding surrogate 50 characters. For example, a character set with U+10000 does not 51 match with U+D800. 52*/ 53 54#if !defined(__COREFOUNDATION_CFCHARACTERSET__) 55#define __COREFOUNDATION_CFCHARACTERSET__ 1 56 57#include <CoreFoundation/CFBase.h> 58#include <CoreFoundation/CFData.h> 59 60CF_IMPLICIT_BRIDGING_ENABLED 61CF_EXTERN_C_BEGIN 62 63/*! 64 @typedef CFCharacterSetRef 65 This is the type of a reference to immutable CFCharacterSets. 66*/ 67typedef const struct __CFCharacterSet * CFCharacterSetRef; 68 69/*! 70 @typedef CFMutableCharacterSetRef 71 This is the type of a reference to mutable CFMutableCharacterSets. 72*/ 73typedef struct __CFCharacterSet * CFMutableCharacterSetRef; 74 75/*! 76 @typedef CFCharacterSetPredefinedSet 77 Type of the predefined CFCharacterSet selector values. 78*/ 79 80typedef CF_ENUM(CFIndex, CFCharacterSetPredefinedSet) { 81 kCFCharacterSetControl = 1, /* Control character set (Unicode General Category Cc and Cf) */ 82 kCFCharacterSetWhitespace, /* Whitespace character set (Unicode General Category Zs and U0009 CHARACTER TABULATION) */ 83 kCFCharacterSetWhitespaceAndNewline, /* Whitespace and Newline character set (Unicode General Category Z*, U000A ~ U000D, and U0085) */ 84 kCFCharacterSetDecimalDigit, /* Decimal digit character set */ 85 kCFCharacterSetLetter, /* Letter character set (Unicode General Category L* & M*) */ 86 kCFCharacterSetLowercaseLetter, /* Lowercase character set (Unicode General Category Ll) */ 87 kCFCharacterSetUppercaseLetter, /* Uppercase character set (Unicode General Category Lu and Lt) */ 88 kCFCharacterSetNonBase, /* Non-base character set (Unicode General Category M*) */ 89 kCFCharacterSetDecomposable, /* Canonically decomposable character set */ 90 kCFCharacterSetAlphaNumeric, /* Alpha Numeric character set (Unicode General Category L*, M*, & N*) */ 91 kCFCharacterSetPunctuation, /* Punctuation character set (Unicode General Category P*) */ 92 kCFCharacterSetCapitalizedLetter = 13, /* Titlecase character set (Unicode General Category Lt) */ 93 kCFCharacterSetSymbol = 14, /* Symbol character set (Unicode General Category S*) */ 94 kCFCharacterSetNewline CF_ENUM_AVAILABLE(10_5, 2_0) = 15, /* Newline character set (U000A ~ U000D, U0085, U2028, and U2029) */ 95 kCFCharacterSetIllegal = 12/* Illegal character set */ 96}; 97 98/*! 99 @function CFCharacterSetGetTypeID 100 Returns the type identifier of all CFCharacterSet instances. 101*/ 102CF_EXPORT 103CFTypeID CFCharacterSetGetTypeID(void); 104 105/*! 106 @function CFCharacterSetGetPredefined 107 Returns a predefined CFCharacterSet instance. 108 @param theSetIdentifier The CFCharacterSetPredefinedSet selector 109 which specifies the predefined character set. If the 110 value is not in CFCharacterSetPredefinedSet, the behavior 111 is undefined. 112 @result A reference to the predefined immutable CFCharacterSet. 113 This instance is owned by CF. 114*/ 115CF_EXPORT 116CFCharacterSetRef CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier); 117 118/*! 119 @function CFCharacterSetCreateWithCharactersInRange 120 Creates a new immutable character set with the values from the given range. 121 @param alloc The CFAllocator which should be used to allocate 122 memory for the array and its storage for values. This 123 parameter may be NULL in which case the current default 124 CFAllocator is used. If this reference is not a valid 125 CFAllocator, the behavior is undefined. 126 @param theRange The CFRange which should be used to specify the 127 Unicode range the character set is filled with. It 128 accepts the range in 32-bit in the UTF-32 format. The 129 valid character point range is from 0x00000 to 0x10FFFF. 130 If the range is outside of the valid Unicode character 131 point, the behavior is undefined. 132 @result A reference to the new immutable CFCharacterSet. 133*/ 134CF_EXPORT 135CFCharacterSetRef CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef alloc, CFRange theRange); 136 137/*! 138 @function CFCharacterSetCreateWithCharactersInString 139 Creates a new immutable character set with the values in the given string. 140 @param alloc The CFAllocator which should be used to allocate 141 memory for the array and its storage for values. This 142 parameter may be NULL in which case the current default 143 CFAllocator is used. If this reference is not a valid 144 CFAllocator, the behavior is undefined. 145 @param theString The CFString which should be used to specify 146 the Unicode characters the character set is filled with. 147 If this parameter is not a valid CFString, the behavior 148 is undefined. 149 @result A reference to the new immutable CFCharacterSet. 150*/ 151CF_EXPORT 152CFCharacterSetRef CFCharacterSetCreateWithCharactersInString(CFAllocatorRef alloc, CFStringRef theString); 153 154/*! 155 @function CFCharacterSetCreateWithBitmapRepresentation 156 Creates a new immutable character set with the bitmap representtion in the given data. 157 @param alloc The CFAllocator which should be used to allocate 158 memory for the array and its storage for values. This 159 parameter may be NULL in which case the current default 160 CFAllocator is used. If this reference is not a valid 161 CFAllocator, the behavior is undefined. 162 @param theData The CFData which should be used to specify the 163 bitmap representation of the Unicode character points 164 the character set is filled with. The bitmap 165 representation could contain all the Unicode character 166 range starting from BMP to Plane 16. The first 8192 bytes 167 of the data represent the BMP range. The BMP range 8192 168 bytes can be followed by zero to sixteen 8192 byte 169 bitmaps, each one with the plane index byte prepended. 170 For example, the bitmap representing the BMP and Plane 2 171 has the size of 16385 bytes (8192 bytes for BMP, 1 byte 172 index + 8192 bytes bitmap for Plane 2). The plane index 173 byte, in this case, contains the integer value two. If 174 this parameter is not a valid CFData or it contains a 175 Plane index byte outside of the valid Plane range 176 (1 to 16), the behavior is undefined. 177 @result A reference to the new immutable CFCharacterSet. 178*/ 179CF_EXPORT 180CFCharacterSetRef CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef alloc, CFDataRef theData); 181 182/*! 183 @function CFCharacterSetCreateInvertedSet 184 Creates a new immutable character set that is the invert of the specified character set. 185 @param alloc The CFAllocator which should be used to allocate 186 memory for the array and its storage for values. This 187 parameter may be NULL in which case the current default 188 CFAllocator is used. If this reference is not a valid 189 CFAllocator, the behavior is undefined. 190 @param theSet The CFCharacterSet which is to be inverted. If this 191 parameter is not a valid CFCharacterSet, the behavior is 192 undefined. 193 @result A reference to the new immutable CFCharacterSet. 194*/ 195CF_EXPORT CFCharacterSetRef CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc, CFCharacterSetRef theSet); 196 197/*! 198 @function CFCharacterSetIsSupersetOfSet 199 Reports whether or not the character set is a superset of the character set specified as the second parameter. 200 @param theSet The character set to be checked for the membership of theOtherSet. 201 If this parameter is not a valid CFCharacterSet, the behavior is undefined. 202 @param theOtherset The character set to be checked whether or not it is a subset of theSet. 203 If this parameter is not a valid CFCharacterSet, the behavior is undefined. 204*/ 205CF_EXPORT Boolean CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet, CFCharacterSetRef theOtherset); 206 207/*! 208 @function CFCharacterSetHasMemberInPlane 209 Reports whether or not the character set contains at least one member character in the specified plane. 210 @param theSet The character set to be checked for the membership. If this 211 parameter is not a valid CFCharacterSet, the behavior is undefined. 212 @param thePlane The plane number to be checked for the membership. 213 The valid value range is from 0 to 16. If the value is outside of the valid 214 plane number range, the behavior is undefined. 215*/ 216CF_EXPORT Boolean CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet, CFIndex thePlane); 217 218/*! 219 @function CFCharacterSetCreateMutable 220 Creates a new empty mutable character set. 221 @param allocator The CFAllocator which should be used to allocate 222 memory for the array and its storage for values. This 223 parameter may be NULL in which case the current default 224 CFAllocator is used. If this reference is not a valid 225 CFAllocator, the behavior is undefined. 226 @result A reference to the new mutable CFCharacterSet. 227*/ 228CF_EXPORT 229CFMutableCharacterSetRef CFCharacterSetCreateMutable(CFAllocatorRef alloc); 230 231/*! 232 @function CFCharacterSetCreateCopy 233 Creates a new character set with the values from the given character set. This function tries to compact the backing store where applicable. 234 @param allocator The CFAllocator which should be used to allocate 235 memory for the array and its storage for values. This 236 parameter may be NULL in which case the current default 237 CFAllocator is used. If this reference is not a valid 238 CFAllocator, the behavior is undefined. 239 @param theSet The CFCharacterSet which is to be copied. If this 240 parameter is not a valid CFCharacterSet, the behavior is 241 undefined. 242 @result A reference to the new CFCharacterSet. 243*/ 244CF_EXPORT 245CFCharacterSetRef CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet); 246 247/*! 248 @function CFCharacterSetCreateMutableCopy 249 Creates a new mutable character set with the values from the given character set. 250 @param allocator The CFAllocator which should be used to allocate 251 memory for the array and its storage for values. This 252 parameter may be NULL in which case the current default 253 CFAllocator is used. If this reference is not a valid 254 CFAllocator, the behavior is undefined. 255 @param theSet The CFCharacterSet which is to be copied. If this 256 parameter is not a valid CFCharacterSet, the behavior is 257 undefined. 258 @result A reference to the new mutable CFCharacterSet. 259*/ 260CF_EXPORT 261CFMutableCharacterSetRef CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet); 262 263/*! 264 @function CFCharacterSetIsCharacterMember 265 Reports whether or not the Unicode character is in the character set. 266 @param theSet The character set to be searched. If this parameter 267 is not a valid CFCharacterSet, the behavior is undefined. 268 @param theChar The Unicode character for which to test against the 269 character set. Note that this function takes 16-bit Unicode 270 character value; hence, it does not support access to the 271 non-BMP planes. 272 @result true, if the value is in the character set, otherwise false. 273*/ 274CF_EXPORT 275Boolean CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet, UniChar theChar); 276 277/*! 278 @function CFCharacterSetIsLongCharacterMember 279 Reports whether or not the UTF-32 character is in the character set. 280 @param theSet The character set to be searched. If this parameter 281 is not a valid CFCharacterSet, the behavior is undefined. 282 @param theChar The UTF-32 character for which to test against the 283 character set. 284 @result true, if the value is in the character set, otherwise false. 285*/ 286CF_EXPORT Boolean CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet, UTF32Char theChar); 287 288/*! 289 @function CFCharacterSetCreateBitmapRepresentation 290 Creates a new immutable data with the bitmap representation from the given character set. 291 @param allocator The CFAllocator which should be used to allocate 292 memory for the array and its storage for values. This 293 parameter may be NULL in which case the current default 294 CFAllocator is used. If this reference is not a valid 295 CFAllocator, the behavior is undefined. 296 @param theSet The CFCharacterSet which is to be used create the 297 bitmap representation from. Refer to the comments for 298 CFCharacterSetCreateWithBitmapRepresentation for the 299 detailed discussion of the bitmap representation format. 300 If this parameter is not a valid CFCharacterSet, the 301 behavior is undefined. 302 @result A reference to the new immutable CFData. 303*/ 304CF_EXPORT 305CFDataRef CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc, CFCharacterSetRef theSet); 306 307/*! 308 @function CFCharacterSetAddCharactersInRange 309 Adds the given range to the charaacter set. 310 @param theSet The character set to which the range is to be added. 311 If this parameter is not a valid mutable CFCharacterSet, 312 the behavior is undefined. 313 @param theRange The range to add to the character set. It accepts 314 the range in 32-bit in the UTF-32 format. The valid 315 character point range is from 0x00000 to 0x10FFFF. If the 316 range is outside of the valid Unicode character point, 317 the behavior is undefined. 318*/ 319CF_EXPORT 320void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange); 321 322/*! 323 @function CFCharacterSetRemoveCharactersInRange 324 Removes the given range from the charaacter set. 325 @param theSet The character set from which the range is to be 326 removed. If this parameter is not a valid mutable 327 CFCharacterSet, the behavior is undefined. 328 @param theRange The range to remove from the character set. 329 It accepts the range in 32-bit in the UTF-32 format. 330 The valid character point range is from 0x00000 to 0x10FFFF. 331 If the range is outside of the valid Unicode character point, 332 the behavior is undefined. 333*/ 334CF_EXPORT 335void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange); 336 337/*! 338 @function CFCharacterSetAddCharactersInString 339 Adds the characters in the given string to the charaacter set. 340 @param theSet The character set to which the characters in the 341 string are to be added. If this parameter is not a 342 valid mutable CFCharacterSet, the behavior is undefined. 343 @param theString The string to add to the character set. 344 If this parameter is not a valid CFString, the behavior 345 is undefined. 346*/ 347CF_EXPORT 348void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString); 349 350/*! 351 @function CFCharacterSetRemoveCharactersInString 352 Removes the characters in the given string from the charaacter set. 353 @param theSet The character set from which the characters in the 354 string are to be remove. If this parameter is not a 355 valid mutable CFCharacterSet, the behavior is undefined. 356 @param theString The string to remove from the character set. 357 If this parameter is not a valid CFString, the behavior 358 is undefined. 359*/ 360CF_EXPORT 361void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString); 362 363/*! 364 @function CFCharacterSetUnion 365 Forms the union with the given character set. 366 @param theSet The destination character set into which the 367 union of the two character sets is stored. If this 368 parameter is not a valid mutable CFCharacterSet, the 369 behavior is undefined. 370 @param theOtherSet The character set with which the union is 371 formed. If this parameter is not a valid CFCharacterSet, 372 the behavior is undefined. 373*/ 374CF_EXPORT 375void CFCharacterSetUnion(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet); 376 377/*! 378 @function CFCharacterSetIntersect 379 Forms the intersection with the given character set. 380 @param theSet The destination character set into which the 381 intersection of the two character sets is stored. 382 If this parameter is not a valid mutable CFCharacterSet, 383 the behavior is undefined. 384 @param theOtherSet The character set with which the intersection 385 is formed. If this parameter is not a valid CFCharacterSet, 386 the behavior is undefined. 387*/ 388CF_EXPORT 389void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet); 390 391/*! 392 @function CFCharacterSetInvert 393 Inverts the content of the given character set. 394 @param theSet The character set to be inverted. 395 If this parameter is not a valid mutable CFCharacterSet, 396 the behavior is undefined. 397*/ 398CF_EXPORT 399void CFCharacterSetInvert(CFMutableCharacterSetRef theSet); 400 401CF_EXTERN_C_END 402CF_IMPLICIT_BRIDGING_DISABLED 403 404#endif /* ! __COREFOUNDATION_CFCHARACTERSET__ */ 405 406