1/*
2 * Copyright (c) 2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*	CFCharacterSet.h
25	Copyright (c) 1999-2013, Apple Inc. All rights reserved.
26*/
27
28/*!
29	@header CFCharacterSet
30        CFCharacterSet represents a set, or a bag, of Unicode characters.
31        The API consists of 3 groups:
32        1) creation/manipulation of CFCharacterSet instances,
33        2) query of a single Unicode character membership,
34        and 3) bitmap representation related (reading/writing).
35        Conceptually, CFCharacterSet is a 136K byte bitmap array of
36        which each bit represents a Unicode code point.  It could
37        contain the Unicode characters in ISO 10646 Basic Multilingual
38        Plane (BMP) and characters in Plane 1 through Plane 16
39        accessible via surrogate pairs in the Unicode Transformation
40        Format, 16-bit encoding form (UTF-16).  In other words, it can
41        store values from 0x00000 to 0x10FFFF in the Unicode
42        Transformation Format, 32-bit encoding form (UTF-32).  However,
43        in general, how CFCharacterSet stores the information is an
44        implementation detail.  Note even CFData used for the external
45        bitmap representation rarely has 136K byte.  For detailed
46        discussion of the external bitmap representation, refer to the
47        comments for CFCharacterSetCreateWithBitmapRepresentation below.
48        Note that the existence of non-BMP characters in a character set
49        does not imply the membership of the corresponding surrogate
50        characters.  For example, a character set with U+10000 does not
51        match with U+D800.
52*/
53
54#if !defined(__COREFOUNDATION_CFCHARACTERSET__)
55#define __COREFOUNDATION_CFCHARACTERSET__ 1
56
57#include <CoreFoundation/CFBase.h>
58#include <CoreFoundation/CFData.h>
59
60CF_IMPLICIT_BRIDGING_ENABLED
61CF_EXTERN_C_BEGIN
62
63/*!
64	@typedef CFCharacterSetRef
65	This is the type of a reference to immutable CFCharacterSets.
	The pointee, struct __CFCharacterSet, is const-qualified and is not
	defined in this header, so clients treat the reference as opaque.
66*/
67typedef const struct __CFCharacterSet * CFCharacterSetRef;
68
69/*!
70	@typedef CFMutableCharacterSetRef
71	This is the type of a reference to mutable CFCharacterSets.
	It points to the same struct __CFCharacterSet as CFCharacterSetRef,
	but without the const qualifier.
72*/
73typedef struct __CFCharacterSet * CFMutableCharacterSetRef;
74
75/*!
76	@typedef CFCharacterSetPredefinedSet
77        Type of the predefined CFCharacterSet selector values.
        The selectors are numbered from 1.  kCFCharacterSetControl through
        kCFCharacterSetPunctuation take the consecutive values 1 through 11.
        kCFCharacterSetIllegal is 12 but is declared last, out of numeric
        order, after kCFCharacterSetCapitalizedLetter (13),
        kCFCharacterSetSymbol (14), and kCFCharacterSetNewline (15).
78*/
79
80typedef CF_ENUM(CFIndex, CFCharacterSetPredefinedSet) {
81    kCFCharacterSetControl = 1, /* Control character set (Unicode General Category Cc and Cf) */
82    kCFCharacterSetWhitespace, /* Whitespace character set (Unicode General Category Zs and U0009 CHARACTER TABULATION) */
83    kCFCharacterSetWhitespaceAndNewline,  /* Whitespace and Newline character set (Unicode General Category Z*, U000A ~ U000D, and U0085) */
84    kCFCharacterSetDecimalDigit, /* Decimal digit character set */
85    kCFCharacterSetLetter, /* Letter character set (Unicode General Category L* & M*) */
86    kCFCharacterSetLowercaseLetter, /* Lowercase character set (Unicode General Category Ll) */
87    kCFCharacterSetUppercaseLetter, /* Uppercase character set (Unicode General Category Lu and Lt) */
88    kCFCharacterSetNonBase, /* Non-base character set (Unicode General Category M*) */
89    kCFCharacterSetDecomposable, /* Canonically decomposable character set */
90    kCFCharacterSetAlphaNumeric, /* Alpha Numeric character set (Unicode General Category L*, M*, & N*) */
91    kCFCharacterSetPunctuation, /* Punctuation character set (Unicode General Category P*); implicit value 11 */
92    kCFCharacterSetCapitalizedLetter = 13, /* Titlecase character set (Unicode General Category Lt) */
93    kCFCharacterSetSymbol = 14, /* Symbol character set (Unicode General Category S*) */
94    kCFCharacterSetNewline CF_ENUM_AVAILABLE(10_5, 2_0) = 15, /* Newline character set (U000A ~ U000D, U0085, U2028, and U2029) */
95    kCFCharacterSetIllegal = 12/* Illegal character set; fills the gap between 11 and 13 */
96};
97
98/*!
99	@function CFCharacterSetGetTypeID
100	Returns the type identifier of all CFCharacterSet instances.
	@result The CFTypeID shared by all CFCharacterSet instances.
101*/
102CF_EXPORT
103CFTypeID CFCharacterSetGetTypeID(void);
104
105/*!
106	@function CFCharacterSetGetPredefined
107	Returns a predefined CFCharacterSet instance.
108	@param theSetIdentifier The CFCharacterSetPredefinedSet selector
109                (one of the kCFCharacterSet* constants) which specifies
110                the predefined character set.  If the value is not in
111                CFCharacterSetPredefinedSet, the behavior is undefined.
112	@result A reference to the predefined immutable CFCharacterSet.
113                This instance is owned by CF.
114*/
115CF_EXPORT
116CFCharacterSetRef CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier);
117
118/*!
119	@function CFCharacterSetCreateWithCharactersInRange
120	Creates a new immutable character set with the values from the given range.
121	@param alloc The CFAllocator which should be used to allocate
122		memory for the character set and its storage for values. This
123		parameter may be NULL in which case the current default
124		CFAllocator is used. If this reference is not a valid
125		CFAllocator, the behavior is undefined.
126	@param theRange The CFRange which should be used to specify the
127                Unicode range the character set is filled with.  It
128                accepts the range in 32-bit in the UTF-32 format.  The
129                valid character point range is from 0x00000 to 0x10FFFF.
130                If the range is outside of the valid Unicode character
131                point, the behavior is undefined.
132	@result A reference to the new immutable CFCharacterSet.
133*/
134CF_EXPORT
135CFCharacterSetRef CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef alloc, CFRange theRange);
136
137/*!
138	@function CFCharacterSetCreateWithCharactersInString
139	Creates a new immutable character set with the values in the given string.
140	@param alloc The CFAllocator which should be used to allocate
141		memory for the character set and its storage for values. This
142		parameter may be NULL in which case the current default
143		CFAllocator is used. If this reference is not a valid
144		CFAllocator, the behavior is undefined.
145	@param theString The CFString which should be used to specify
146                the Unicode characters the character set is filled with.
147                If this parameter is not a valid CFString, the behavior
148                is undefined.
149	@result A reference to the new immutable CFCharacterSet.
150*/
151CF_EXPORT
152CFCharacterSetRef CFCharacterSetCreateWithCharactersInString(CFAllocatorRef alloc, CFStringRef theString);
153
154/*!
155	@function CFCharacterSetCreateWithBitmapRepresentation
156	Creates a new immutable character set with the bitmap representation in the given data.
157	@param alloc The CFAllocator which should be used to allocate
158		memory for the character set and its storage for values. This
159		parameter may be NULL in which case the current default
160		CFAllocator is used. If this reference is not a valid
161		CFAllocator, the behavior is undefined.
162	@param theData The CFData which should be used to specify the
163                bitmap representation of the Unicode character points
164                the character set is filled with.  The bitmap
165                representation could contain all the Unicode character
166                range starting from BMP to Plane 16.  The first 8192 bytes
167                of the data represent the BMP range.  The BMP range 8192
168                bytes can be followed by zero to sixteen 8192 byte
169                bitmaps, each one with the plane index byte prepended.
170                For example, the bitmap representing the BMP and Plane 2
171                has the size of 16385 bytes (8192 bytes for BMP, 1 byte
172                index + 8192 bytes bitmap for Plane 2).  The plane index
173                byte, in this case, contains the integer value two.  If
174                this parameter is not a valid CFData or it contains a
175                Plane index byte outside of the valid Plane range
176                (1 to 16), the behavior is undefined.
177	@result A reference to the new immutable CFCharacterSet.
178*/
179CF_EXPORT
180CFCharacterSetRef CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef alloc, CFDataRef theData);
181
182/*!
183	@function CFCharacterSetCreateInvertedSet
184	Creates a new immutable character set that is the inverse of the specified character set.
185	@param alloc The CFAllocator which should be used to allocate
186		memory for the character set and its storage for values. This
187		parameter may be NULL in which case the current default
188		CFAllocator is used. If this reference is not a valid
189		CFAllocator, the behavior is undefined.
190	@param theSet The CFCharacterSet which is to be inverted.  If this
191		parameter is not a valid CFCharacterSet, the behavior is
192		undefined.
193	@result A reference to the new immutable CFCharacterSet.
194*/
195CF_EXPORT CFCharacterSetRef CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc, CFCharacterSetRef theSet);
196
197/*!
198	@function CFCharacterSetIsSupersetOfSet
199	Reports whether or not the character set is a superset of the character set specified as the second parameter.
200	@param theSet  The character set to be checked for the membership of theOtherset.
201		If this parameter is not a valid CFCharacterSet, the behavior is undefined.
202	@param theOtherset  The character set to be checked whether or not it is a subset of theSet.
203		If this parameter is not a valid CFCharacterSet, the behavior is undefined.
	@result true, if theSet is a superset of theOtherset, otherwise false.
204*/
205CF_EXPORT Boolean CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet, CFCharacterSetRef theOtherset);
206
207/*!
208	@function CFCharacterSetHasMemberInPlane
209	Reports whether or not the character set contains at least one member character in the specified plane.
210	@param theSet  The character set to be checked for the membership.  If this
211		parameter is not a valid CFCharacterSet, the behavior is undefined.
212	@param thePlane  The plane number to be checked for the membership.
213		The valid value range is from 0 to 16.  If the value is outside of the valid
214		plane number range, the behavior is undefined.
	@result true, if theSet contains at least one member character in thePlane,
		otherwise false.
215*/
216CF_EXPORT Boolean CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet, CFIndex thePlane);
217
218/*!
219	@function CFCharacterSetCreateMutable
220	Creates a new empty mutable character set.
221	@param alloc The CFAllocator which should be used to allocate
222		memory for the character set and its storage for values. This
223		parameter may be NULL in which case the current default
224		CFAllocator is used. If this reference is not a valid
225		CFAllocator, the behavior is undefined.
226	@result A reference to the new mutable CFCharacterSet.
227*/
228CF_EXPORT
229CFMutableCharacterSetRef CFCharacterSetCreateMutable(CFAllocatorRef alloc);
230
231/*!
232	@function CFCharacterSetCreateCopy
233	Creates a new character set with the values from the given character set.  This function tries to compact the backing store where applicable.
234	@param alloc The CFAllocator which should be used to allocate
235		memory for the character set and its storage for values. This
236		parameter may be NULL in which case the current default
237		CFAllocator is used. If this reference is not a valid
238		CFAllocator, the behavior is undefined.
239	@param theSet The CFCharacterSet which is to be copied.  If this
240                parameter is not a valid CFCharacterSet, the behavior is
241                undefined.
242	@result A reference to the new CFCharacterSet.
243*/
244CF_EXPORT
245CFCharacterSetRef CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet);
246
247/*!
248	@function CFCharacterSetCreateMutableCopy
249	Creates a new mutable character set with the values from the given character set.
250	@param alloc The CFAllocator which should be used to allocate
251		memory for the character set and its storage for values. This
252		parameter may be NULL in which case the current default
253		CFAllocator is used. If this reference is not a valid
254		CFAllocator, the behavior is undefined.
255	@param theSet The CFCharacterSet which is to be copied.  If this
256                parameter is not a valid CFCharacterSet, the behavior is
257                undefined.
258	@result A reference to the new mutable CFCharacterSet.
259*/
260CF_EXPORT
261CFMutableCharacterSetRef CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet);
262
263/*!
264	@function CFCharacterSetIsCharacterMember
265	Reports whether or not the Unicode character is in the character set.
266	@param theSet The character set to be searched. If this parameter
267                is not a valid CFCharacterSet, the behavior is undefined.
268	@param theChar The Unicode character for which to test against the
269                character set.  Note that this function takes 16-bit Unicode
270                character value; hence, it does not support access to the
271                non-BMP planes.  CFCharacterSetIsLongCharacterMember takes
                a UTF32Char value instead.
272	@result true, if the value is in the character set, otherwise false.
273*/
274CF_EXPORT
275Boolean CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet, UniChar theChar);
276
277/*!
278	@function CFCharacterSetIsLongCharacterMember
279	Reports whether or not the UTF-32 character is in the character set.
280	@param theSet The character set to be searched. If this parameter
281		is not a valid CFCharacterSet, the behavior is undefined.
282	@param theChar The UTF-32 character for which to test against the
283		character set.  This is the UTF32Char counterpart of
		CFCharacterSetIsCharacterMember, which takes a 16-bit UniChar.
284	@result true, if the value is in the character set, otherwise false.
285*/
286CF_EXPORT Boolean CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet, UTF32Char theChar);
287
288/*!
289	@function CFCharacterSetCreateBitmapRepresentation
290	Creates a new immutable data with the bitmap representation from the given character set.
291	@param alloc The CFAllocator which should be used to allocate
292		memory for the data and its storage for values. This
293		parameter may be NULL in which case the current default
294		CFAllocator is used. If this reference is not a valid
295		CFAllocator, the behavior is undefined.
296	@param theSet The CFCharacterSet which is to be used to create the
297                bitmap representation from.  Refer to the comments for
298                CFCharacterSetCreateWithBitmapRepresentation for the
299                detailed discussion of the bitmap representation format.
300                If this parameter is not a valid CFCharacterSet, the
301                behavior is undefined.
302	@result A reference to the new immutable CFData.
303*/
304CF_EXPORT
305CFDataRef CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc, CFCharacterSetRef theSet);
306
307/*!
308	@function CFCharacterSetAddCharactersInRange
309	Adds the given range to the character set.
310	@param theSet The character set to which the range is to be added.
311                If this parameter is not a valid mutable CFCharacterSet,
312                the behavior is undefined.
313	@param theRange The range to add to the character set.  It accepts
314                the range in 32-bit in the UTF-32 format.  The valid
315                character point range is from 0x00000 to 0x10FFFF.  If the
316                range is outside of the valid Unicode character point,
317                the behavior is undefined.
318*/
319CF_EXPORT
320void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange);
321
322/*!
323	@function CFCharacterSetRemoveCharactersInRange
324	Removes the given range from the character set.
325	@param theSet The character set from which the range is to be
326                removed.  If this parameter is not a valid mutable
327                CFCharacterSet, the behavior is undefined.
328	@param theRange The range to remove from the character set.
329                It accepts the range in 32-bit in the UTF-32 format.
330                The valid character point range is from 0x00000 to 0x10FFFF.
331                If the range is outside of the valid Unicode character point,
332                the behavior is undefined.
333*/
334CF_EXPORT
335void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange);
336
337/*!
338	@function CFCharacterSetAddCharactersInString
339	Adds the characters in the given string to the character set.
340	@param theSet The character set to which the characters in the
341                string are to be added.  If this parameter is not a
342                valid mutable CFCharacterSet, the behavior is undefined.
343	@param theString The string to add to the character set.
344                If this parameter is not a valid CFString, the behavior
345                is undefined.
346*/
347CF_EXPORT
348void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet,  CFStringRef theString);
349
350/*!
351	@function CFCharacterSetRemoveCharactersInString
352	Removes the characters in the given string from the character set.
353	@param theSet The character set from which the characters in the
354                string are to be removed.  If this parameter is not a
355                valid mutable CFCharacterSet, the behavior is undefined.
356	@param theString The string to remove from the character set.
357                If this parameter is not a valid CFString, the behavior
358                is undefined.
359*/
360CF_EXPORT
361void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString);
362
363/*!
364	@function CFCharacterSetUnion
365	Forms the union with the given character set.  The result is stored
	in theSet; the function returns no value.
366	@param theSet The destination character set into which the
367                union of the two character sets is stored.  If this
368                parameter is not a valid mutable CFCharacterSet, the
369                behavior is undefined.
370	@param theOtherSet The character set with which the union is
371                formed.  If this parameter is not a valid CFCharacterSet,
372                the behavior is undefined.
373*/
374CF_EXPORT
375void CFCharacterSetUnion(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet);
376
377/*!
378	@function CFCharacterSetIntersect
379	Forms the intersection with the given character set.  The result is
	stored in theSet; the function returns no value.
380	@param theSet The destination character set into which the
381                intersection of the two character sets is stored.
382                If this parameter is not a valid mutable CFCharacterSet,
383                the behavior is undefined.
384	@param theOtherSet The character set with which the intersection
385                is formed.  If this parameter is not a valid CFCharacterSet,
386                the behavior is undefined.
387*/
388CF_EXPORT
389void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet);
390
391/*!
392	@function CFCharacterSetInvert
393	Inverts the content of the given character set.  The set itself is
	modified; to create a separate inverted set instead, see
	CFCharacterSetCreateInvertedSet.
394	@param theSet The character set to be inverted.
395                If this parameter is not a valid mutable CFCharacterSet,
396                the behavior is undefined.
397*/
398CF_EXPORT
399void CFCharacterSetInvert(CFMutableCharacterSetRef theSet);
400
401CF_EXTERN_C_END
402CF_IMPLICIT_BRIDGING_DISABLED
403
404#endif /* ! __COREFOUNDATION_CFCHARACTERSET__ */
405
406