1/*
2 * Copyright (c) 2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*	CFStringEncodingConverter.c
25	Copyright (c) 1998-2013, Apple Inc. All rights reserved.
26	Responsibility: Aki Inoue
27*/
28
29#include "CFInternal.h"
30#include <CoreFoundation/CFArray.h>
31#include <CoreFoundation/CFDictionary.h>
32#include "CFICUConverters.h"
33#include <CoreFoundation/CFUniChar.h>
34#include <CoreFoundation/CFPriv.h>
35#include "CFUnicodeDecomposition.h"
36#include "CFStringEncodingConverterExt.h"
37#include "CFStringEncodingConverterPriv.h"
38#include <stdlib.h>
39
/* Uniform signatures for the wrapper functions stored in _CFEncodingConverter.
   The first argument is the _CFEncodingConverter itself, passed as const void *;
   the wrappers in this file cast it back to reach converter->definition.
*/
typedef CFIndex (*_CFToBytesProc)(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen);
typedef CFIndex (*_CFToUnicodeProc)(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen);

/* Runtime record for one encoding, filled in by __CFEncodingConverterFromDefinition.
*/
typedef struct {
    const CFStringEncodingConverter *definition; // converter definition this record was built from
    _CFToBytesProc toBytes; // wrapper, or NULL to call definition->toBytes directly (see TO_BYTE); for the ICU class this slot holds the ICU converter name pointer instead
    _CFToUnicodeProc toUnicode; // wrapper, or NULL to call definition->toUnicode directly (see TO_UNICODE)
    _CFToUnicodeProc toCanonicalUnicode; // wrapper TO_UNICODE selects when a canonical flag is set
    CFStringEncodingToBytesFallbackProc toBytesFallback; // definition's fallback, or __CFDefaultToBytesFallbackProc
    CFStringEncodingToUnicodeFallbackProc toUnicodeFallback; // definition's fallback, or __CFDefaultToUnicodeFallbackProc
} _CFEncodingConverter;
51
/* Dispatch macros
   TO_BYTE / TO_UNICODE invoke the converter's wrapper when one is installed
   (conv->toBytes / conv->toUnicode non-NULL); otherwise they call the proc in
   the converter definition directly, which takes no converter argument.
   TO_UNICODE picks toCanonicalUnicode when either canonical flag is set.
   Every argument is parenthesized so that expressions can be passed safely.
   Note that conv and flags are evaluated more than once: do not pass
   expressions with side effects.
*/
#define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) ((conv)->toBytes ? (conv)->toBytes((conv),(flags),(chars),(numChars),(bytes),(max),(used)) : ((CFStringEncodingToBytesProc)(conv)->definition->toBytes)((flags),(chars),(numChars),(bytes),(max),(used)))
#define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) ((conv)->toUnicode ? (((flags) & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical)) ? (conv)->toCanonicalUnicode((conv),(flags),(bytes),(numBytes),(chars),(max),(used)) : (conv)->toUnicode((conv),(flags),(bytes),(numBytes),(chars),(max),(used))) : ((CFStringEncodingToUnicodeProc)(conv)->definition->toUnicode)((flags),(bytes),(numBytes),(chars),(max),(used)))
56
#define ASCIINewLine 0x0a // line feed; emitted by the default to-bytes fallback for newline characters
// UTF-16 surrogate code unit ranges (high = leading unit, low = trailing unit)
#define kSurrogateHighStart 0xD800
#define kSurrogateHighEnd 0xDBFF
#define kSurrogateLowStart 0xDC00
#define kSurrogateLowEnd 0xDFFF

// Size of the scratch buffers the wrappers below use to receive the output of
// a single per-character / per-byte conversion call.
static const uint8_t __CFMaximumConvertedLength = 20;
64
/* Mapping U+00A0..U+00FF (160..255) to lossy ASCII.
   Entry i describes character 0xA0 + i. Each entry holds up to four ASCII
   bytes; the replacement ends at the first 0 byte, so an all-zero entry means
   the character is dropped. __CFToASCIILatin1Fallback indexes this table with
   (character - 0xA0).
*/
static const struct {
    unsigned char chars[4];
} _toLossyASCIITable[] = {
    {{' ', 0, 0, 0}}, // NO-BREAK SPACE
    {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK
    {{'c', 0, 0, 0}}, // CENT SIGN
    {{'L', 0, 0, 0}}, // POUND SIGN
    {{'$', 0, 0, 0}}, // CURRENCY SIGN
    {{'Y', 0, 0, 0}}, // YEN SIGN
    {{'|', 0, 0, 0}}, // BROKEN BAR
    {{0, 0, 0, 0}}, // SECTION SIGN
    {{0, 0, 0, 0}}, // DIAERESIS
    {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN
    {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR
    {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    {{0, 0, 0, 0}}, // NOT SIGN
    {{'-', 0, 0, 0}}, // SOFT HYPHEN
    {{'(', 'R', ')', 0}}, // REGISTERED SIGN
    {{0, 0, 0, 0}}, // MACRON
    {{0, 0, 0, 0}}, // DEGREE SIGN
    {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN
    {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO
    {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE
    {{0, 0, 0, 0}}, // ACUTE ACCENT
    {{0, 0, 0, 0}}, // MICRO SIGN
    {{0, 0, 0, 0}}, // PILCROW SIGN
    {{0, 0, 0, 0}}, // MIDDLE DOT
    {{0, 0, 0, 0}}, // CEDILLA
    {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE
    {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR
    {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER
    {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF
    {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS
    {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE
    {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE
    {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS
    {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic)
    {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS
    {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS
    {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE
    {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic); uppercase to match the capital letter
    {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German)
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE
    {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE
    {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS
    {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic); lowercase to match the small letter
    {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS
    {{'/', 0, 0, 0}}, // DIVISION SIGN
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS
    {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE
    {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic)
    {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS
};
167
168CF_INLINE CFIndex __CFToASCIILatin1Fallback(UniChar character, uint8_t *bytes, CFIndex maxByteLen) {
169    const uint8_t *losChars = (const uint8_t*)_toLossyASCIITable + (character - 0xA0) * sizeof(uint8_t[4]);
170    CFIndex numBytes = 0;
171    CFIndex idx, max = (maxByteLen && (maxByteLen < 4) ? maxByteLen : 4);
172
173    for (idx = 0;idx < max;idx++) {
174        if (losChars[idx]) {
175            if (maxByteLen) bytes[idx] = losChars[idx];
176            ++numBytes;
177        } else {
178            break;
179        }
180    }
181
182    return numBytes;
183}
184
185static CFIndex __CFDefaultToBytesFallbackProc(const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
186    CFIndex processCharLen = 1, filledBytesLen = 1;
187    uint8_t byte = '?';
188
189    if (*characters < 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range
190        byte = (uint8_t)(*characters - 0x80);
191    } else if (*characters < 0x100) {
192        *usedByteLen = __CFToASCIILatin1Fallback(*characters, bytes, maxByteLen);
193        return 1;
194    } else if (*characters >= kSurrogateHighStart && *characters <= kSurrogateLowEnd) {
195        processCharLen = (numChars > 1 && *characters <= kSurrogateLowStart && *(characters + 1) >= kSurrogateLowStart && *(characters + 1) <= kSurrogateLowEnd ? 2 : 1);
196    } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceCharacterSet)) {
197        byte = ' ';
198    } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceAndNewlineCharacterSet)) {
199        byte = ASCIINewLine;
200    } else if (*characters == 0x2026) { // ellipsis
201        if (0 == maxByteLen) {
202            filledBytesLen = 3;
203        } else if (maxByteLen > 2) {
204            memset(bytes, '.', 3);
205            *usedByteLen = 3;
206            return processCharLen;
207        }
208    } else if (CFUniCharIsMemberOf(*characters, kCFUniCharDecomposableCharacterSet)) {
209        UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
210
211        (void)CFUniCharDecomposeCharacter(*characters, decomposed, MAX_DECOMPOSED_LENGTH);
212        if (*decomposed < 0x80) {
213            byte = (uint8_t)(*decomposed);
214        } else {
215            UTF16Char theChar = *decomposed;
216
217            return __CFDefaultToBytesFallbackProc(&theChar, 1, bytes, maxByteLen, usedByteLen);
218        }
219    }
220
221    if (maxByteLen) *bytes = byte;
222    *usedByteLen = filledBytesLen;
223    return processCharLen;
224}
225
226static CFIndex __CFDefaultToUnicodeFallbackProc(const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
227    if (maxCharLen) *characters = (UniChar)'?';
228    *usedCharLen = 1;
229    return 1;
230}
231
/* Invoke the fallback procs stored on a _CFEncodingConverter. Arguments are
   parenthesized so that expressions can be passed safely. */
#define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) ((conv)->toBytesFallback((chars),(numChars),(bytes),(max),(used)))
#define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) ((conv)->toUnicodeFallback((bytes),(numBytes),(chars),(max),(used)))

#define EXTRA_BASE (0x0F00) // NOTE(review): not referenced in the visible part of this file; meaning to be confirmed at its use sites
236
237/* Wrapper funcs for non-standard converters
238*/
239static CFIndex __CFToBytesCheapEightBitWrapper(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
240    CFIndex processedCharLen = 0;
241    CFIndex length = (maxByteLen && (maxByteLen < numChars) ? maxByteLen : numChars);
242    uint8_t byte;
243
244    while (processedCharLen < length) {
245        if (!((CFStringEncodingCheapEightBitToBytesProc)((const _CFEncodingConverter*)converter)->definition->toBytes)(flags, characters[processedCharLen], &byte)) break;
246
247        if (maxByteLen) bytes[processedCharLen] = byte;
248        processedCharLen++;
249    }
250
251    *usedByteLen = processedCharLen;
252    return processedCharLen;
253}
254
255static CFIndex __CFToUnicodeCheapEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
256    CFIndex processedByteLen = 0;
257    CFIndex length = (maxCharLen && (maxCharLen < numBytes) ? maxCharLen : numBytes);
258    UniChar character;
259
260    while (processedByteLen < length) {
261        if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes[processedByteLen], &character)) break;
262
263        if (maxCharLen) characters[processedByteLen] = character;
264        processedByteLen++;
265    }
266
267    *usedCharLen = processedByteLen;
268    return processedByteLen;
269}
270
271static CFIndex __CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
272    CFIndex processedByteLen = 0;
273    CFIndex theUsedCharLen = 0;
274    UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
275    CFIndex usedLen;
276    UniChar character;
277    bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
278
279    while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
280        if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes[processedByteLen], &character)) break;
281
282        if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
283            CFIndex idx;
284
285            usedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
286            *usedCharLen = theUsedCharLen;
287
288            for (idx = 0;idx < usedLen;idx++) {
289                if (charBuffer[idx] > 0xFFFF) { // Non-BMP
290                    if (theUsedCharLen + 2 > maxCharLen)  return processedByteLen;
291                    theUsedCharLen += 2;
292                    if (maxCharLen) {
293                        charBuffer[idx] = charBuffer[idx] - 0x10000;
294                        *(characters++) = (UniChar)(charBuffer[idx] >> 10) + 0xD800UL;
295                        *(characters++) = (UniChar)(charBuffer[idx] & 0x3FF) + 0xDC00UL;
296                    }
297                } else {
298                    if (theUsedCharLen + 1 > maxCharLen)  return processedByteLen;
299                    ++theUsedCharLen;
300                    *(characters++) = charBuffer[idx];
301                }
302            }
303        } else {
304            if (maxCharLen) *(characters++) = character;
305            ++theUsedCharLen;
306        }
307        processedByteLen++;
308    }
309
310    *usedCharLen = theUsedCharLen;
311    return processedByteLen;
312}
313
314static CFIndex __CFToBytesStandardEightBitWrapper(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
315    CFIndex processedCharLen = 0;
316    uint8_t byte;
317    CFIndex usedLen;
318
319    *usedByteLen = 0;
320
321    while (numChars && (!maxByteLen || (*usedByteLen < maxByteLen))) {
322        if (!(usedLen = ((CFStringEncodingStandardEightBitToBytesProc)((const _CFEncodingConverter*)converter)->definition->toBytes)(flags, characters, numChars, &byte))) break;
323
324        if (maxByteLen) bytes[*usedByteLen] = byte;
325        (*usedByteLen)++;
326        characters += usedLen;
327        numChars -= usedLen;
328        processedCharLen += usedLen;
329    }
330
331    return processedCharLen;
332}
333
334static CFIndex __CFToUnicodeStandardEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
335    CFIndex processedByteLen = 0;
336    UniChar charBuffer[__CFMaximumConvertedLength];
337    CFIndex usedLen;
338
339    *usedCharLen = 0;
340
341    while ((processedByteLen < numBytes) && (!maxCharLen || (*usedCharLen < maxCharLen))) {
342        if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
343
344        if (maxCharLen) {
345            CFIndex idx;
346
347            if (*usedCharLen + usedLen > maxCharLen) break;
348
349            for (idx = 0;idx < usedLen;idx++) {
350                characters[*usedCharLen + idx] = charBuffer[idx];
351            }
352        }
353        *usedCharLen += usedLen;
354        processedByteLen++;
355    }
356
357    return processedByteLen;
358}
359
360static CFIndex __CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
361    CFIndex processedByteLen = 0;
362    UniChar charBuffer[__CFMaximumConvertedLength];
363    UTF32Char decompBuffer[MAX_DECOMPOSED_LENGTH];
364    CFIndex usedLen;
365    CFIndex decompedLen;
366    CFIndex idx, decompIndex;
367    bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
368    CFIndex theUsedCharLen = 0;
369
370    while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
371        if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
372
373        for (idx = 0;idx < usedLen;idx++) {
374            if (CFUniCharIsDecomposableCharacter(charBuffer[idx], isHFSPlus)) {
375                decompedLen = CFUniCharDecomposeCharacter(charBuffer[idx], decompBuffer, MAX_DECOMPOSED_LENGTH);
376                *usedCharLen = theUsedCharLen;
377
378                for (decompIndex = 0;decompIndex < decompedLen;decompIndex++) {
379                    if (decompBuffer[decompIndex] > 0xFFFF) { // Non-BMP
380                        if (theUsedCharLen + 2 > maxCharLen)  return processedByteLen;
381                        theUsedCharLen += 2;
382                        if (maxCharLen) {
383                            charBuffer[idx] = charBuffer[idx] - 0x10000;
384                            *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
385                            *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
386                        }
387                    } else {
388                        if (theUsedCharLen + 1 > maxCharLen)  return processedByteLen;
389                        ++theUsedCharLen;
390                        *(characters++) = charBuffer[idx];
391                    }
392                }
393            } else {
394                if (maxCharLen) *(characters++) = charBuffer[idx];
395                ++theUsedCharLen;
396            }
397        }
398        processedByteLen++;
399    }
400
401    *usedCharLen = theUsedCharLen;
402    return processedByteLen;
403}
404
405static CFIndex __CFToBytesCheapMultiByteWrapper(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
406    CFIndex processedCharLen = 0;
407    uint8_t byteBuffer[__CFMaximumConvertedLength];
408    CFIndex usedLen;
409
410    *usedByteLen = 0;
411
412    while ((processedCharLen < numChars) && (!maxByteLen || (*usedByteLen < maxByteLen))) {
413        if (!(usedLen = ((CFStringEncodingCheapMultiByteToBytesProc)((const _CFEncodingConverter*)converter)->definition->toBytes)(flags, characters[processedCharLen], byteBuffer))) break;
414
415        if (maxByteLen) {
416            CFIndex idx;
417
418            if (*usedByteLen + usedLen > maxByteLen) break;
419
420            for (idx = 0;idx <usedLen;idx++) {
421                bytes[*usedByteLen + idx] = byteBuffer[idx];
422            }
423        }
424
425        *usedByteLen += usedLen;
426        processedCharLen++;
427    }
428
429    return processedCharLen;
430}
431
432static CFIndex __CFToUnicodeCheapMultiByteWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
433    CFIndex processedByteLen = 0;
434    UniChar character;
435    CFIndex usedLen;
436
437    *usedCharLen = 0;
438
439    while (numBytes && (!maxCharLen || (*usedCharLen < maxCharLen))) {
440        if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes, numBytes, &character))) break;
441
442        if (maxCharLen) *(characters++) = character;
443        (*usedCharLen)++;
444        processedByteLen += usedLen;
445        bytes += usedLen;
446        numBytes -= usedLen;
447    }
448
449    return processedByteLen;
450}
451
452static CFIndex __CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
453    CFIndex processedByteLen = 0;
454    UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
455    UniChar character;
456    CFIndex usedLen;
457    CFIndex decomposedLen;
458    CFIndex theUsedCharLen = 0;
459    bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
460
461    while (numBytes && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
462        if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes, numBytes, &character))) break;
463
464        if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
465            CFIndex idx;
466
467            decomposedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
468            *usedCharLen = theUsedCharLen;
469
470            for (idx = 0;idx < decomposedLen;idx++) {
471                if (charBuffer[idx] > 0xFFFF) { // Non-BMP
472                    if (theUsedCharLen + 2 > maxCharLen)  return processedByteLen;
473                    theUsedCharLen += 2;
474                    if (maxCharLen) {
475                        charBuffer[idx] = charBuffer[idx] - 0x10000;
476                        *(characters++) = (UniChar)(charBuffer[idx] >> 10) + 0xD800UL;
477                        *(characters++) = (UniChar)(charBuffer[idx] & 0x3FF) + 0xDC00UL;
478                    }
479                } else {
480                    if (theUsedCharLen + 1 > maxCharLen)  return processedByteLen;
481                    ++theUsedCharLen;
482                    *(characters++) = charBuffer[idx];
483                }
484            }
485        } else {
486            if (maxCharLen) *(characters++) = character;
487            ++theUsedCharLen;
488        }
489
490        processedByteLen += usedLen;
491        bytes += usedLen;
492        numBytes -= usedLen;
493    }
494    *usedCharLen = theUsedCharLen;
495    return processedByteLen;
496}
497
498/* static functions
499*/
500CF_INLINE _CFEncodingConverter *__CFEncodingConverterFromDefinition(const CFStringEncodingConverter *definition, CFStringEncoding encoding) {
501#define NUM_OF_ENTRIES_CYCLE (10)
502    static uint32_t _currentIndex = 0;
503    static uint32_t _allocatedSize = 0;
504    static _CFEncodingConverter *_allocatedEntries = NULL;
505    _CFEncodingConverter *converter;
506
507
508    if ((_currentIndex + 1) >= _allocatedSize) {
509        _currentIndex = 0;
510        _allocatedSize = 0;
511        _allocatedEntries = NULL;
512    }
513    if (_allocatedEntries == NULL) { // Not allocated yet
514        _allocatedEntries = (_CFEncodingConverter *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(_CFEncodingConverter) * NUM_OF_ENTRIES_CYCLE, 0);
515        _allocatedSize = NUM_OF_ENTRIES_CYCLE;
516        converter = &(_allocatedEntries[_currentIndex]);
517    } else {
518        converter = &(_allocatedEntries[++_currentIndex]);
519    }
520
521    memset(converter, 0, sizeof(_CFEncodingConverter));
522
523    converter->definition = definition;
524
525    switch (definition->encodingClass) {
526        case kCFStringEncodingConverterStandard:
527            converter->toBytes = NULL;
528            converter->toUnicode = NULL;
529            converter->toCanonicalUnicode = NULL;
530            break;
531
532        case kCFStringEncodingConverterCheapEightBit:
533            converter->toBytes = __CFToBytesCheapEightBitWrapper;
534            converter->toUnicode = __CFToUnicodeCheapEightBitWrapper;
535            converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapEightBitWrapper;
536            break;
537
538        case kCFStringEncodingConverterStandardEightBit:
539            converter->toBytes = __CFToBytesStandardEightBitWrapper;
540            converter->toUnicode = __CFToUnicodeStandardEightBitWrapper;
541            converter->toCanonicalUnicode = __CFToCanonicalUnicodeStandardEightBitWrapper;
542            break;
543
544        case kCFStringEncodingConverterCheapMultiByte:
545            converter->toBytes = __CFToBytesCheapMultiByteWrapper;
546            converter->toUnicode = __CFToUnicodeCheapMultiByteWrapper;
547            converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapMultiByteWrapper;
548            break;
549
550#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
551        case kCFStringEncodingConverterICU:
552            converter->toBytes = (_CFToBytesProc)__CFStringEncodingGetICUName(encoding);
553            break;
554#endif
555
556        case kCFStringEncodingConverterPlatformSpecific:
557            break;
558
559        default: // Shouln't be here
560            return NULL;
561    }
562
563    converter->toBytesFallback = (definition->toBytesFallback ? definition->toBytesFallback : __CFDefaultToBytesFallbackProc);
564    converter->toUnicodeFallback = (definition->toUnicodeFallback ? definition->toUnicodeFallback : __CFDefaultToUnicodeFallbackProc);
565
566    return converter;
567}
568
569CF_INLINE const CFStringEncodingConverter *__CFStringEncodingConverterGetDefinition(CFStringEncoding encoding) {
570    switch (encoding) {
571        case kCFStringEncodingUTF8:
572            return &__CFConverterUTF8;
573
574        case kCFStringEncodingMacRoman:
575            return &__CFConverterMacRoman;
576
577        case kCFStringEncodingWindowsLatin1:
578            return &__CFConverterWinLatin1;
579
580        case kCFStringEncodingASCII:
581            return &__CFConverterASCII;
582
583        case kCFStringEncodingISOLatin1:
584            return &__CFConverterISOLatin1;
585
586
587        case kCFStringEncodingNextStepLatin:
588            return &__CFConverterNextStepLatin;
589
590
591        default:
592            return __CFStringEncodingGetExternalConverter(encoding);
593    }
594}
595
596static const _CFEncodingConverter *__CFGetConverter(uint32_t encoding) {
597    const _CFEncodingConverter *converter = NULL;
598    const _CFEncodingConverter **commonConverterSlot = NULL;
599    static _CFEncodingConverter *commonConverters[3] = {NULL, NULL, NULL}; // UTF8, MacRoman/WinLatin1, and the default encoding*
600    static CFMutableDictionaryRef mappingTable = NULL;
601    static CFSpinLock_t lock = CFSpinLockInit;
602
603    switch (encoding) {
604	case kCFStringEncodingUTF8: commonConverterSlot = (const _CFEncodingConverter **)&(commonConverters[0]); break;
605
606	    /* the swith here should avoid possible bootstrap issues in the default: case below when invoked from CFStringGetSystemEncoding() */
607#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
608	case kCFStringEncodingMacRoman: commonConverterSlot = (const _CFEncodingConverter **)&(commonConverters[1]); break;
609#elif DEPLOYMENT_TARGET_WINDOWS
610	case kCFStringEncodingWindowsLatin1: commonConverterSlot = (const _CFEncodingConverter **)(&(commonConverters[1])); break;
611#else
612#warning This case must match __defaultEncoding value defined in CFString.c
613	case kCFStringEncodingISOLatin1: commonConverterSlot = (const _CFEncodingConverter **)(&(commonConverters[1])); break;
614#endif
615
616	default: if (CFStringGetSystemEncoding() == encoding) commonConverterSlot = (const _CFEncodingConverter **)&(commonConverters[2]); break;
617    }
618
619    __CFSpinLock(&lock);
620    converter = ((NULL == commonConverterSlot) ? ((NULL == mappingTable) ? NULL : (const _CFEncodingConverter *)CFDictionaryGetValue(mappingTable, (const void *)(uintptr_t)encoding)) : *commonConverterSlot);
621    __CFSpinUnlock(&lock);
622
623    if (NULL == converter) {
624        const CFStringEncodingConverter *definition = __CFStringEncodingConverterGetDefinition(encoding);
625
626        if (NULL != definition) {
627            __CFSpinLock(&lock);
628            converter = ((NULL == commonConverterSlot) ? ((NULL == mappingTable) ? NULL : (const _CFEncodingConverter *)CFDictionaryGetValue(mappingTable, (const void *)(uintptr_t)encoding)) : *commonConverterSlot);
629
630            if (NULL == converter) {
631                converter = __CFEncodingConverterFromDefinition(definition, encoding);
632
633		if (NULL == commonConverterSlot) {
634		    if (NULL == mappingTable) mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
635
636		    CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)encoding, converter);
637		} else {
638		    *commonConverterSlot = converter;
639		}
640            }
641            __CFSpinUnlock(&lock);
642        }
643    }
644
645    return converter;
646}
647
648/* Public API
649*/
650uint32_t CFStringEncodingUnicodeToBytes(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars, CFIndex *usedCharLen, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
651    if (encoding == kCFStringEncodingUTF8) {
652        static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
653        CFIndex convertedCharLen;
654        CFIndex usedLen;
655
656
657        if ((flags & kCFStringEncodingUseCanonical) || (flags & kCFStringEncodingUseHFSPlusCanonical)) {
658            (void)CFUniCharDecompose(characters, numChars, &convertedCharLen, (void *)bytes, maxByteLen, &usedLen, true, kCFUniCharUTF8Format, (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false));
659        } else {
660            if (!__CFToUTF8) {
661                const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
662                __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
663            }
664            convertedCharLen = __CFToUTF8(0, characters, numChars, bytes, maxByteLen, &usedLen);
665        }
666        if (usedCharLen) *usedCharLen = convertedCharLen;
667        if (usedByteLen) *usedByteLen = usedLen;
668
669        if (convertedCharLen == numChars) {
670            return kCFStringEncodingConversionSuccess;
671        } else if ((maxByteLen > 0) && ((maxByteLen - usedLen) < 10)) { // could be filled outbuf
672            UTF16Char character = characters[convertedCharLen];
673
674            if (((character >= kSurrogateLowStart) && (character <= kSurrogateLowEnd)) || ((character >= kSurrogateHighStart) && (character <= kSurrogateHighEnd) && ((1 == (numChars - convertedCharLen)) || (characters[convertedCharLen + 1] < kSurrogateLowStart) || (characters[convertedCharLen + 1] > kSurrogateLowEnd)))) return kCFStringEncodingInvalidInputStream;
675
676            return kCFStringEncodingInsufficientOutputBufferLength;
677        } else {
678            return kCFStringEncodingInvalidInputStream;
679        }
680    } else {
681        const _CFEncodingConverter *converter = __CFGetConverter(encoding);
682        CFIndex usedLen = 0;
683        CFIndex localUsedByteLen;
684        CFIndex theUsedByteLen = 0;
685        uint32_t theResult = kCFStringEncodingConversionSuccess;
686        CFStringEncodingToBytesPrecomposeProc toBytesPrecompose = NULL;
687        CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar = NULL;
688
689        if (!converter) return kCFStringEncodingConverterUnavailable;
690
691        if (flags & kCFStringEncodingSubstituteCombinings) {
692            if (!(flags & kCFStringEncodingAllowLossyConversion)) isValidCombiningChar = converter->definition->isValidCombiningChar;
693       } else {
694            isValidCombiningChar = converter->definition->isValidCombiningChar;
695            if (!(flags & kCFStringEncodingIgnoreCombinings)) {
696                toBytesPrecompose = converter->definition->toBytesPrecompose;
697                flags |= kCFStringEncodingComposeCombinings;
698            }
699        }
700
701#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
702        if (kCFStringEncodingConverterICU == converter->definition->encodingClass) return __CFStringEncodingICUToBytes((const char *)converter->toBytes, flags, characters, numChars, usedCharLen, bytes, maxByteLen, usedByteLen);
703#endif
704
705        /* Platform converter */
706        if (kCFStringEncodingConverterPlatformSpecific == converter->definition->encodingClass) return __CFStringEncodingPlatformUnicodeToBytes(encoding, flags, characters, numChars, usedCharLen, bytes, maxByteLen, usedByteLen);
707
708        while ((usedLen < numChars) && (!maxByteLen || (theUsedByteLen < maxByteLen))) {
709            if ((usedLen += TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) < numChars) {
710                CFIndex dummy;
711
712                if (isValidCombiningChar && (usedLen > 0) && isValidCombiningChar(characters[usedLen])) {
713                    if (toBytesPrecompose) {
714                        CFIndex localUsedLen = usedLen;
715
716                        while (isValidCombiningChar(characters[--usedLen]));
717                        theUsedByteLen += localUsedByteLen;
718                        if (converter->definition->maxBytesPerChar > 1) {
719                            TO_BYTE(converter, flags, characters + usedLen, localUsedLen - usedLen, NULL, 0, &localUsedByteLen);
720                            theUsedByteLen -= localUsedByteLen;
721                        } else {
722                            theUsedByteLen--;
723                        }
724                        if ((localUsedLen = toBytesPrecompose(flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) > 0) {
725                            usedLen += localUsedLen;
726                            if ((usedLen < numChars) && isValidCombiningChar(characters[usedLen])) { // There is a non-base char not combined remaining
727                                theUsedByteLen += localUsedByteLen;
728                                theResult = kCFStringEncodingInvalidInputStream;
729                                break;
730                            }
731                        } else if (flags & kCFStringEncodingAllowLossyConversion) {
732                            uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
733
734                            if (lossyByte) {
735                                while (isValidCombiningChar(characters[++usedLen]));
736                                localUsedByteLen = 1;
737                                if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
738                            } else {
739                                ++usedLen;
740                                usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
741                            }
742                        } else {
743                            theResult = kCFStringEncodingInvalidInputStream;
744                            break;
745                        }
746                    } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
747                                    theUsedByteLen += localUsedByteLen;
748                                    theResult = kCFStringEncodingInsufficientOutputBufferLength;
749                                    break;
750                    } else if (flags & kCFStringEncodingIgnoreCombinings) {
751                        while ((++usedLen < numChars) && isValidCombiningChar(characters[usedLen]));
752                    } else {
753                        uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
754
755                        theUsedByteLen += localUsedByteLen;
756                        if (lossyByte) {
757                            ++usedLen;
758                            localUsedByteLen = 1;
759                            if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
760                        } else {
761                            usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
762                        }
763                    }
764                } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
765                    theUsedByteLen += localUsedByteLen;
766
767                    if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
768                        CFIndex localUsedLen;
769
770                        localUsedByteLen = 0;
771                        while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
772                    }
773                    if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
774                    break;
775                } else if (flags & kCFStringEncodingAllowLossyConversion) {
776                    uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
777
778                    theUsedByteLen += localUsedByteLen;
779                    if (lossyByte) {
780                        ++usedLen;
781                        localUsedByteLen = 1;
782                        if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
783                    } else {
784                        usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
785                    }
786                } else {
787                    theUsedByteLen += localUsedByteLen;
788                    theResult = kCFStringEncodingInvalidInputStream;
789                    break;
790                }
791            }
792            theUsedByteLen += localUsedByteLen;
793        }
794
795        if (usedLen < numChars && maxByteLen && theResult == kCFStringEncodingConversionSuccess) {
796            if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
797                CFIndex localUsedLen;
798
799                localUsedByteLen = 0;
800                while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
801            }
802            if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
803        }
804        if (usedByteLen) *usedByteLen = theUsedByteLen;
805        if (usedCharLen) *usedCharLen = usedLen;
806
807        return theResult;
808    }
809}
810
811uint32_t CFStringEncodingBytesToUnicode(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, CFIndex *usedByteLen, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
812    const _CFEncodingConverter *converter = __CFGetConverter(encoding);
813    CFIndex usedLen = 0;
814    CFIndex theUsedCharLen = 0;
815    CFIndex localUsedCharLen;
816    uint32_t theResult = kCFStringEncodingConversionSuccess;
817
818    if (!converter) return kCFStringEncodingConverterUnavailable;
819
820#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
821    if (kCFStringEncodingConverterICU == converter->definition->encodingClass) return __CFStringEncodingICUToUnicode((const char *)converter->toBytes, flags, bytes, numBytes, usedByteLen, characters, maxCharLen, usedCharLen);
822#endif
823
824    /* Platform converter */
825    if (kCFStringEncodingConverterPlatformSpecific == converter->definition->encodingClass) return __CFStringEncodingPlatformBytesToUnicode(encoding, flags, bytes, numBytes, usedByteLen, characters, maxCharLen, usedCharLen);
826
827    while ((usedLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
828        if ((usedLen += TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen)) < numBytes) {
829            CFIndex tempUsedCharLen;
830
831            if (maxCharLen && ((maxCharLen == theUsedCharLen + localUsedCharLen) || (((flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical)) || (maxCharLen == theUsedCharLen + localUsedCharLen + 1)) && TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, NULL, 0, &tempUsedCharLen)))) { // buffer was filled up
832                theUsedCharLen += localUsedCharLen;
833                theResult = kCFStringEncodingInsufficientOutputBufferLength;
834                break;
835            } else if (flags & kCFStringEncodingAllowLossyConversion) {
836                theUsedCharLen += localUsedCharLen;
837                usedLen += TO_UNICODE_FALLBACK(converter, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen);
838            } else {
839                theUsedCharLen += localUsedCharLen;
840                theResult = kCFStringEncodingInvalidInputStream;
841                break;
842            }
843        }
844        theUsedCharLen += localUsedCharLen;
845    }
846
847    if (usedLen < numBytes && maxCharLen && theResult == kCFStringEncodingConversionSuccess) {
848        theResult = kCFStringEncodingInsufficientOutputBufferLength;
849    }
850    if (usedCharLen) *usedCharLen = theUsedCharLen;
851    if (usedByteLen) *usedByteLen = usedLen;
852
853    return theResult;
854}
855
856CF_PRIVATE bool CFStringEncodingIsValidEncoding(uint32_t encoding) {
857    return (CFStringEncodingGetConverter(encoding) ? true : false);
858}
859
860CF_PRIVATE CFIndex CFStringEncodingCharLengthForBytes(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes) {
861    const _CFEncodingConverter *converter = __CFGetConverter(encoding);
862
863    if (converter) {
864#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
865        if (kCFStringEncodingConverterICU == converter->definition->encodingClass) return __CFStringEncodingICUCharLength((const char *)converter->toBytes, flags, bytes, numBytes);
866#endif
867
868        if (kCFStringEncodingConverterPlatformSpecific == converter->definition->encodingClass) return __CFStringEncodingPlatformCharLengthForBytes(encoding, flags, bytes, numBytes);
869
870        if (1 == converter->definition->maxBytesPerChar) return numBytes;
871
872        if (NULL == converter->definition->toUnicodeLen) {
873            CFIndex usedByteLen = 0;
874            CFIndex totalLength = 0;
875            CFIndex usedCharLen;
876
877            while (numBytes > 0) {
878                usedByteLen = TO_UNICODE(converter, flags, bytes, numBytes, NULL, 0, &usedCharLen);
879
880                bytes += usedByteLen;
881                numBytes -= usedByteLen;
882                totalLength += usedCharLen;
883
884                if (numBytes > 0) {
885                    if (0 == (flags & kCFStringEncodingAllowLossyConversion)) return 0;
886
887                    usedByteLen = TO_UNICODE_FALLBACK(converter, bytes, numBytes, NULL, 0, &usedCharLen);
888
889                    bytes += usedByteLen;
890                    numBytes -= usedByteLen;
891                    totalLength += usedCharLen;
892                }
893            }
894
895            return totalLength;
896        } else {
897            return converter->definition->toUnicodeLen(flags, bytes, numBytes);
898        }
899    }
900
901    return 0;
902}
903
904CF_PRIVATE CFIndex CFStringEncodingByteLengthForCharacters(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars) {
905    const _CFEncodingConverter *converter = __CFGetConverter(encoding);
906
907    if (converter) {
908#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
909        if (kCFStringEncodingConverterICU == converter->definition->encodingClass) return __CFStringEncodingICUByteLength((const char *)converter->toBytes, flags, characters, numChars);
910#endif
911
912        if (kCFStringEncodingConverterPlatformSpecific == converter->definition->encodingClass) return __CFStringEncodingPlatformByteLengthForCharacters(encoding, flags, characters, numChars);
913
914        if (1 == converter->definition->maxBytesPerChar) return numChars;
915
916        if (NULL == converter->definition->toBytesLen) {
917            CFIndex usedByteLen;
918
919            return ((kCFStringEncodingConversionSuccess == CFStringEncodingUnicodeToBytes(encoding, flags, characters, numChars, NULL, NULL, 0, &usedByteLen)) ? usedByteLen : 0);
920        } else {
921            return converter->definition->toBytesLen(flags, characters, numChars);
922        }
923    }
924
925    return 0;
926}
927
928void CFStringEncodingRegisterFallbackProcedures(uint32_t encoding, CFStringEncodingToBytesFallbackProc toBytes, CFStringEncodingToUnicodeFallbackProc toUnicode) {
929    _CFEncodingConverter *converter = (_CFEncodingConverter *)__CFGetConverter(encoding);
930
931    if (NULL != converter) {
932       const CFStringEncodingConverter *body = CFStringEncodingGetConverter(encoding);
933
934        converter->toBytesFallback = ((NULL == toBytes) ? ((NULL == body) ? __CFDefaultToBytesFallbackProc : body->toBytesFallback) : toBytes);
935        converter->toUnicodeFallback = ((NULL == toUnicode) ? ((NULL == body) ? __CFDefaultToUnicodeFallbackProc : body->toUnicodeFallback) : toUnicode);
936    }
937}
938
939CF_PRIVATE const CFStringEncodingConverter *CFStringEncodingGetConverter(uint32_t encoding) {
940    const _CFEncodingConverter *converter = __CFGetConverter(encoding);
941
942    return ((NULL == converter) ? NULL : converter->definition);
943}
944
945static const CFStringEncoding __CFBuiltinEncodings[] = {
946    kCFStringEncodingMacRoman,
947    kCFStringEncodingWindowsLatin1,
948    kCFStringEncodingISOLatin1,
949    kCFStringEncodingNextStepLatin,
950    kCFStringEncodingASCII,
951    kCFStringEncodingUTF8,
952    /* These seven are available only in CFString-level */
953    kCFStringEncodingNonLossyASCII,
954
955    kCFStringEncodingUTF16,
956    kCFStringEncodingUTF16BE,
957    kCFStringEncodingUTF16LE,
958
959    kCFStringEncodingUTF32,
960    kCFStringEncodingUTF32BE,
961    kCFStringEncodingUTF32LE,
962
963    kCFStringEncodingInvalidId,
964};
965
966static CFComparisonResult __CFStringEncodingComparator(const void *v1, const void *v2, void *context) {
967    CFComparisonResult val1 = (*(const CFStringEncoding *)v1) & 0xFFFF;
968    CFComparisonResult val2 = (*(const CFStringEncoding *)v2) & 0xFFFF;
969
970    return ((val1 == val2) ? ((CFComparisonResult)(*(const CFStringEncoding *)v1) - (CFComparisonResult)(*(const CFStringEncoding *)v2)) : val1 - val2);
971}
972
973static void __CFStringEncodingFliterDupes(CFStringEncoding *encodings, CFIndex numSlots) {
974    CFStringEncoding last = kCFStringEncodingInvalidId;
975    const CFStringEncoding *limitEncodings = encodings + numSlots;
976
977    while (encodings < limitEncodings) {
978        if (last == *encodings) {
979            if ((encodings + 1) < limitEncodings) memmove(encodings, encodings + 1, sizeof(CFStringEncoding) * (limitEncodings - encodings - 1));
980            --limitEncodings;
981        } else {
982            last = *(encodings++);
983        }
984    }
985}
986
987CF_PRIVATE const CFStringEncoding *CFStringEncodingListOfAvailableEncodings(void) {
988    static const CFStringEncoding *encodings = NULL;
989
990    if (NULL == encodings) {
991        CFStringEncoding *list = (CFStringEncoding *)__CFBuiltinEncodings;
992        CFIndex numICUConverters = 0, numPlatformConverters = 0;
993#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
994        CFStringEncoding *icuConverters = __CFStringEncodingCreateICUEncodings(NULL, &numICUConverters);
995#else
996        CFStringEncoding *icuConverters = NULL;
997#endif
998        CFStringEncoding *platformConverters = __CFStringEncodingCreateListOfAvailablePlatformConverters(NULL, &numPlatformConverters);
999
1000        if ((NULL != icuConverters) || (NULL != platformConverters)) {
1001            CFIndex numSlots = (sizeof(__CFBuiltinEncodings) / sizeof(*__CFBuiltinEncodings)) + numICUConverters + numPlatformConverters;
1002
1003            list = (CFStringEncoding *)CFAllocatorAllocate(NULL, sizeof(CFStringEncoding) * numSlots, 0);
1004
1005            memcpy(list, __CFBuiltinEncodings, sizeof(__CFBuiltinEncodings));
1006
1007            if (NULL != icuConverters) {
1008                memcpy(list + (sizeof(__CFBuiltinEncodings) / sizeof(*__CFBuiltinEncodings)), icuConverters, sizeof(CFStringEncoding) * numICUConverters);
1009                CFAllocatorDeallocate(NULL, icuConverters);
1010            }
1011
1012            if (NULL != platformConverters) {
1013                memcpy(list + (sizeof(__CFBuiltinEncodings) / sizeof(*__CFBuiltinEncodings)) + numICUConverters, platformConverters, sizeof(CFStringEncoding) * numPlatformConverters);
1014                CFAllocatorDeallocate(NULL, platformConverters);
1015            }
1016
1017            CFQSortArray(list, numSlots, sizeof(CFStringEncoding), (CFComparatorFunction)__CFStringEncodingComparator, NULL);
1018            __CFStringEncodingFliterDupes(list, numSlots);
1019        }
1020        if (!OSAtomicCompareAndSwapPtrBarrier(NULL, list, (void * volatile *)&encodings) && (list != __CFBuiltinEncodings)) CFAllocatorDeallocate(NULL, list);
1021    }
1022
1023    return encodings;
1024}
1025
1026#undef TO_BYTE
1027#undef TO_UNICODE
1028#undef ASCIINewLine
1029#undef kSurrogateHighStart
1030#undef kSurrogateHighEnd
1031#undef kSurrogateLowStart
1032#undef kSurrogateLowEnd
1033#undef TO_BYTE_FALLBACK
1034#undef TO_UNICODE_FALLBACK
1035#undef EXTRA_BASE
1036#undef NUM_OF_ENTRIES_CYCLE
1037
1038