1/*
2 * Copyright (c) 2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*	CFPlatformConverters.c
25	Copyright (c) 1998-2013, Apple Inc. All rights reserved.
26	Responsibility: Aki Inoue
27*/
28
29#include "CFInternal.h"
30#include <CoreFoundation/CFString.h>
31#include "CFStringEncodingConverterExt.h"
32#include <CoreFoundation/CFStringEncodingExt.h>
33#include "CFUniChar.h"
34#include "CFUnicodeDecomposition.h"
35#include "CFStringEncodingConverterPriv.h"
36#include "CFICUConverters.h"
37
38
39CF_INLINE bool __CFIsPlatformConverterAvailable(int encoding) {
40
41#if DEPLOYMENT_TARGET_WINDOWS
42    return (IsValidCodePage(CFStringConvertEncodingToWindowsCodepage(encoding)) ? true : false);
43#else
44    return false;
45#endif
46}
47
48static const CFStringEncodingConverter __CFICUBootstrap = {
49    NULL /* toBytes */, NULL /* toUnicode */, 6 /* maxBytesPerChar */, 4 /* maxDecomposedCharLen */,
50    kCFStringEncodingConverterICU /* encodingClass */,
51    NULL /* toBytesLen */, NULL /* toUnicodeLen */, NULL /* toBytesFallback */,
52    NULL /* toUnicodeFallback */, NULL /* toBytesPrecompose */, NULL, /* isValidCombiningChar */
53};
54
55static const CFStringEncodingConverter __CFPlatformBootstrap = {
56    NULL /* toBytes */, NULL /* toUnicode */, 6 /* maxBytesPerChar */, 4 /* maxDecomposedCharLen */,
57    kCFStringEncodingConverterPlatformSpecific /* encodingClass */,
58    NULL /* toBytesLen */, NULL /* toUnicodeLen */, NULL /* toBytesFallback */,
59    NULL /* toUnicodeFallback */, NULL /* toBytesPrecompose */, NULL, /* isValidCombiningChar */
60};
61
62CF_PRIVATE const CFStringEncodingConverter *__CFStringEncodingGetExternalConverter(uint32_t encoding) {
63
64    // we prefer Text Encoding Converter ICU since it's more reliable
65    if (__CFIsPlatformConverterAvailable(encoding)) {
66        return &__CFPlatformBootstrap;
67    } else {
68#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
69        if (__CFStringEncodingGetICUName(encoding)) {
70            return &__CFICUBootstrap;
71        }
72#endif
73        return NULL;
74    }
75}
76
77#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
78CF_PRIVATE CFStringEncoding *__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator, CFIndex *numberOfConverters) {
79
80    return NULL;
81}
82#elif DEPLOYMENT_TARGET_WINDOWS
83
84#include <tchar.h>
85
86static uint32_t __CFWin32EncodingIndex = 0;
87static CFStringEncoding *__CFWin32EncodingList = NULL;
88
89static char CALLBACK __CFWin32EnumCodePageProc(LPTSTR string) {
90    uint32_t encoding = CFStringConvertWindowsCodepageToEncoding(_tcstoul(string, NULL, 10));
91    CFIndex idx;
92
93    if (encoding != kCFStringEncodingInvalidId) { // We list only encodings we know
94        if (__CFWin32EncodingList) {
95            for (idx = 0;idx < (CFIndex)__CFWin32EncodingIndex;idx++) if (__CFWin32EncodingList[idx] == encoding) break;
96            if (idx != __CFWin32EncodingIndex) return true;
97            __CFWin32EncodingList[__CFWin32EncodingIndex] = encoding;
98        }
99        ++__CFWin32EncodingIndex;
100    }
101    return true;
102}
103
104CF_PRIVATE CFStringEncoding *__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator, CFIndex *numberOfConverters) {
105    CFStringEncoding *encodings;
106
107    EnumSystemCodePages((CODEPAGE_ENUMPROC)&__CFWin32EnumCodePageProc, CP_INSTALLED);
108    __CFWin32EncodingList = (uint32_t *)CFAllocatorAllocate(allocator, sizeof(uint32_t) * __CFWin32EncodingIndex, 0);
109    EnumSystemCodePages((CODEPAGE_ENUMPROC)&__CFWin32EnumCodePageProc, CP_INSTALLED);
110
111    *numberOfConverters = __CFWin32EncodingIndex;
112    encodings = __CFWin32EncodingList;
113
114    __CFWin32EncodingIndex = 0;
115    __CFWin32EncodingList = NULL;
116
117    return encodings;
118}
119#else
120CF_PRIVATE CFStringEncoding *__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator, CFIndex *numberOfConverters) { return NULL; }
121#endif
122
123CF_PRIVATE CFIndex __CFStringEncodingPlatformUnicodeToBytes(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars, CFIndex *usedCharLen, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
124
125#if DEPLOYMENT_TARGET_WINDOWS
126    WORD dwFlags = 0;
127    CFIndex usedLen;
128
129    if ((kCFStringEncodingUTF7 != encoding) && (kCFStringEncodingGB_18030_2000 != encoding) && (0x0800 != (encoding & 0x0F00))) { // not UTF-7/GB18030/ISO-2022-*
130        dwFlags |= (flags & (kCFStringEncodingAllowLossyConversion|kCFStringEncodingSubstituteCombinings) ? WC_DEFAULTCHAR : 0);
131        dwFlags |= (flags & kCFStringEncodingComposeCombinings ? WC_COMPOSITECHECK : 0);
132        dwFlags |= (flags & kCFStringEncodingIgnoreCombinings ? WC_DISCARDNS : 0);
133    }
134
135    if ((usedLen = WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, numChars, (LPSTR)bytes, maxByteLen, NULL, NULL)) == 0) {
136        if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
137            CPINFO cpInfo;
138
139            if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding), &cpInfo)) {
140                cpInfo.MaxCharSize = 1; // Is this right ???
141            }
142            if (cpInfo.MaxCharSize == 1) {
143                numChars = maxByteLen;
144            } else {
145                usedLen = WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, numChars, NULL, 0, NULL, NULL);
146                usedLen -= maxByteLen;
147                numChars = (numChars > usedLen ? numChars - usedLen : 1);
148            }
149            if (WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, numChars, (LPSTR)bytes, maxByteLen, NULL, NULL) == 0) {
150                if (usedCharLen) *usedCharLen = 0;
151                if (usedByteLen) *usedByteLen = 0;
152            } else {
153                CFIndex lastUsedLen = 0;
154
155                while ((usedLen = WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, ++numChars, (LPSTR)bytes, maxByteLen, NULL, NULL))) lastUsedLen = usedLen;
156                if (usedCharLen) *usedCharLen = (numChars - 1);
157                if (usedByteLen) *usedByteLen = lastUsedLen;
158            }
159
160            return kCFStringEncodingInsufficientOutputBufferLength;
161        } else {
162            return kCFStringEncodingInvalidInputStream;
163        }
164    } else {
165        if (usedCharLen) *usedCharLen = numChars;
166        if (usedByteLen) *usedByteLen = usedLen;
167        return kCFStringEncodingConversionSuccess;
168    }
169#endif /* DEPLOYMENT_TARGET_WINDOWS */
170
171    return kCFStringEncodingConverterUnavailable;
172}
173
174CF_PRIVATE CFIndex __CFStringEncodingPlatformBytesToUnicode(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, CFIndex *usedByteLen, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
175
176#if DEPLOYMENT_TARGET_WINDOWS
177    WORD dwFlags = 0;
178    CFIndex usedLen;
179
180    if ((kCFStringEncodingUTF7 != encoding) && (kCFStringEncodingGB_18030_2000 != encoding) && (0x0800 != (encoding & 0x0F00))) { // not UTF-7/GB18030/ISO-2022-*
181        dwFlags |= (flags & (kCFStringEncodingAllowLossyConversion|kCFStringEncodingSubstituteCombinings) ? 0 : MB_ERR_INVALID_CHARS);
182        dwFlags |= (flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical) ? MB_COMPOSITE : MB_PRECOMPOSED);
183    }
184
185    if ((usedLen = MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCSTR)bytes, numBytes, (LPWSTR)characters, maxCharLen)) == 0) {
186        if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
187            CPINFO cpInfo;
188
189            if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding), &cpInfo)) {
190                cpInfo.MaxCharSize = 1; // Is this right ???
191            }
192            if (cpInfo.MaxCharSize == 1) {
193                numBytes = maxCharLen;
194            } else {
195                usedLen = MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCSTR)bytes, numBytes, (LPWSTR)characters, maxCharLen);
196                usedLen -= maxCharLen;
197                numBytes = (numBytes > usedLen ? numBytes - usedLen : 1);
198            }
199            while ((usedLen = MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCSTR)bytes, numBytes, (LPWSTR)characters, maxCharLen)) == 0) {
200                if ((--numBytes) == 0) break;
201            }
202            if (usedCharLen) *usedCharLen = usedLen;
203            if (usedByteLen) *usedByteLen = numBytes;
204
205            return kCFStringEncodingInsufficientOutputBufferLength;
206        } else {
207            return kCFStringEncodingInvalidInputStream;
208        }
209    } else {
210        if (usedCharLen) *usedCharLen = usedLen;
211        if (usedByteLen) *usedByteLen = numBytes;
212        return kCFStringEncodingConversionSuccess;
213    }
214#endif /* DEPLOYMENT_TARGET_WINDOWS */
215
216    return kCFStringEncodingConverterUnavailable;
217}
218
219CF_PRIVATE CFIndex __CFStringEncodingPlatformCharLengthForBytes(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes) {
220    CFIndex usedCharLen;
221    return (__CFStringEncodingPlatformBytesToUnicode(encoding, flags, bytes, numBytes, NULL, NULL, 0, &usedCharLen) == kCFStringEncodingConversionSuccess ? usedCharLen : 0);
222}
223
224CF_PRIVATE CFIndex __CFStringEncodingPlatformByteLengthForCharacters(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars) {
225    CFIndex usedByteLen;
226    return (__CFStringEncodingPlatformUnicodeToBytes(encoding, flags, characters, numChars, NULL, NULL, 0, &usedByteLen) == kCFStringEncodingConversionSuccess ? usedByteLen : 0);
227}
228
229#undef __CFCarbonCore_GetTextEncodingBase0
230
231