1/* 2 * Copyright (c) 2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. Please obtain a copy of the License at 10 * http://www.opensource.apple.com/apsl/ and read it before using this 11 * file. 12 * 13 * The Original Code and all software distributed under the License are 14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 18 * Please see the License for the specific language governing rights and 19 * limitations under the License. 20 * 21 * @APPLE_LICENSE_HEADER_END@ 22 */ 23 24/* CFPlatformConverters.c 25 Copyright (c) 1998-2013, Apple Inc. All rights reserved. 26 Responsibility: Aki Inoue 27*/ 28 29#include "CFInternal.h" 30#include <CoreFoundation/CFString.h> 31#include "CFStringEncodingConverterExt.h" 32#include <CoreFoundation/CFStringEncodingExt.h> 33#include "CFUniChar.h" 34#include "CFUnicodeDecomposition.h" 35#include "CFStringEncodingConverterPriv.h" 36#include "CFICUConverters.h" 37 38 39CF_INLINE bool __CFIsPlatformConverterAvailable(int encoding) { 40 41#if DEPLOYMENT_TARGET_WINDOWS 42 return (IsValidCodePage(CFStringConvertEncodingToWindowsCodepage(encoding)) ? true : false); 43#else 44 return false; 45#endif 46} 47 48static const CFStringEncodingConverter __CFICUBootstrap = { 49 NULL /* toBytes */, NULL /* toUnicode */, 6 /* maxBytesPerChar */, 4 /* maxDecomposedCharLen */, 50 kCFStringEncodingConverterICU /* encodingClass */, 51 NULL /* toBytesLen */, NULL /* toUnicodeLen */, NULL /* toBytesFallback */, 52 NULL /* toUnicodeFallback */, NULL /* toBytesPrecompose */, NULL, /* isValidCombiningChar */ 53}; 54 55static const CFStringEncodingConverter __CFPlatformBootstrap = { 56 NULL /* toBytes */, NULL /* toUnicode */, 6 /* maxBytesPerChar */, 4 /* maxDecomposedCharLen */, 57 kCFStringEncodingConverterPlatformSpecific /* encodingClass */, 58 NULL /* toBytesLen */, NULL /* toUnicodeLen */, NULL /* toBytesFallback */, 59 NULL /* toUnicodeFallback */, NULL /* toBytesPrecompose */, NULL, /* isValidCombiningChar */ 60}; 61 62CF_PRIVATE const CFStringEncodingConverter *__CFStringEncodingGetExternalConverter(uint32_t encoding) { 63 64 // we prefer Text Encoding Converter ICU since it's more reliable 65 if (__CFIsPlatformConverterAvailable(encoding)) { 66 return &__CFPlatformBootstrap; 67 } else { 68#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 69 if (__CFStringEncodingGetICUName(encoding)) { 70 return &__CFICUBootstrap; 71 } 72#endif 73 return NULL; 74 } 75} 76 77#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED 78CF_PRIVATE CFStringEncoding *__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator, CFIndex *numberOfConverters) { 79 80 return NULL; 81} 82#elif DEPLOYMENT_TARGET_WINDOWS 83 84#include <tchar.h> 85 86static uint32_t __CFWin32EncodingIndex = 0; 87static CFStringEncoding *__CFWin32EncodingList = NULL; 88 89static char CALLBACK __CFWin32EnumCodePageProc(LPTSTR string) { 90 uint32_t encoding = CFStringConvertWindowsCodepageToEncoding(_tcstoul(string, NULL, 10)); 91 CFIndex idx; 92 93 if (encoding != kCFStringEncodingInvalidId) { // We list only encodings we know 94 if (__CFWin32EncodingList) { 95 for (idx = 0;idx < (CFIndex)__CFWin32EncodingIndex;idx++) if (__CFWin32EncodingList[idx] == encoding) break; 96 if (idx != __CFWin32EncodingIndex) return true; 97 __CFWin32EncodingList[__CFWin32EncodingIndex] = encoding; 98 } 99 ++__CFWin32EncodingIndex; 100 } 101 return true; 102} 103 104CF_PRIVATE CFStringEncoding *__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator, CFIndex *numberOfConverters) { 105 CFStringEncoding *encodings; 106 107 EnumSystemCodePages((CODEPAGE_ENUMPROC)&__CFWin32EnumCodePageProc, CP_INSTALLED); 108 __CFWin32EncodingList = (uint32_t *)CFAllocatorAllocate(allocator, sizeof(uint32_t) * __CFWin32EncodingIndex, 0); 109 EnumSystemCodePages((CODEPAGE_ENUMPROC)&__CFWin32EnumCodePageProc, CP_INSTALLED); 110 111 *numberOfConverters = __CFWin32EncodingIndex; 112 encodings = __CFWin32EncodingList; 113 114 __CFWin32EncodingIndex = 0; 115 __CFWin32EncodingList = NULL; 116 117 return encodings; 118} 119#else 120CF_PRIVATE CFStringEncoding *__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator, CFIndex *numberOfConverters) { return NULL; } 121#endif 122 123CF_PRIVATE CFIndex __CFStringEncodingPlatformUnicodeToBytes(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars, CFIndex *usedCharLen, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) { 124 125#if DEPLOYMENT_TARGET_WINDOWS 126 WORD dwFlags = 0; 127 CFIndex usedLen; 128 129 if ((kCFStringEncodingUTF7 != encoding) && (kCFStringEncodingGB_18030_2000 != encoding) && (0x0800 != (encoding & 0x0F00))) { // not UTF-7/GB18030/ISO-2022-* 130 dwFlags |= (flags & (kCFStringEncodingAllowLossyConversion|kCFStringEncodingSubstituteCombinings) ? WC_DEFAULTCHAR : 0); 131 dwFlags |= (flags & kCFStringEncodingComposeCombinings ? WC_COMPOSITECHECK : 0); 132 dwFlags |= (flags & kCFStringEncodingIgnoreCombinings ? WC_DISCARDNS : 0); 133 } 134 135 if ((usedLen = WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, numChars, (LPSTR)bytes, maxByteLen, NULL, NULL)) == 0) { 136 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { 137 CPINFO cpInfo; 138 139 if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding), &cpInfo)) { 140 cpInfo.MaxCharSize = 1; // Is this right ??? 141 } 142 if (cpInfo.MaxCharSize == 1) { 143 numChars = maxByteLen; 144 } else { 145 usedLen = WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, numChars, NULL, 0, NULL, NULL); 146 usedLen -= maxByteLen; 147 numChars = (numChars > usedLen ? numChars - usedLen : 1); 148 } 149 if (WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, numChars, (LPSTR)bytes, maxByteLen, NULL, NULL) == 0) { 150 if (usedCharLen) *usedCharLen = 0; 151 if (usedByteLen) *usedByteLen = 0; 152 } else { 153 CFIndex lastUsedLen = 0; 154 155 while ((usedLen = WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, ++numChars, (LPSTR)bytes, maxByteLen, NULL, NULL))) lastUsedLen = usedLen; 156 if (usedCharLen) *usedCharLen = (numChars - 1); 157 if (usedByteLen) *usedByteLen = lastUsedLen; 158 } 159 160 return kCFStringEncodingInsufficientOutputBufferLength; 161 } else { 162 return kCFStringEncodingInvalidInputStream; 163 } 164 } else { 165 if (usedCharLen) *usedCharLen = numChars; 166 if (usedByteLen) *usedByteLen = usedLen; 167 return kCFStringEncodingConversionSuccess; 168 } 169#endif /* DEPLOYMENT_TARGET_WINDOWS */ 170 171 return kCFStringEncodingConverterUnavailable; 172} 173 174CF_PRIVATE CFIndex __CFStringEncodingPlatformBytesToUnicode(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, CFIndex *usedByteLen, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) { 175 176#if DEPLOYMENT_TARGET_WINDOWS 177 WORD dwFlags = 0; 178 CFIndex usedLen; 179 180 if ((kCFStringEncodingUTF7 != encoding) && (kCFStringEncodingGB_18030_2000 != encoding) && (0x0800 != (encoding & 0x0F00))) { // not UTF-7/GB18030/ISO-2022-* 181 dwFlags |= (flags & (kCFStringEncodingAllowLossyConversion|kCFStringEncodingSubstituteCombinings) ? 0 : MB_ERR_INVALID_CHARS); 182 dwFlags |= (flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical) ? MB_COMPOSITE : MB_PRECOMPOSED); 183 } 184 185 if ((usedLen = MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCSTR)bytes, numBytes, (LPWSTR)characters, maxCharLen)) == 0) { 186 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { 187 CPINFO cpInfo; 188 189 if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding), &cpInfo)) { 190 cpInfo.MaxCharSize = 1; // Is this right ??? 191 } 192 if (cpInfo.MaxCharSize == 1) { 193 numBytes = maxCharLen; 194 } else { 195 usedLen = MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCSTR)bytes, numBytes, (LPWSTR)characters, maxCharLen); 196 usedLen -= maxCharLen; 197 numBytes = (numBytes > usedLen ? numBytes - usedLen : 1); 198 } 199 while ((usedLen = MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCSTR)bytes, numBytes, (LPWSTR)characters, maxCharLen)) == 0) { 200 if ((--numBytes) == 0) break; 201 } 202 if (usedCharLen) *usedCharLen = usedLen; 203 if (usedByteLen) *usedByteLen = numBytes; 204 205 return kCFStringEncodingInsufficientOutputBufferLength; 206 } else { 207 return kCFStringEncodingInvalidInputStream; 208 } 209 } else { 210 if (usedCharLen) *usedCharLen = usedLen; 211 if (usedByteLen) *usedByteLen = numBytes; 212 return kCFStringEncodingConversionSuccess; 213 } 214#endif /* DEPLOYMENT_TARGET_WINDOWS */ 215 216 return kCFStringEncodingConverterUnavailable; 217} 218 219CF_PRIVATE CFIndex __CFStringEncodingPlatformCharLengthForBytes(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes) { 220 CFIndex usedCharLen; 221 return (__CFStringEncodingPlatformBytesToUnicode(encoding, flags, bytes, numBytes, NULL, NULL, 0, &usedCharLen) == kCFStringEncodingConversionSuccess ? usedCharLen : 0); 222} 223 224CF_PRIVATE CFIndex __CFStringEncodingPlatformByteLengthForCharacters(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars) { 225 CFIndex usedByteLen; 226 return (__CFStringEncodingPlatformUnicodeToBytes(encoding, flags, characters, numChars, NULL, NULL, 0, &usedByteLen) == kCFStringEncodingConversionSuccess ? usedByteLen : 0); 227} 228 229#undef __CFCarbonCore_GetTextEncodingBase0 230 231