1/*
2 * Copyright 2010-2011, Oliver Tappe, zooey@hirschkaefer.de.
3 * Distributed under the terms of the MIT License.
4 */
5
6
7#include "ICUCollateData.h"
8
9#include <assert.h>
10#include <string.h>
11#include <strings.h>
12#include <wchar.h>
13
14#include <unicode/unistr.h>
15
16#include <AutoDeleter.h>
17
18
19U_NAMESPACE_USE
20
21
22namespace BPrivate {
23namespace Libroot {
24
25
26ICUCollateData::ICUCollateData(pthread_key_t tlsKey)
27	:
28	inherited(tlsKey),
29	fCollator(NULL)
30{
31}
32
33
34ICUCollateData::~ICUCollateData()
35{
36	delete fCollator;
37}
38
39
40status_t
41ICUCollateData::SetTo(const Locale& locale, const char* posixLocaleName)
42{
43	status_t result = inherited::SetTo(locale, posixLocaleName);
44
45	if (result == B_OK) {
46		UErrorCode icuStatus = U_ZERO_ERROR;
47		delete fCollator;
48		fCollator = Collator::createInstance(fLocale, icuStatus);
49		if (!U_SUCCESS(icuStatus))
50			return B_NO_MEMORY;
51	}
52
53	return result;
54}
55
56
57status_t
58ICUCollateData::SetToPosix()
59{
60	status_t result = inherited::SetToPosix();
61
62	if (result == B_OK) {
63		delete fCollator;
64		fCollator = NULL;
65	}
66
67	return result;
68}
69
70
71status_t
72ICUCollateData::Strcoll(const char* a, const char* b, int& result)
73{
74	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
75		// handle POSIX here as the collator ICU uses for that (english) is
76		// incompatible in too many ways
77		result = strcmp(a, b);
78		for (const char* aIter = a; *aIter != 0; ++aIter) {
79			if (*aIter < 0)
80				return B_BAD_VALUE;
81		}
82		for (const char* bIter = b; *bIter != 0; ++bIter) {
83			if (*bIter < 0)
84				return B_BAD_VALUE;
85		}
86		return B_OK;
87	}
88
89	status_t status = B_OK;
90	UErrorCode icuStatus = U_ZERO_ERROR;
91
92	if (strcasecmp(fGivenCharset, "utf-8") == 0) {
93		UCharIterator aIter, bIter;
94		uiter_setUTF8(&aIter, a, -1);
95		uiter_setUTF8(&bIter, b, -1);
96
97		result = fCollator->compare(aIter, bIter, icuStatus);
98	} else {
99		UnicodeString unicodeA;
100		UnicodeString unicodeB;
101
102		if (_ToUnicodeString(a, unicodeA) != B_OK
103			|| _ToUnicodeString(b, unicodeB) != B_OK) {
104			status = B_BAD_VALUE;
105		}
106
107		result = fCollator->compare(unicodeA, unicodeB, icuStatus);
108	}
109
110	if (!U_SUCCESS(icuStatus))
111		status = B_BAD_VALUE;
112
113	return status;
114}
115
116
117status_t
118ICUCollateData::Strxfrm(char* out, const char* in,
119	size_t outSize, size_t& requiredSize)
120{
121	if (in == NULL) {
122		requiredSize = 0;
123		return B_OK;
124	}
125
126	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
127		// handle POSIX here as the collator ICU uses for that (english) is
128		// incompatible in too many ways
129		requiredSize = strlcpy(out, in, outSize);
130		for (const char* inIter = in; *inIter != 0; ++inIter) {
131			if (*inIter < 0)
132				return B_BAD_VALUE;
133		}
134		return B_OK;
135	}
136
137	UnicodeString unicodeIn;
138	if (_ToUnicodeString(in, unicodeIn) != B_OK)
139		return B_BAD_VALUE;
140
141	requiredSize = fCollator->getSortKey(unicodeIn, (uint8_t*)out, outSize);
142
143	// Do not include terminating NULL byte in the required-size.
144	if (requiredSize > 0) {
145		if (outSize >= requiredSize)
146			assert(out[requiredSize - 1] == '\0');
147		requiredSize--;
148	}
149
150	return B_OK;
151}
152
153
154status_t
155ICUCollateData::Wcscoll(const wchar_t* a, const wchar_t* b, int& result)
156{
157	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
158		// handle POSIX here as the collator ICU uses for that (english) is
159		// incompatible in too many ways
160		result = wcscmp(a, b);
161		for (const wchar_t* aIter = a; *aIter != 0; ++aIter) {
162			if (*aIter > 127)
163				return B_BAD_VALUE;
164		}
165		for (const wchar_t* bIter = b; *bIter != 0; ++bIter) {
166			if (*bIter > 127)
167				return B_BAD_VALUE;
168		}
169		return B_OK;
170	}
171
172	UnicodeString unicodeA = UnicodeString::fromUTF32((UChar32*)a, -1);
173	UnicodeString unicodeB = UnicodeString::fromUTF32((UChar32*)b, -1);
174
175	UErrorCode icuStatus = U_ZERO_ERROR;
176	result = fCollator->compare(unicodeA, unicodeB, icuStatus);
177
178	if (!U_SUCCESS(icuStatus))
179		return B_BAD_VALUE;
180
181	return B_OK;
182}
183
184
185status_t
186ICUCollateData::Wcsxfrm(wchar_t* out, const wchar_t* in, size_t outSize,
187	size_t& requiredSize)
188{
189	if (in == NULL) {
190		requiredSize = 0;
191		return B_OK;
192	}
193
194	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
195		// handle POSIX here as the collator ICU uses for that (english) is
196		// incompatible in too many ways
197		requiredSize = wcslcpy(out, in, outSize);
198		for (const wchar_t* inIter = in; *inIter != 0; ++inIter) {
199			if (*inIter > 127)
200				return B_BAD_VALUE;
201		}
202		return B_OK;
203	}
204
205	UnicodeString unicodeIn = UnicodeString::fromUTF32((UChar32*)in, -1);
206	requiredSize = fCollator->getSortKey(unicodeIn, NULL, 0);
207
208	if (outSize == 0)
209		return B_OK;
210
211	uint8_t* buffer = (uint8_t*)out;
212	fCollator->getSortKey(unicodeIn, buffer, outSize);
213
214	// convert 1-byte characters to 4-byte wide characters:
215	for (size_t i = 0; i < outSize; ++i)
216		out[outSize - 1 - i] = buffer[outSize - 1 - i];
217
218	// Do not include terminating NULL character in the required-size.
219	if (requiredSize > 0) {
220		if (outSize >= requiredSize)
221			assert(out[requiredSize - 1] == 0);
222		requiredSize--;
223	}
224
225	return B_OK;
226}
227
228
229status_t
230ICUCollateData::_ToUnicodeString(const char* in, UnicodeString& out)
231{
232	out.remove();
233
234	if (in == NULL)
235		return B_OK;
236
237	size_t inLen = strlen(in);
238	if (inLen == 0)
239		return B_OK;
240
241	UConverter* converter;
242	status_t result = _GetConverter(converter);
243	if (result != B_OK)
244		return result;
245
246	UErrorCode icuStatus = U_ZERO_ERROR;
247	int32_t outLen = ucnv_toUChars(converter, NULL, 0, in, inLen, &icuStatus);
248	if (icuStatus != U_BUFFER_OVERFLOW_ERROR)
249		return B_BAD_VALUE;
250	if (outLen < 0)
251		return B_ERROR;
252	if (outLen == 0)
253		return B_OK;
254
255	UChar* outBuf = out.getBuffer(outLen + 1);
256	icuStatus = U_ZERO_ERROR;
257	outLen
258		= ucnv_toUChars(converter, outBuf, outLen + 1, in, inLen, &icuStatus);
259	if (!U_SUCCESS(icuStatus)) {
260		out.releaseBuffer(0);
261		return B_BAD_VALUE;
262	}
263
264	out.releaseBuffer(outLen);
265
266	return B_OK;
267}
268
269
270}	// namespace Libroot
271}	// namespace BPrivate
272