1/*
2 * Copyright 2010-2011, Oliver Tappe, zooey@hirschkaefer.de.
3 * Distributed under the terms of the MIT License.
4 */
5
6
7#include "ICUCollateData.h"
8
9#include <string.h>
10#include <wchar.h>
11
12#include <unicode/unistr.h>
13
14#include <AutoDeleter.h>
15
16
17namespace BPrivate {
18namespace Libroot {
19
20
21ICUCollateData::ICUCollateData(pthread_key_t tlsKey)
22	:
23	inherited(tlsKey),
24	fCollator(NULL)
25{
26}
27
28
29ICUCollateData::~ICUCollateData()
30{
31	delete fCollator;
32}
33
34
35status_t
36ICUCollateData::SetTo(const Locale& locale, const char* posixLocaleName)
37{
38	status_t result = inherited::SetTo(locale, posixLocaleName);
39
40	if (result == B_OK) {
41		UErrorCode icuStatus = U_ZERO_ERROR;
42		delete fCollator;
43		fCollator = Collator::createInstance(fLocale, icuStatus);
44		if (!U_SUCCESS(icuStatus))
45			return B_NO_MEMORY;
46	}
47
48	return result;
49}
50
51
52status_t
53ICUCollateData::SetToPosix()
54{
55	status_t result = inherited::SetToPosix();
56
57	if (result == B_OK) {
58		delete fCollator;
59		fCollator = NULL;
60	}
61
62	return result;
63}
64
65
66status_t
67ICUCollateData::Strcoll(const char* a, const char* b, int& result)
68{
69	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
70		// handle POSIX here as the collator ICU uses for that (english) is
71		// incompatible in too many ways
72		result = strcmp(a, b);
73		for (const char* aIter = a; *aIter != 0; ++aIter) {
74			if (*aIter < 0)
75				return B_BAD_VALUE;
76		}
77		for (const char* bIter = b; *bIter != 0; ++bIter) {
78			if (*bIter < 0)
79				return B_BAD_VALUE;
80		}
81		return B_OK;
82	}
83
84	status_t status = B_OK;
85	UErrorCode icuStatus = U_ZERO_ERROR;
86
87	if (strcasecmp(fGivenCharset, "utf-8") == 0) {
88		UCharIterator aIter, bIter;
89		uiter_setUTF8(&aIter, a, -1);
90		uiter_setUTF8(&bIter, b, -1);
91
92		result = fCollator->compare(aIter, bIter, icuStatus);
93	} else {
94		UnicodeString unicodeA;
95		UnicodeString unicodeB;
96
97		if (_ToUnicodeString(a, unicodeA) != B_OK
98			|| _ToUnicodeString(b, unicodeB) != B_OK) {
99			status = B_BAD_VALUE;
100		}
101
102		result = fCollator->compare(unicodeA, unicodeB, icuStatus);
103	}
104
105	if (!U_SUCCESS(icuStatus))
106		status = B_BAD_VALUE;
107
108	return status;
109}
110
111
112status_t
113ICUCollateData::Strxfrm(char* out, const char* in, size_t size, size_t& outSize)
114{
115	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
116		// handle POSIX here as the collator ICU uses for that (english) is
117		// incompatible in too many ways
118		outSize = strlcpy(out, in, size);
119		for (const char* inIter = in; *inIter != 0; ++inIter) {
120			if (*inIter < 0)
121				return B_BAD_VALUE;
122		}
123		return B_OK;
124	}
125
126	if (in == NULL) {
127		outSize = 0;
128		return B_OK;
129	}
130
131	UnicodeString unicodeIn;
132	if (_ToUnicodeString(in, unicodeIn) != B_OK)
133		return B_BAD_VALUE;
134
135	outSize = fCollator->getSortKey(unicodeIn, (uint8_t*)out, size);
136
137	return B_OK;
138}
139
140
141status_t
142ICUCollateData::Wcscoll(const wchar_t* a, const wchar_t* b, int& result)
143{
144	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
145		// handle POSIX here as the collator ICU uses for that (english) is
146		// incompatible in too many ways
147		result = wcscmp(a, b);
148		for (const wchar_t* aIter = a; *aIter != 0; ++aIter) {
149			if (*aIter > 127)
150				return B_BAD_VALUE;
151		}
152		for (const wchar_t* bIter = b; *bIter != 0; ++bIter) {
153			if (*bIter > 127)
154				return B_BAD_VALUE;
155		}
156		return B_OK;
157	}
158
159	UnicodeString unicodeA = UnicodeString::fromUTF32((UChar32*)a, -1);
160	UnicodeString unicodeB = UnicodeString::fromUTF32((UChar32*)b, -1);
161
162	UErrorCode icuStatus = U_ZERO_ERROR;
163	result = fCollator->compare(unicodeA, unicodeB, icuStatus);
164
165	if (!U_SUCCESS(icuStatus))
166		return B_BAD_VALUE;
167
168	return B_OK;
169}
170
171
172status_t
173ICUCollateData::Wcsxfrm(wchar_t* out, const wchar_t* in, size_t size,
174	size_t& outSize)
175{
176	if (in == NULL) {
177		outSize = 0;
178		return B_OK;
179	}
180
181	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
182		// handle POSIX here as the collator ICU uses for that (english) is
183		// incompatible in too many ways
184		outSize = wcslcpy(out, in, size);
185		for (const wchar_t* inIter = in; *inIter != 0; ++inIter) {
186			if (*inIter > 127)
187				return B_BAD_VALUE;
188		}
189		return B_OK;
190	}
191
192	UnicodeString unicodeIn = UnicodeString::fromUTF32((UChar32*)in, -1);
193	size_t requiredSize = fCollator->getSortKey(unicodeIn, NULL, 0);
194
195	uint8_t* buffer = (uint8_t*)out;
196	outSize = fCollator->getSortKey(unicodeIn, buffer, requiredSize);
197
198	// convert 1-byte characters to 4-byte wide characters:
199	for (size_t i = 0; i < outSize; ++i)
200		out[outSize - 1 - i] = buffer[outSize - 1 - i];
201
202	return B_OK;
203}
204
205
206status_t
207ICUCollateData::_ToUnicodeString(const char* in, UnicodeString& out)
208{
209	out.remove();
210
211	if (in == NULL)
212		return B_OK;
213
214	size_t inLen = strlen(in);
215	if (inLen == 0)
216		return B_OK;
217
218	UConverter* converter;
219	status_t result = _GetConverter(converter);
220	if (result != B_OK)
221		return result;
222
223	UErrorCode icuStatus = U_ZERO_ERROR;
224	int32_t outLen = ucnv_toUChars(converter, NULL, 0, in, inLen, &icuStatus);
225	if (icuStatus != U_BUFFER_OVERFLOW_ERROR)
226		return B_BAD_VALUE;
227	if (outLen < 0)
228		return B_ERROR;
229	if (outLen == 0)
230		return B_OK;
231
232	UChar* outBuf = out.getBuffer(outLen + 1);
233	icuStatus = U_ZERO_ERROR;
234	outLen
235		= ucnv_toUChars(converter, outBuf, outLen + 1, in, inLen, &icuStatus);
236	if (!U_SUCCESS(icuStatus)) {
237		out.releaseBuffer(0);
238		return B_BAD_VALUE;
239	}
240
241	out.releaseBuffer(outLen);
242
243	return B_OK;
244}
245
246
247}	// namespace Libroot
248}	// namespace BPrivate
249