1/*
2 * Copyright (C) 2008, 2014 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1.  Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 * 2.  Redistributions in binary form must reproduce the above copyright
11 *     notice, this list of conditions and the following disclaimer in the
12 *     documentation and/or other materials provided with the distribution.
13 * 3.  Neither the name of Apple Inc. ("Apple") nor the names of
14 *     its contributors may be used to endorse or promote products derived
15 *     from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "config.h"
30#include <wtf/unicode/Collator.h>
31
32// FIXME: Merge this with CollatorDefault.cpp into a single Collator.cpp source file.
33
34#if !UCONFIG_NO_COLLATION
35
36#include <mutex>
37#include <unicode/ucol.h>
38#include <wtf/NeverDestroyed.h>
39#include <wtf/StringExtras.h>
40#include <wtf/text/StringView.h>
41
42#if OS(DARWIN) && USE(CF)
43#include <CoreFoundation/CoreFoundation.h>
44#include <wtf/RetainPtr.h>
45#endif
46
47namespace WTF {
48
49static UCollator* cachedCollator;
50static char* cachedCollatorLocale;
51static bool cachedCollatorShouldSortLowercaseFirst;
52
53static std::mutex& cachedCollatorMutex()
54{
55    static std::once_flag onceFlag;
56
57    static LazyNeverDestroyed<std::mutex> mutex;
58    std::call_once(onceFlag, []{
59        mutex.construct();
60    });
61
62    return mutex;
63}
64
65#if !(OS(DARWIN) && USE(CF))
66
// On this configuration no platform lookup is performed: the requested locale
// (which may be null) is handed back unchanged.
static inline const char* resolveDefaultLocale(const char* locale)
{
    const char* resolvedLocale = locale;
    return resolvedLocale;
}
71
72#else
73
74static inline char* copyShortASCIIString(CFStringRef string)
75{
76    // OK to have a fixed size buffer and to only handle ASCII since we only use this for locale names.
77    char buffer[256];
78    if (!string || !CFStringGetCString(string, buffer, sizeof(buffer), kCFStringEncodingASCII))
79        return strdup("");
80    return strdup(buffer);
81}
82
83static char* copyDefaultLocale()
84{
85#if !PLATFORM(IOS)
86    return copyShortASCIIString(static_cast<CFStringRef>(CFLocaleGetValue(adoptCF(CFLocaleCopyCurrent()).get(), kCFLocaleCollatorIdentifier)));
87#else
88    // FIXME: Documentation claims the code above would work on iOS 4.0 and later. After test that works, we should remove this and use that instead.
89    return copyShortASCIIString(adoptCF(static_cast<CFStringRef>(CFPreferencesCopyValue(CFSTR("AppleCollationOrder"), kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost))).get());
90#endif
91}
92
93static inline const char* resolveDefaultLocale(const char* locale)
94{
95    if (locale)
96        return locale;
97    // Since iOS and OS X don't set UNIX locale to match the user's selected locale, the ICU default locale is not the right one.
98    // So, instead of passing null to ICU, we pass the name of the user's selected locale.
99    static char* defaultLocale;
100    static std::once_flag initializeDefaultLocaleOnce;
101    std::call_once(initializeDefaultLocaleOnce, []{
102        defaultLocale = copyDefaultLocale();
103    });
104    return defaultLocale;
105}
106
107#endif
108
// Reports whether two locale names denote the same locale.
// Identical pointers (including two nulls) match; a null never matches a
// non-null name; otherwise the names are compared with strcmp.
static inline bool localesMatch(const char* a, const char* b)
{
    if (a == b)
        return true;
    if (!a || !b)
        return false;
    return !strcmp(a, b);
}
114
115Collator::Collator(const char* locale, bool shouldSortLowercaseFirst)
116{
117    UErrorCode status = U_ZERO_ERROR;
118
119    {
120        std::lock_guard<std::mutex> lock(cachedCollatorMutex());
121        if (cachedCollator && localesMatch(cachedCollatorLocale, locale) && cachedCollatorShouldSortLowercaseFirst == shouldSortLowercaseFirst) {
122            m_collator = cachedCollator;
123            m_locale = cachedCollatorLocale;
124            m_shouldSortLowercaseFirst = shouldSortLowercaseFirst;
125            cachedCollator = nullptr;
126            cachedCollatorLocale = nullptr;
127            return;
128        }
129    }
130
131    m_collator = ucol_open(resolveDefaultLocale(locale), &status);
132    if (U_FAILURE(status)) {
133        status = U_ZERO_ERROR;
134        m_collator = ucol_open("", &status); // Fall back to Unicode Collation Algorithm.
135    }
136    ASSERT(U_SUCCESS(status));
137
138    ucol_setAttribute(m_collator, UCOL_CASE_FIRST, shouldSortLowercaseFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status);
139    ASSERT(U_SUCCESS(status));
140
141    ucol_setAttribute(m_collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
142    ASSERT(U_SUCCESS(status));
143
144    m_locale = locale ? strdup(locale) : nullptr;
145    m_shouldSortLowercaseFirst = shouldSortLowercaseFirst;
146}
147
148Collator::~Collator()
149{
150    {
151        std::lock_guard<std::mutex> lock(cachedCollatorMutex());
152        if (cachedCollator)
153            ucol_close(cachedCollator);
154        cachedCollator = m_collator;
155        cachedCollatorLocale = m_locale;
156        cachedCollatorShouldSortLowercaseFirst = m_shouldSortLowercaseFirst;
157        m_collator = nullptr;
158        m_locale = nullptr;
159    }
160
161    free(m_locale);
162}
163
164static int32_t getIndexLatin1(UCharIterator* iterator, UCharIteratorOrigin origin)
165{
166    switch (origin) {
167    case UITER_START:
168        return iterator->start;
169    case UITER_CURRENT:
170        return iterator->index;
171    case UITER_LIMIT:
172        return iterator->limit;
173    case UITER_ZERO:
174        return 0;
175    case UITER_LENGTH:
176        return iterator->length;
177    }
178    ASSERT_NOT_REACHED();
179    return U_SENTINEL;
180}
181
182static int32_t moveLatin1(UCharIterator* iterator, int32_t delta, UCharIteratorOrigin origin)
183{
184    return iterator->index = getIndexLatin1(iterator, origin) + delta;
185}
186
187static UBool hasNextLatin1(UCharIterator* iterator)
188{
189    return iterator->index < iterator->limit;
190}
191
192static UBool hasPreviousLatin1(UCharIterator* iterator)
193{
194    return iterator->index > iterator->start;
195}
196
197static UChar32 currentLatin1(UCharIterator* iterator)
198{
199    ASSERT(iterator->index >= iterator->start);
200    if (iterator->index >= iterator->limit)
201        return U_SENTINEL;
202    return static_cast<const LChar*>(iterator->context)[iterator->index];
203}
204
205static UChar32 nextLatin1(UCharIterator* iterator)
206{
207    ASSERT(iterator->index >= iterator->start);
208    if (iterator->index >= iterator->limit)
209        return U_SENTINEL;
210    return static_cast<const LChar*>(iterator->context)[iterator->index++];
211}
212
213static UChar32 previousLatin1(UCharIterator* iterator)
214{
215    if (iterator->index <= iterator->start)
216        return U_SENTINEL;
217    return static_cast<const LChar*>(iterator->context)[--iterator->index];
218}
219
220static uint32_t getStateLatin1(const UCharIterator* iterator)
221{
222    return iterator->index;
223}
224
225static void setStateLatin1(UCharIterator* iterator, uint32_t state, UErrorCode*)
226{
227    iterator->index = state;
228}
229
230static UCharIterator createLatin1Iterator(const LChar* characters, int length)
231{
232    UCharIterator iterator;
233    iterator.context = characters;
234    iterator.length = length;
235    iterator.start = 0;
236    iterator.index = 0;
237    iterator.limit = length;
238    iterator.reservedField = 0;
239    iterator.getIndex = getIndexLatin1;
240    iterator.move = moveLatin1;
241    iterator.hasNext = hasNextLatin1;
242    iterator.hasPrevious = hasPreviousLatin1;
243    iterator.current = currentLatin1;
244    iterator.next = nextLatin1;
245    iterator.previous = previousLatin1;
246    iterator.reservedFn = nullptr;
247    iterator.getState = getStateLatin1;
248    iterator.setState = setStateLatin1;
249    return iterator;
250}
251
252static UCharIterator createIterator(StringView string)
253{
254    if (string.is8Bit())
255        return createLatin1Iterator(string.characters8(), string.length());
256    UCharIterator iterator;
257    uiter_setString(&iterator, string.characters16(), string.length());
258    return iterator;
259}
260
261int Collator::collate(StringView a, StringView b) const
262{
263    UCharIterator iteratorA = createIterator(a);
264    UCharIterator iteratorB = createIterator(b);
265    UErrorCode status = U_ZERO_ERROR;
266    int result = ucol_strcollIter(m_collator, &iteratorA, &iteratorB, &status);
267    ASSERT(U_SUCCESS(status));
268    return result;
269}
270
271static UCharIterator createIteratorUTF8(const char* string)
272{
273    UCharIterator iterator;
274    uiter_setUTF8(&iterator, string, strlen(string));
275    return iterator;
276}
277
278int Collator::collateUTF8(const char* a, const char* b) const
279{
280    UCharIterator iteratorA = createIteratorUTF8(a);
281    UCharIterator iteratorB = createIteratorUTF8(b);
282    UErrorCode status = U_ZERO_ERROR;
283    int result = ucol_strcollIter(m_collator, &iteratorA, &iteratorB, &status);
284    ASSERT(U_SUCCESS(status));
285    return result;
286}
287
288} // namespace WTF
289
290#endif
291