1/* 2 * Copyright (C) 2008, 2014 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of Apple Inc. ("Apple") nor the names of 14 * its contributors may be used to endorse or promote products derived 15 * from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#include "config.h" 30#include <wtf/unicode/Collator.h> 31 32// FIXME: Merge this with CollatorDefault.cpp into a single Collator.cpp source file. 33 34#if !UCONFIG_NO_COLLATION 35 36#include <mutex> 37#include <unicode/ucol.h> 38#include <wtf/NeverDestroyed.h> 39#include <wtf/StringExtras.h> 40#include <wtf/text/StringView.h> 41 42#if OS(DARWIN) && USE(CF) 43#include <CoreFoundation/CoreFoundation.h> 44#include <wtf/RetainPtr.h> 45#endif 46 47namespace WTF { 48 49static UCollator* cachedCollator; 50static char* cachedCollatorLocale; 51static bool cachedCollatorShouldSortLowercaseFirst; 52 53static std::mutex& cachedCollatorMutex() 54{ 55 static std::once_flag onceFlag; 56 57 static LazyNeverDestroyed<std::mutex> mutex; 58 std::call_once(onceFlag, []{ 59 mutex.construct(); 60 }); 61 62 return mutex; 63} 64 65#if !(OS(DARWIN) && USE(CF)) 66 67static inline const char* resolveDefaultLocale(const char* locale) 68{ 69 return locale; 70} 71 72#else 73 74static inline char* copyShortASCIIString(CFStringRef string) 75{ 76 // OK to have a fixed size buffer and to only handle ASCII since we only use this for locale names. 77 char buffer[256]; 78 if (!string || !CFStringGetCString(string, buffer, sizeof(buffer), kCFStringEncodingASCII)) 79 return strdup(""); 80 return strdup(buffer); 81} 82 83static char* copyDefaultLocale() 84{ 85#if !PLATFORM(IOS) 86 return copyShortASCIIString(static_cast<CFStringRef>(CFLocaleGetValue(adoptCF(CFLocaleCopyCurrent()).get(), kCFLocaleCollatorIdentifier))); 87#else 88 // FIXME: Documentation claims the code above would work on iOS 4.0 and later. After test that works, we should remove this and use that instead. 89 return copyShortASCIIString(adoptCF(static_cast<CFStringRef>(CFPreferencesCopyValue(CFSTR("AppleCollationOrder"), kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost))).get()); 90#endif 91} 92 93static inline const char* resolveDefaultLocale(const char* locale) 94{ 95 if (locale) 96 return locale; 97 // Since iOS and OS X don't set UNIX locale to match the user's selected locale, the ICU default locale is not the right one. 98 // So, instead of passing null to ICU, we pass the name of the user's selected locale. 99 static char* defaultLocale; 100 static std::once_flag initializeDefaultLocaleOnce; 101 std::call_once(initializeDefaultLocaleOnce, []{ 102 defaultLocale = copyDefaultLocale(); 103 }); 104 return defaultLocale; 105} 106 107#endif 108 109static inline bool localesMatch(const char* a, const char* b) 110{ 111 // Two null locales are equal, other locales are compared with strcmp. 112 return a == b || (a && b && !strcmp(a, b)); 113} 114 115Collator::Collator(const char* locale, bool shouldSortLowercaseFirst) 116{ 117 UErrorCode status = U_ZERO_ERROR; 118 119 { 120 std::lock_guard<std::mutex> lock(cachedCollatorMutex()); 121 if (cachedCollator && localesMatch(cachedCollatorLocale, locale) && cachedCollatorShouldSortLowercaseFirst == shouldSortLowercaseFirst) { 122 m_collator = cachedCollator; 123 m_locale = cachedCollatorLocale; 124 m_shouldSortLowercaseFirst = shouldSortLowercaseFirst; 125 cachedCollator = nullptr; 126 cachedCollatorLocale = nullptr; 127 return; 128 } 129 } 130 131 m_collator = ucol_open(resolveDefaultLocale(locale), &status); 132 if (U_FAILURE(status)) { 133 status = U_ZERO_ERROR; 134 m_collator = ucol_open("", &status); // Fall back to Unicode Collation Algorithm. 135 } 136 ASSERT(U_SUCCESS(status)); 137 138 ucol_setAttribute(m_collator, UCOL_CASE_FIRST, shouldSortLowercaseFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status); 139 ASSERT(U_SUCCESS(status)); 140 141 ucol_setAttribute(m_collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 142 ASSERT(U_SUCCESS(status)); 143 144 m_locale = locale ? strdup(locale) : nullptr; 145 m_shouldSortLowercaseFirst = shouldSortLowercaseFirst; 146} 147 148Collator::~Collator() 149{ 150 { 151 std::lock_guard<std::mutex> lock(cachedCollatorMutex()); 152 if (cachedCollator) 153 ucol_close(cachedCollator); 154 cachedCollator = m_collator; 155 cachedCollatorLocale = m_locale; 156 cachedCollatorShouldSortLowercaseFirst = m_shouldSortLowercaseFirst; 157 m_collator = nullptr; 158 m_locale = nullptr; 159 } 160 161 free(m_locale); 162} 163 164static int32_t getIndexLatin1(UCharIterator* iterator, UCharIteratorOrigin origin) 165{ 166 switch (origin) { 167 case UITER_START: 168 return iterator->start; 169 case UITER_CURRENT: 170 return iterator->index; 171 case UITER_LIMIT: 172 return iterator->limit; 173 case UITER_ZERO: 174 return 0; 175 case UITER_LENGTH: 176 return iterator->length; 177 } 178 ASSERT_NOT_REACHED(); 179 return U_SENTINEL; 180} 181 182static int32_t moveLatin1(UCharIterator* iterator, int32_t delta, UCharIteratorOrigin origin) 183{ 184 return iterator->index = getIndexLatin1(iterator, origin) + delta; 185} 186 187static UBool hasNextLatin1(UCharIterator* iterator) 188{ 189 return iterator->index < iterator->limit; 190} 191 192static UBool hasPreviousLatin1(UCharIterator* iterator) 193{ 194 return iterator->index > iterator->start; 195} 196 197static UChar32 currentLatin1(UCharIterator* iterator) 198{ 199 ASSERT(iterator->index >= iterator->start); 200 if (iterator->index >= iterator->limit) 201 return U_SENTINEL; 202 return static_cast<const LChar*>(iterator->context)[iterator->index]; 203} 204 205static UChar32 nextLatin1(UCharIterator* iterator) 206{ 207 ASSERT(iterator->index >= iterator->start); 208 if (iterator->index >= iterator->limit) 209 return U_SENTINEL; 210 return static_cast<const LChar*>(iterator->context)[iterator->index++]; 211} 212 213static UChar32 previousLatin1(UCharIterator* iterator) 214{ 215 if (iterator->index <= iterator->start) 216 return U_SENTINEL; 217 return static_cast<const LChar*>(iterator->context)[--iterator->index]; 218} 219 220static uint32_t getStateLatin1(const UCharIterator* iterator) 221{ 222 return iterator->index; 223} 224 225static void setStateLatin1(UCharIterator* iterator, uint32_t state, UErrorCode*) 226{ 227 iterator->index = state; 228} 229 230static UCharIterator createLatin1Iterator(const LChar* characters, int length) 231{ 232 UCharIterator iterator; 233 iterator.context = characters; 234 iterator.length = length; 235 iterator.start = 0; 236 iterator.index = 0; 237 iterator.limit = length; 238 iterator.reservedField = 0; 239 iterator.getIndex = getIndexLatin1; 240 iterator.move = moveLatin1; 241 iterator.hasNext = hasNextLatin1; 242 iterator.hasPrevious = hasPreviousLatin1; 243 iterator.current = currentLatin1; 244 iterator.next = nextLatin1; 245 iterator.previous = previousLatin1; 246 iterator.reservedFn = nullptr; 247 iterator.getState = getStateLatin1; 248 iterator.setState = setStateLatin1; 249 return iterator; 250} 251 252static UCharIterator createIterator(StringView string) 253{ 254 if (string.is8Bit()) 255 return createLatin1Iterator(string.characters8(), string.length()); 256 UCharIterator iterator; 257 uiter_setString(&iterator, string.characters16(), string.length()); 258 return iterator; 259} 260 261int Collator::collate(StringView a, StringView b) const 262{ 263 UCharIterator iteratorA = createIterator(a); 264 UCharIterator iteratorB = createIterator(b); 265 UErrorCode status = U_ZERO_ERROR; 266 int result = ucol_strcollIter(m_collator, &iteratorA, &iteratorB, &status); 267 ASSERT(U_SUCCESS(status)); 268 return result; 269} 270 271static UCharIterator createIteratorUTF8(const char* string) 272{ 273 UCharIterator iterator; 274 uiter_setUTF8(&iterator, string, strlen(string)); 275 return iterator; 276} 277 278int Collator::collateUTF8(const char* a, const char* b) const 279{ 280 UCharIterator iteratorA = createIteratorUTF8(a); 281 UCharIterator iteratorB = createIteratorUTF8(b); 282 UErrorCode status = U_ZERO_ERROR; 283 int result = ucol_strcollIter(m_collator, &iteratorA, &iteratorB, &status); 284 ASSERT(U_SUCCESS(status)); 285 return result; 286} 287 288} // namespace WTF 289 290#endif 291