/* ******************************************************************************* * Copyright (C) 1996-2014, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: ucol_res.cpp * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * Description: * This file contains dependencies that the collation run-time doesn't normally * need. This mainly contains resource bundle usage and collation meta information * * Modification history * Date Name Comments * 1996-1999 various members of ICU team maintained C API for collation framework * 02/16/2001 synwee Added internal method getPrevSpecialCE * 03/01/2001 synwee Added maxexpansion functionality. * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant * 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp * 2012-2014 markus Rewritten in C++ again. */ #include "unicode/utypes.h" #if !UCONFIG_NO_COLLATION #include "unicode/coll.h" #include "unicode/localpointer.h" #include "unicode/locid.h" #include "unicode/tblcoll.h" #include "unicode/ucol.h" #include "unicode/uloc.h" #include "unicode/unistr.h" #include "unicode/ures.h" #include "cmemory.h" #include "cstring.h" #include "collationdatareader.h" #include "collationroot.h" #include "collationtailoring.h" #include "putilimp.h" #include "uassert.h" #include "ucln_in.h" #include "ucol_imp.h" #include "uenumimp.h" #include "ulist.h" #include "umutex.h" #include "uresimp.h" #include "ustrenum.h" #include "utracimp.h" #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) U_NAMESPACE_BEGIN namespace { static const UChar *rootRules = NULL; static int32_t rootRulesLength = 0; static UResourceBundle *rootBundle = NULL; static UInitOnce gInitOnce = U_INITONCE_INITIALIZER; } // namespace U_CDECL_BEGIN static UBool U_CALLCONV ucol_res_cleanup() { rootRules = NULL; rootRulesLength = 0; ures_close(rootBundle); rootBundle = NULL; gInitOnce.reset(); return TRUE; } U_CDECL_END void CollationLoader::loadRootRules(UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return; } rootBundle = ures_open(U_ICUDATA_COLL, kRootLocaleName, &errorCode); if(U_FAILURE(errorCode)) { return; } rootRules = ures_getStringByKey(rootBundle, "UCARules", &rootRulesLength, &errorCode); if(U_FAILURE(errorCode)) { ures_close(rootBundle); rootBundle = NULL; return; } ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup); } void CollationLoader::appendRootRules(UnicodeString &s) { UErrorCode errorCode = U_ZERO_ERROR; umtx_initOnce(gInitOnce, CollationLoader::loadRootRules, errorCode); if(U_SUCCESS(errorCode)) { s.append(rootRules, rootRulesLength); } } UnicodeString * CollationLoader::loadRules(const char *localeID, const char *collationType, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return NULL; } U_ASSERT(collationType != NULL && *collationType != 0); LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, localeID, &errorCode)); LocalUResourceBundlePointer collations( ures_getByKey(bundle.getAlias(), "collations", NULL, &errorCode)); LocalUResourceBundlePointer data( ures_getByKeyWithFallback(collations.getAlias(), collationType, NULL, &errorCode)); int32_t length; const UChar *s = ures_getStringByKey(data.getAlias(), "Sequence", &length, &errorCode); if(U_FAILURE(errorCode)) { return NULL; } // No string pointer aliasing so that we need not hold onto the resource bundle. UnicodeString *rules = new UnicodeString(s, length); if(rules == NULL) { errorCode = U_MEMORY_ALLOCATION_ERROR; return NULL; } return rules; } const CollationTailoring * CollationLoader::loadTailoring(const Locale &locale, Locale &validLocale, UErrorCode &errorCode) { const CollationTailoring *root = CollationRoot::getRoot(errorCode); if(U_FAILURE(errorCode)) { return NULL; } const char *name = locale.getName(); if(*name == 0 || uprv_strcmp(name, "root") == 0) { validLocale = Locale::getRoot(); return root; } LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, name, &errorCode)); if(errorCode == U_MISSING_RESOURCE_ERROR) { errorCode = U_USING_DEFAULT_WARNING; validLocale = Locale::getRoot(); return root; } const char *vLocale = ures_getLocaleByType(bundle.getAlias(), ULOC_ACTUAL_LOCALE, &errorCode); if(U_FAILURE(errorCode)) { return NULL; } validLocale = Locale(vLocale); // There are zero or more tailorings in the collations table. LocalUResourceBundlePointer collations( ures_getByKey(bundle.getAlias(), "collations", NULL, &errorCode)); if(errorCode == U_MISSING_RESOURCE_ERROR) { errorCode = U_USING_DEFAULT_WARNING; return root; } if(U_FAILURE(errorCode)) { return NULL; } // Fetch the collation type from the locale ID and the default type from the data. char type[16]; int32_t typeLength = locale.getKeywordValue("collation", type, LENGTHOF(type) - 1, errorCode); if(U_FAILURE(errorCode)) { errorCode = U_ILLEGAL_ARGUMENT_ERROR; return NULL; } type[typeLength] = 0; // in case of U_NOT_TERMINATED_WARNING char defaultType[16]; { UErrorCode internalErrorCode = U_ZERO_ERROR; LocalUResourceBundlePointer def( ures_getByKeyWithFallback(collations.getAlias(), "default", NULL, &internalErrorCode)); int32_t length; const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode); if(U_SUCCESS(internalErrorCode) && length < LENGTHOF(defaultType)) { u_UCharsToChars(s, defaultType, length + 1); } else { uprv_strcpy(defaultType, "standard"); } } if(typeLength == 0 || uprv_strcmp(type, "default") == 0) { uprv_strcpy(type, defaultType); } // Load the collations/type tailoring, with type fallback. UBool typeFallback = FALSE; LocalUResourceBundlePointer data( ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode)); if(errorCode == U_MISSING_RESOURCE_ERROR && typeLength > 6 && uprv_strncmp(type, "search", 6) == 0) { // fall back from something like "searchjl" to "search" typeFallback = TRUE; type[6] = 0; errorCode = U_ZERO_ERROR; data.adoptInstead( ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode)); } if(errorCode == U_MISSING_RESOURCE_ERROR && uprv_strcmp(type, defaultType) != 0) { // fall back to the default type typeFallback = TRUE; uprv_strcpy(type, defaultType); errorCode = U_ZERO_ERROR; data.adoptInstead( ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode)); } if(errorCode == U_MISSING_RESOURCE_ERROR && uprv_strcmp(type, "standard") != 0) { // fall back to the "standard" type typeFallback = TRUE; uprv_strcpy(type, "standard"); errorCode = U_ZERO_ERROR; data.adoptInstead( ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode)); } if(errorCode == U_MISSING_RESOURCE_ERROR) { errorCode = U_USING_DEFAULT_WARNING; return root; } if(U_FAILURE(errorCode)) { return NULL; } LocalPointer t(new CollationTailoring(root->settings)); if(t.isNull() || t->isBogus()) { errorCode = U_MEMORY_ALLOCATION_ERROR; return NULL; } // Is this the same as the root collator? If so, then use that instead. const char *actualLocale = ures_getLocaleByType(data.getAlias(), ULOC_ACTUAL_LOCALE, &errorCode); if(U_FAILURE(errorCode)) { return NULL; } if((*actualLocale == 0 || uprv_strcmp(actualLocale, "root") == 0) && uprv_strcmp(type, "standard") == 0) { if(typeFallback) { errorCode = U_USING_DEFAULT_WARNING; } return root; } t->actualLocale = Locale(actualLocale); // deserialize LocalUResourceBundlePointer binary( ures_getByKey(data.getAlias(), "%%CollationBin", NULL, &errorCode)); // Note: U_MISSING_RESOURCE_ERROR --> The old code built from rules if available // but that created undesirable dependencies. int32_t length; const uint8_t *inBytes = ures_getBinary(binary.getAlias(), &length, &errorCode); if(U_FAILURE(errorCode)) { return NULL; } CollationDataReader::read(root, inBytes, length, *t, errorCode); // Note: U_COLLATOR_VERSION_MISMATCH --> The old code built from rules if available // but that created undesirable dependencies. if(U_FAILURE(errorCode)) { return NULL; } // Try to fetch the optional rules string. { UErrorCode internalErrorCode = U_ZERO_ERROR; int32_t length; const UChar *s = ures_getStringByKey(data.getAlias(), "Sequence", &length, &internalErrorCode); if(U_SUCCESS(errorCode)) { t->rules.setTo(TRUE, s, length); } } // Set the collation types on the informational locales, // except when they match the default types (for brevity and backwards compatibility). // For the valid locale, suppress the default type. if(uprv_strcmp(type, defaultType) != 0) { validLocale.setKeywordValue("collation", type, errorCode); if(U_FAILURE(errorCode)) { return NULL; } } // For the actual locale, suppress the default type *according to the actual locale*. // For example, zh has default=pinyin and contains all of the Chinese tailorings. // zh_Hant has default=stroke but has no other data. // For the valid locale "zh_Hant" we need to suppress stroke. // For the actual locale "zh" we need to suppress pinyin instead. if(uprv_strcmp(actualLocale, vLocale) != 0) { // Opening a bundle for the actual locale should always succeed. LocalUResourceBundlePointer actualBundle( ures_open(U_ICUDATA_COLL, actualLocale, &errorCode)); if(U_FAILURE(errorCode)) { return NULL; } UErrorCode internalErrorCode = U_ZERO_ERROR; LocalUResourceBundlePointer def( ures_getByKeyWithFallback(actualBundle.getAlias(), "collations/default", NULL, &internalErrorCode)); int32_t length; const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode); if(U_SUCCESS(internalErrorCode) && length < LENGTHOF(defaultType)) { u_UCharsToChars(s, defaultType, length + 1); } else { uprv_strcpy(defaultType, "standard"); } } if(uprv_strcmp(type, defaultType) != 0) { t->actualLocale.setKeywordValue("collation", type, errorCode); if(U_FAILURE(errorCode)) { return NULL; } } if(typeFallback) { errorCode = U_USING_DEFAULT_WARNING; } t->bundle = bundle.orphan(); return t.orphan(); } U_NAMESPACE_END U_NAMESPACE_USE U_CAPI UCollator* ucol_open(const char *loc, UErrorCode *status) { U_NAMESPACE_USE UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN); UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc); UCollator *result = NULL; Collator *coll = Collator::createInstance(loc, *status); if(U_SUCCESS(*status)) { result = coll->toUCollator(); } UTRACE_EXIT_PTR_STATUS(result, *status); return result; } U_CAPI int32_t U_EXPORT2 ucol_getDisplayName( const char *objLoc, const char *dispLoc, UChar *result, int32_t resultLength, UErrorCode *status) { U_NAMESPACE_USE if(U_FAILURE(*status)) return -1; UnicodeString dst; if(!(result==NULL && resultLength==0)) { // NULL destination for pure preflighting: empty dummy string // otherwise, alias the destination buffer dst.setTo(result, 0, resultLength); } Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst); return dst.extract(result, resultLength, *status); } U_CAPI const char* U_EXPORT2 ucol_getAvailable(int32_t index) { int32_t count = 0; const Locale *loc = Collator::getAvailableLocales(count); if (loc != NULL && index < count) { return loc[index].getName(); } return NULL; } U_CAPI int32_t U_EXPORT2 ucol_countAvailable() { int32_t count = 0; Collator::getAvailableLocales(count); return count; } #if !UCONFIG_NO_SERVICE U_CAPI UEnumeration* U_EXPORT2 ucol_openAvailableLocales(UErrorCode *status) { U_NAMESPACE_USE // This is a wrapper over Collator::getAvailableLocales() if (U_FAILURE(*status)) { return NULL; } StringEnumeration *s = icu::Collator::getAvailableLocales(); if (s == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; return NULL; } return uenum_openFromStringEnumeration(s, status); } #endif // Note: KEYWORDS[0] != RESOURCE_NAME - alan static const char RESOURCE_NAME[] = "collations"; static const char* const KEYWORDS[] = { "collation" }; #define KEYWORD_COUNT LENGTHOF(KEYWORDS) U_CAPI UEnumeration* U_EXPORT2 ucol_getKeywords(UErrorCode *status) { UEnumeration *result = NULL; if (U_SUCCESS(*status)) { return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status); } return result; } U_CAPI UEnumeration* U_EXPORT2 ucol_getKeywordValues(const char *keyword, UErrorCode *status) { if (U_FAILURE(*status)) { return NULL; } // hard-coded to accept exactly one collation keyword // modify if additional collation keyword is added later if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0) { *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; } return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status); } static const UEnumeration defaultKeywordValues = { NULL, NULL, ulist_close_keyword_values_iterator, ulist_count_keyword_values, uenum_unextDefault, ulist_next_keyword_value, ulist_reset_keyword_values_iterator }; #include U_CAPI UEnumeration* U_EXPORT2 ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale, UBool /*commonlyUsed*/, UErrorCode* status) { /* Get the locale base name. */ char localeBuffer[ULOC_FULLNAME_CAPACITY] = ""; uloc_getBaseName(locale, localeBuffer, sizeof(localeBuffer), status); /* Create the 2 lists * -values is the temp location for the keyword values * -results hold the actual list used by the UEnumeration object */ UList *values = ulist_createEmptyList(status); UList *results = ulist_createEmptyList(status); UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); if (U_FAILURE(*status) || en == NULL) { if (en == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; } else { uprv_free(en); } ulist_deleteList(values); ulist_deleteList(results); return NULL; } memcpy(en, &defaultKeywordValues, sizeof(UEnumeration)); en->context = results; /* Open the resource bundle for collation with the given locale. */ UResourceBundle bundle, collations, collres, defres; ures_initStackObject(&bundle); ures_initStackObject(&collations); ures_initStackObject(&collres); ures_initStackObject(&defres); ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status); while (U_SUCCESS(*status)) { ures_getByKey(&bundle, RESOURCE_NAME, &collations, status); ures_resetIterator(&collations); while (U_SUCCESS(*status) && ures_hasNext(&collations)) { ures_getNextResource(&collations, &collres, status); const char *key = ures_getKey(&collres); /* If the key is default, get the string and store it in results list only * if results list is empty. */ if (uprv_strcmp(key, "default") == 0) { if (ulist_getListSize(results) == 0) { char *defcoll = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY); int32_t defcollLength = ULOC_KEYWORDS_CAPACITY; ures_getNextResource(&collres, &defres, status); #if U_CHARSET_FAMILY==U_ASCII_FAMILY /* optimize - use the utf-8 string */ ures_getUTF8String(&defres, defcoll, &defcollLength, TRUE, status); #else { const UChar* defString = ures_getString(&defres, &defcollLength, status); if(U_SUCCESS(*status)) { if(defcollLength+1 > ULOC_KEYWORDS_CAPACITY) { *status = U_BUFFER_OVERFLOW_ERROR; } else { u_UCharsToChars(defString, defcoll, defcollLength+1); } } } #endif ulist_addItemBeginList(results, defcoll, TRUE, status); } } else { ulist_addItemEndList(values, key, FALSE, status); } } /* If the locale is "" this is root so exit. */ if (uprv_strlen(localeBuffer) == 0) { break; } /* Get the parent locale and open a new resource bundle. */ uloc_getParent(localeBuffer, localeBuffer, sizeof(localeBuffer), status); ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status); } ures_close(&defres); ures_close(&collres); ures_close(&collations); ures_close(&bundle); if (U_SUCCESS(*status)) { char *value = NULL; ulist_resetList(values); while ((value = (char *)ulist_getNext(values)) != NULL) { if (!ulist_containsString(results, value, (int32_t)uprv_strlen(value))) { ulist_addItemEndList(results, value, FALSE, status); if (U_FAILURE(*status)) { break; } } } } ulist_deleteList(values); if (U_FAILURE(*status)){ uenum_close(en); en = NULL; } else { ulist_resetList(results); } return en; } U_CAPI int32_t U_EXPORT2 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, const char* keyword, const char* locale, UBool* isAvailable, UErrorCode* status) { // N.B.: Resource name is "collations" but keyword is "collation" return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL, "collations", keyword, locale, isAvailable, TRUE, status); } #endif /* #if !UCONFIG_NO_COLLATION */