/*
*******************************************************************************
*
*   Copyright (C) 2009-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  normalizer2.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_NORMALIZATION

#include "unicode/localpointer.h"
#include "unicode/normalizer2.h"
#include "unicode/unistr.h"
#include "unicode/unorm.h"
#include "cpputils.h"
#include "cstring.h"
#include "mutex.h"
#include "normalizer2impl.h"
#include "ucln_cmn.h"
#include "uhash.h"

U_NAMESPACE_BEGIN

// Public API dispatch via Normalizer2 subclasses -------------------------- ***

// Out-of-line destructor for the abstract base class.
Normalizer2::~Normalizer2() {}

// Default implementation: reports that there is no raw decomposition.
// Subclasses that have decomposition data override this.
UBool
Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
    return FALSE;
}

// Default implementation: reports that the pair does not compose
// by returning U_SENTINEL. Subclasses with composition data override this.
UChar32
Normalizer2::composePair(UChar32, UChar32) const {
    return U_SENTINEL;
}

// Default implementation: every code point gets combining class 0.
uint8_t
Normalizer2::getCombiningClass(UChar32 /*c*/) const {
    return 0;
}

// Normalizer2 implementation for the old UNORM_NONE.
54class NoopNormalizer2 : public Normalizer2 { 55 virtual ~NoopNormalizer2(); 56 57 virtual UnicodeString & 58 normalize(const UnicodeString &src, 59 UnicodeString &dest, 60 UErrorCode &errorCode) const { 61 if(U_SUCCESS(errorCode)) { 62 if(&dest!=&src) { 63 dest=src; 64 } else { 65 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 66 } 67 } 68 return dest; 69 } 70 virtual UnicodeString & 71 normalizeSecondAndAppend(UnicodeString &first, 72 const UnicodeString &second, 73 UErrorCode &errorCode) const { 74 if(U_SUCCESS(errorCode)) { 75 if(&first!=&second) { 76 first.append(second); 77 } else { 78 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 79 } 80 } 81 return first; 82 } 83 virtual UnicodeString & 84 append(UnicodeString &first, 85 const UnicodeString &second, 86 UErrorCode &errorCode) const { 87 if(U_SUCCESS(errorCode)) { 88 if(&first!=&second) { 89 first.append(second); 90 } else { 91 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 92 } 93 } 94 return first; 95 } 96 virtual UBool 97 getDecomposition(UChar32, UnicodeString &) const { 98 return FALSE; 99 } 100 // No need to override the default getRawDecomposition(). 101 virtual UBool 102 isNormalized(const UnicodeString &, UErrorCode &) const { 103 return TRUE; 104 } 105 virtual UNormalizationCheckResult 106 quickCheck(const UnicodeString &, UErrorCode &) const { 107 return UNORM_YES; 108 } 109 virtual int32_t 110 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { 111 return s.length(); 112 } 113 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } 114 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } 115 virtual UBool isInert(UChar32) const { return TRUE; } 116}; 117 118NoopNormalizer2::~NoopNormalizer2() {} 119 120// Intermediate class: 121// Has Normalizer2Impl and does boilerplate argument checking and setup. 
122class Normalizer2WithImpl : public Normalizer2 { 123public: 124 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} 125 virtual ~Normalizer2WithImpl(); 126 127 // normalize 128 virtual UnicodeString & 129 normalize(const UnicodeString &src, 130 UnicodeString &dest, 131 UErrorCode &errorCode) const { 132 if(U_FAILURE(errorCode)) { 133 dest.setToBogus(); 134 return dest; 135 } 136 const UChar *sArray=src.getBuffer(); 137 if(&dest==&src || sArray==NULL) { 138 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 139 dest.setToBogus(); 140 return dest; 141 } 142 dest.remove(); 143 ReorderingBuffer buffer(impl, dest); 144 if(buffer.init(src.length(), errorCode)) { 145 normalize(sArray, sArray+src.length(), buffer, errorCode); 146 } 147 return dest; 148 } 149 virtual void 150 normalize(const UChar *src, const UChar *limit, 151 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 152 153 // normalize and append 154 virtual UnicodeString & 155 normalizeSecondAndAppend(UnicodeString &first, 156 const UnicodeString &second, 157 UErrorCode &errorCode) const { 158 return normalizeSecondAndAppend(first, second, TRUE, errorCode); 159 } 160 virtual UnicodeString & 161 append(UnicodeString &first, 162 const UnicodeString &second, 163 UErrorCode &errorCode) const { 164 return normalizeSecondAndAppend(first, second, FALSE, errorCode); 165 } 166 UnicodeString & 167 normalizeSecondAndAppend(UnicodeString &first, 168 const UnicodeString &second, 169 UBool doNormalize, 170 UErrorCode &errorCode) const { 171 uprv_checkCanGetBuffer(first, errorCode); 172 if(U_FAILURE(errorCode)) { 173 return first; 174 } 175 const UChar *secondArray=second.getBuffer(); 176 if(&first==&second || secondArray==NULL) { 177 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 178 return first; 179 } 180 int32_t firstLength=first.length(); 181 UnicodeString safeMiddle; 182 { 183 ReorderingBuffer buffer(impl, first); 184 if(buffer.init(firstLength+second.length(), errorCode)) { 185 normalizeAndAppend(secondArray, 
secondArray+second.length(), doNormalize, 186 safeMiddle, buffer, errorCode); 187 } 188 } // The ReorderingBuffer destructor finalizes the first string. 189 if(U_FAILURE(errorCode)) { 190 // Restore the modified suffix of the first string. 191 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); 192 } 193 return first; 194 } 195 virtual void 196 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 197 UnicodeString &safeMiddle, 198 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 199 virtual UBool 200 getDecomposition(UChar32 c, UnicodeString &decomposition) const { 201 UChar buffer[4]; 202 int32_t length; 203 const UChar *d=impl.getDecomposition(c, buffer, length); 204 if(d==NULL) { 205 return FALSE; 206 } 207 if(d==buffer) { 208 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) 209 } else { 210 decomposition.setTo(FALSE, d, length); // read-only alias 211 } 212 return TRUE; 213 } 214 virtual UBool 215 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { 216 UChar buffer[30]; 217 int32_t length; 218 const UChar *d=impl.getRawDecomposition(c, buffer, length); 219 if(d==NULL) { 220 return FALSE; 221 } 222 if(d==buffer) { 223 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) 224 } else { 225 decomposition.setTo(FALSE, d, length); // read-only alias 226 } 227 return TRUE; 228 } 229 virtual UChar32 230 composePair(UChar32 a, UChar32 b) const { 231 return impl.composePair(a, b); 232 } 233 234 virtual uint8_t 235 getCombiningClass(UChar32 c) const { 236 return impl.getCC(impl.getNorm16(c)); 237 } 238 239 // quick checks 240 virtual UBool 241 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 242 if(U_FAILURE(errorCode)) { 243 return FALSE; 244 } 245 const UChar *sArray=s.getBuffer(); 246 if(sArray==NULL) { 247 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 248 return FALSE; 249 } 250 const UChar *sLimit=sArray+s.length(); 251 
return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); 252 } 253 virtual UNormalizationCheckResult 254 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 255 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; 256 } 257 virtual int32_t 258 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { 259 if(U_FAILURE(errorCode)) { 260 return 0; 261 } 262 const UChar *sArray=s.getBuffer(); 263 if(sArray==NULL) { 264 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 265 return 0; 266 } 267 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); 268 } 269 virtual const UChar * 270 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; 271 272 virtual UNormalizationCheckResult getQuickCheck(UChar32) const { 273 return UNORM_YES; 274 } 275 276 const Normalizer2Impl &impl; 277}; 278 279Normalizer2WithImpl::~Normalizer2WithImpl() {} 280 281class DecomposeNormalizer2 : public Normalizer2WithImpl { 282public: 283 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 284 virtual ~DecomposeNormalizer2(); 285 286private: 287 virtual void 288 normalize(const UChar *src, const UChar *limit, 289 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 290 impl.decompose(src, limit, &buffer, errorCode); 291 } 292 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 293 virtual void 294 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 295 UnicodeString &safeMiddle, 296 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 297 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 298 } 299 virtual const UChar * 300 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 301 return impl.decompose(src, limit, NULL, errorCode); 302 } 303 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 
304 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 305 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; 306 } 307 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } 308 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } 309 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } 310}; 311 312DecomposeNormalizer2::~DecomposeNormalizer2() {} 313 314class ComposeNormalizer2 : public Normalizer2WithImpl { 315public: 316 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : 317 Normalizer2WithImpl(ni), onlyContiguous(fcc) {} 318 virtual ~ComposeNormalizer2(); 319 320private: 321 virtual void 322 normalize(const UChar *src, const UChar *limit, 323 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 324 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); 325 } 326 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 
327 virtual void 328 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 329 UnicodeString &safeMiddle, 330 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 331 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); 332 } 333 334 virtual UBool 335 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 336 if(U_FAILURE(errorCode)) { 337 return FALSE; 338 } 339 const UChar *sArray=s.getBuffer(); 340 if(sArray==NULL) { 341 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 342 return FALSE; 343 } 344 UnicodeString temp; 345 ReorderingBuffer buffer(impl, temp); 346 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization 347 return FALSE; 348 } 349 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); 350 } 351 virtual UNormalizationCheckResult 352 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 353 if(U_FAILURE(errorCode)) { 354 return UNORM_MAYBE; 355 } 356 const UChar *sArray=s.getBuffer(); 357 if(sArray==NULL) { 358 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 359 return UNORM_MAYBE; 360 } 361 UNormalizationCheckResult qcResult=UNORM_YES; 362 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); 363 return qcResult; 364 } 365 virtual const UChar * 366 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { 367 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); 368 } 369 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 
370 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 371 return impl.getCompQuickCheck(impl.getNorm16(c)); 372 } 373 virtual UBool hasBoundaryBefore(UChar32 c) const { 374 return impl.hasCompBoundaryBefore(c); 375 } 376 virtual UBool hasBoundaryAfter(UChar32 c) const { 377 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); 378 } 379 virtual UBool isInert(UChar32 c) const { 380 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); 381 } 382 383 const UBool onlyContiguous; 384}; 385 386ComposeNormalizer2::~ComposeNormalizer2() {} 387 388class FCDNormalizer2 : public Normalizer2WithImpl { 389public: 390 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 391 virtual ~FCDNormalizer2(); 392 393private: 394 virtual void 395 normalize(const UChar *src, const UChar *limit, 396 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 397 impl.makeFCD(src, limit, &buffer, errorCode); 398 } 399 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 400 virtual void 401 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 402 UnicodeString &safeMiddle, 403 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 404 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 405 } 406 virtual const UChar * 407 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 408 return impl.makeFCD(src, limit, NULL, errorCode); 409 } 410 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 
411 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } 412 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } 413 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } 414}; 415 416FCDNormalizer2::~FCDNormalizer2() {} 417 418// instance cache ---------------------------------------------------------- *** 419 420struct Norm2AllModes : public UMemory { 421 static Norm2AllModes *createInstance(const char *packageName, 422 const char *name, 423 UErrorCode &errorCode); 424 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {} 425 426 Normalizer2Impl impl; 427 ComposeNormalizer2 comp; 428 DecomposeNormalizer2 decomp; 429 FCDNormalizer2 fcd; 430 ComposeNormalizer2 fcc; 431}; 432 433Norm2AllModes * 434Norm2AllModes::createInstance(const char *packageName, 435 const char *name, 436 UErrorCode &errorCode) { 437 if(U_FAILURE(errorCode)) { 438 return NULL; 439 } 440 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes); 441 if(allModes.isNull()) { 442 errorCode=U_MEMORY_ALLOCATION_ERROR; 443 return NULL; 444 } 445 allModes->impl.load(packageName, name, errorCode); 446 return U_SUCCESS(errorCode) ? 
allModes.orphan() : NULL; 447} 448 449U_CDECL_BEGIN 450static UBool U_CALLCONV uprv_normalizer2_cleanup(); 451U_CDECL_END 452 453class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> { 454public: 455 Norm2AllModesSingleton(TriStateSingleton &s, const char *n) : 456 TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {} 457 Norm2AllModes *getInstance(UErrorCode &errorCode) { 458 return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode); 459 } 460private: 461 static void *createInstance(const void *context, UErrorCode &errorCode) { 462 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); 463 return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode); 464 } 465 466 const char *name; 467}; 468 469STATIC_TRI_STATE_SINGLETON(nfcSingleton); 470STATIC_TRI_STATE_SINGLETON(nfkcSingleton); 471STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton); 472 473class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> { 474public: 475 Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {} 476 Normalizer2 *getInstance(UErrorCode &errorCode) { 477 return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode); 478 } 479private: 480 static void *createInstance(const void *, UErrorCode &errorCode) { 481 Normalizer2 *noop=new NoopNormalizer2; 482 if(noop==NULL) { 483 errorCode=U_MEMORY_ALLOCATION_ERROR; 484 } 485 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); 486 return noop; 487 } 488}; 489 490STATIC_SIMPLE_SINGLETON(noopSingleton); 491 492static UHashtable *cache=NULL; 493 494U_CDECL_BEGIN 495 496static void U_CALLCONV deleteNorm2AllModes(void *allModes) { 497 delete (Norm2AllModes *)allModes; 498} 499 500static UBool U_CALLCONV uprv_normalizer2_cleanup() { 501 Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance(); 502 Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance(); 503 
Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance(); 504 Norm2Singleton(noopSingleton).deleteInstance(); 505 uhash_close(cache); 506 cache=NULL; 507 return TRUE; 508} 509 510U_CDECL_END 511 512const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) { 513 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 514 return allModes!=NULL ? &allModes->comp : NULL; 515} 516 517const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { 518 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 519 return allModes!=NULL ? &allModes->decomp : NULL; 520} 521 522const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { 523 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 524 return allModes!=NULL ? &allModes->fcd : NULL; 525} 526 527const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { 528 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 529 return allModes!=NULL ? &allModes->fcc : NULL; 530} 531 532const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) { 533 Norm2AllModes *allModes= 534 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 535 return allModes!=NULL ? &allModes->comp : NULL; 536} 537 538const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) { 539 Norm2AllModes *allModes= 540 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 541 return allModes!=NULL ? &allModes->decomp : NULL; 542} 543 544const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) { 545 Norm2AllModes *allModes= 546 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); 547 return allModes!=NULL ? 
&allModes->comp : NULL; 548} 549 550const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { 551 return Norm2Singleton(noopSingleton).getInstance(errorCode); 552} 553 554const Normalizer2 * 555Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { 556 if(U_FAILURE(errorCode)) { 557 return NULL; 558 } 559 switch(mode) { 560 case UNORM_NFD: 561 return getNFDInstance(errorCode); 562 case UNORM_NFKD: 563 return getNFKDInstance(errorCode); 564 case UNORM_NFC: 565 return getNFCInstance(errorCode); 566 case UNORM_NFKC: 567 return getNFKCInstance(errorCode); 568 case UNORM_FCD: 569 return getFCDInstance(errorCode); 570 default: // UNORM_NONE 571 return getNoopInstance(errorCode); 572 } 573} 574 575const Normalizer2Impl * 576Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { 577 Norm2AllModes *allModes= 578 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 579 return allModes!=NULL ? &allModes->impl : NULL; 580} 581 582const Normalizer2Impl * 583Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { 584 Norm2AllModes *allModes= 585 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 586 return allModes!=NULL ? &allModes->impl : NULL; 587} 588 589const Normalizer2Impl * 590Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { 591 Norm2AllModes *allModes= 592 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); 593 return allModes!=NULL ? 
&allModes->impl : NULL; 594} 595 596const Normalizer2Impl * 597Normalizer2Factory::getImpl(const Normalizer2 *norm2) { 598 return &((Normalizer2WithImpl *)norm2)->impl; 599} 600 601const Normalizer2 * 602Normalizer2::getNFCInstance(UErrorCode &errorCode) { 603 return Normalizer2Factory::getNFCInstance(errorCode); 604} 605 606const Normalizer2 * 607Normalizer2::getNFDInstance(UErrorCode &errorCode) { 608 return Normalizer2Factory::getNFDInstance(errorCode); 609} 610 611const Normalizer2 * 612Normalizer2::getNFKCInstance(UErrorCode &errorCode) { 613 return Normalizer2Factory::getNFKCInstance(errorCode); 614} 615 616const Normalizer2 * 617Normalizer2::getNFKDInstance(UErrorCode &errorCode) { 618 return Normalizer2Factory::getNFKDInstance(errorCode); 619} 620 621const Normalizer2 * 622Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { 623 return Normalizer2Factory::getNFKC_CFInstance(errorCode); 624} 625 626const Normalizer2 * 627Normalizer2::getInstance(const char *packageName, 628 const char *name, 629 UNormalization2Mode mode, 630 UErrorCode &errorCode) { 631 if(U_FAILURE(errorCode)) { 632 return NULL; 633 } 634 if(name==NULL || *name==0) { 635 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 636 return NULL; 637 } 638 Norm2AllModes *allModes=NULL; 639 if(packageName==NULL) { 640 if(0==uprv_strcmp(name, "nfc")) { 641 allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 642 } else if(0==uprv_strcmp(name, "nfkc")) { 643 allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 644 } else if(0==uprv_strcmp(name, "nfkc_cf")) { 645 allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); 646 } 647 } 648 if(allModes==NULL && U_SUCCESS(errorCode)) { 649 { 650 Mutex lock; 651 if(cache!=NULL) { 652 allModes=(Norm2AllModes *)uhash_get(cache, name); 653 } 654 } 655 if(allModes==NULL) { 656 LocalPointer<Norm2AllModes> localAllModes( 657 Norm2AllModes::createInstance(packageName, name, errorCode)); 658 
if(U_SUCCESS(errorCode)) { 659 Mutex lock; 660 if(cache==NULL) { 661 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); 662 if(U_FAILURE(errorCode)) { 663 return NULL; 664 } 665 uhash_setKeyDeleter(cache, uprv_free); 666 uhash_setValueDeleter(cache, deleteNorm2AllModes); 667 } 668 void *temp=uhash_get(cache, name); 669 if(temp==NULL) { 670 int32_t keyLength=uprv_strlen(name)+1; 671 char *nameCopy=(char *)uprv_malloc(keyLength); 672 if(nameCopy==NULL) { 673 errorCode=U_MEMORY_ALLOCATION_ERROR; 674 return NULL; 675 } 676 uprv_memcpy(nameCopy, name, keyLength); 677 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode); 678 } else { 679 // race condition 680 allModes=(Norm2AllModes *)temp; 681 } 682 } 683 } 684 } 685 if(allModes!=NULL && U_SUCCESS(errorCode)) { 686 switch(mode) { 687 case UNORM2_COMPOSE: 688 return &allModes->comp; 689 case UNORM2_DECOMPOSE: 690 return &allModes->decomp; 691 case UNORM2_FCD: 692 return &allModes->fcd; 693 case UNORM2_COMPOSE_CONTIGUOUS: 694 return &allModes->fcc; 695 default: 696 break; // do nothing 697 } 698 } 699 return NULL; 700} 701 702U_NAMESPACE_END 703 704// C API ------------------------------------------------------------------- *** 705 706U_NAMESPACE_USE 707 708U_CAPI const UNormalizer2 * U_EXPORT2 709unorm2_getNFCInstance(UErrorCode *pErrorCode) { 710 return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode); 711} 712 713U_CAPI const UNormalizer2 * U_EXPORT2 714unorm2_getNFDInstance(UErrorCode *pErrorCode) { 715 return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode); 716} 717 718U_CAPI const UNormalizer2 * U_EXPORT2 719unorm2_getNFKCInstance(UErrorCode *pErrorCode) { 720 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); 721} 722 723U_CAPI const UNormalizer2 * U_EXPORT2 724unorm2_getNFKDInstance(UErrorCode *pErrorCode) { 725 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); 726} 727 728U_CAPI const UNormalizer2 * 
U_EXPORT2 729unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { 730 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); 731} 732 733U_CAPI const UNormalizer2 * U_EXPORT2 734unorm2_getInstance(const char *packageName, 735 const char *name, 736 UNormalization2Mode mode, 737 UErrorCode *pErrorCode) { 738 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); 739} 740 741U_CAPI void U_EXPORT2 742unorm2_close(UNormalizer2 *norm2) { 743 delete (Normalizer2 *)norm2; 744} 745 746U_CAPI int32_t U_EXPORT2 747unorm2_normalize(const UNormalizer2 *norm2, 748 const UChar *src, int32_t length, 749 UChar *dest, int32_t capacity, 750 UErrorCode *pErrorCode) { 751 if(U_FAILURE(*pErrorCode)) { 752 return 0; 753 } 754 if( (src==NULL ? length!=0 : length<-1) || 755 (dest==NULL ? capacity!=0 : capacity<0) || 756 (src==dest && src!=NULL) 757 ) { 758 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 759 return 0; 760 } 761 UnicodeString destString(dest, 0, capacity); 762 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash. 763 if(length!=0) { 764 const Normalizer2 *n2=(const Normalizer2 *)norm2; 765 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); 766 if(n2wi!=NULL) { 767 // Avoid duplicate argument checking and support NUL-terminated src. 768 ReorderingBuffer buffer(n2wi->impl, destString); 769 if(buffer.init(length, *pErrorCode)) { 770 n2wi->normalize(src, length>=0 ? 
src+length : NULL, buffer, *pErrorCode); 771 } 772 } else { 773 UnicodeString srcString(length<0, src, length); 774 n2->normalize(srcString, destString, *pErrorCode); 775 } 776 } 777 return destString.extract(dest, capacity, *pErrorCode); 778} 779 780static int32_t 781normalizeSecondAndAppend(const UNormalizer2 *norm2, 782 UChar *first, int32_t firstLength, int32_t firstCapacity, 783 const UChar *second, int32_t secondLength, 784 UBool doNormalize, 785 UErrorCode *pErrorCode) { 786 if(U_FAILURE(*pErrorCode)) { 787 return 0; 788 } 789 if( (second==NULL ? secondLength!=0 : secondLength<-1) || 790 (first==NULL ? (firstCapacity!=0 || firstLength!=0) : 791 (firstCapacity<0 || firstLength<-1)) || 792 (first==second && first!=NULL) 793 ) { 794 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 795 return 0; 796 } 797 UnicodeString firstString(first, firstLength, firstCapacity); 798 firstLength=firstString.length(); // In case it was -1. 799 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash. 800 if(secondLength!=0) { 801 const Normalizer2 *n2=(const Normalizer2 *)norm2; 802 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); 803 if(n2wi!=NULL) { 804 // Avoid duplicate argument checking and support NUL-terminated src. 805 UnicodeString safeMiddle; 806 { 807 ReorderingBuffer buffer(n2wi->impl, firstString); 808 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 809 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL, 810 doNormalize, safeMiddle, buffer, *pErrorCode); 811 } 812 } // The ReorderingBuffer destructor finalizes firstString. 813 if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) { 814 // Restore the modified suffix of the first string. 815 // This does not restore first[] array contents between firstLength and firstCapacity. 816 // (That might be uninitialized memory, as far as we know.) 
817 if(first!=NULL) { /* don't dereference NULL */ 818 safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length()); 819 if(firstLength<firstCapacity) { 820 first[firstLength]=0; // NUL-terminate in case it was originally. 821 } 822 } 823 } 824 } else { 825 UnicodeString secondString(secondLength<0, second, secondLength); 826 if(doNormalize) { 827 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode); 828 } else { 829 n2->append(firstString, secondString, *pErrorCode); 830 } 831 } 832 } 833 return firstString.extract(first, firstCapacity, *pErrorCode); 834} 835 836U_CAPI int32_t U_EXPORT2 837unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, 838 UChar *first, int32_t firstLength, int32_t firstCapacity, 839 const UChar *second, int32_t secondLength, 840 UErrorCode *pErrorCode) { 841 return normalizeSecondAndAppend(norm2, 842 first, firstLength, firstCapacity, 843 second, secondLength, 844 TRUE, pErrorCode); 845} 846 847U_CAPI int32_t U_EXPORT2 848unorm2_append(const UNormalizer2 *norm2, 849 UChar *first, int32_t firstLength, int32_t firstCapacity, 850 const UChar *second, int32_t secondLength, 851 UErrorCode *pErrorCode) { 852 return normalizeSecondAndAppend(norm2, 853 first, firstLength, firstCapacity, 854 second, secondLength, 855 FALSE, pErrorCode); 856} 857 858U_CAPI int32_t U_EXPORT2 859unorm2_getDecomposition(const UNormalizer2 *norm2, 860 UChar32 c, UChar *decomposition, int32_t capacity, 861 UErrorCode *pErrorCode) { 862 if(U_FAILURE(*pErrorCode)) { 863 return 0; 864 } 865 if(decomposition==NULL ? 
capacity!=0 : capacity<0) { 866 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 867 return 0; 868 } 869 UnicodeString destString(decomposition, 0, capacity); 870 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) { 871 return destString.extract(decomposition, capacity, *pErrorCode); 872 } else { 873 return -1; 874 } 875} 876 877U_CAPI int32_t U_EXPORT2 878unorm2_getRawDecomposition(const UNormalizer2 *norm2, 879 UChar32 c, UChar *decomposition, int32_t capacity, 880 UErrorCode *pErrorCode) { 881 if(U_FAILURE(*pErrorCode)) { 882 return 0; 883 } 884 if(decomposition==NULL ? capacity!=0 : capacity<0) { 885 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 886 return 0; 887 } 888 UnicodeString destString(decomposition, 0, capacity); 889 if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) { 890 return destString.extract(decomposition, capacity, *pErrorCode); 891 } else { 892 return -1; 893 } 894} 895 896U_CAPI UChar32 U_EXPORT2 897unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) { 898 return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b); 899} 900 901U_CAPI uint8_t U_EXPORT2 902unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) { 903 return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c); 904} 905 906U_CAPI UBool U_EXPORT2 907unorm2_isNormalized(const UNormalizer2 *norm2, 908 const UChar *s, int32_t length, 909 UErrorCode *pErrorCode) { 910 if(U_FAILURE(*pErrorCode)) { 911 return 0; 912 } 913 if((s==NULL && length!=0) || length<-1) { 914 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 915 return 0; 916 } 917 UnicodeString sString(length<0, s, length); 918 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); 919} 920 921U_CAPI UNormalizationCheckResult U_EXPORT2 922unorm2_quickCheck(const UNormalizer2 *norm2, 923 const UChar *s, int32_t length, 924 UErrorCode *pErrorCode) { 925 if(U_FAILURE(*pErrorCode)) { 926 return UNORM_NO; 927 } 928 if((s==NULL && 
length!=0) || length<-1) { 929 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 930 return UNORM_NO; 931 } 932 UnicodeString sString(length<0, s, length); 933 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); 934} 935 936U_CAPI int32_t U_EXPORT2 937unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, 938 const UChar *s, int32_t length, 939 UErrorCode *pErrorCode) { 940 if(U_FAILURE(*pErrorCode)) { 941 return 0; 942 } 943 if((s==NULL && length!=0) || length<-1) { 944 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 945 return 0; 946 } 947 UnicodeString sString(length<0, s, length); 948 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); 949} 950 951U_CAPI UBool U_EXPORT2 952unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { 953 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); 954} 955 956U_CAPI UBool U_EXPORT2 957unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { 958 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); 959} 960 961U_CAPI UBool U_EXPORT2 962unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { 963 return ((const Normalizer2 *)norm2)->isInert(c); 964} 965 966// Some properties APIs ---------------------------------------------------- *** 967 968U_CAPI uint8_t U_EXPORT2 969u_getCombiningClass(UChar32 c) { 970 UErrorCode errorCode=U_ZERO_ERROR; 971 const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode); 972 if(U_SUCCESS(errorCode)) { 973 return nfd->getCombiningClass(c); 974 } else { 975 return 0; 976 } 977} 978 979U_CFUNC UNormalizationCheckResult 980unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { 981 if(mode<=UNORM_NONE || UNORM_FCD<=mode) { 982 return UNORM_YES; 983 } 984 UErrorCode errorCode=U_ZERO_ERROR; 985 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); 986 if(U_SUCCESS(errorCode)) { 987 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); 988 } else { 989 return UNORM_MAYBE; 990 } 991} 992 993U_CFUNC uint16_t 
994unorm_getFCD16(UChar32 c) { 995 UErrorCode errorCode=U_ZERO_ERROR; 996 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); 997 if(U_SUCCESS(errorCode)) { 998 return impl->getFCD16(c); 999 } else { 1000 return 0; 1001 } 1002} 1003 1004#endif // !UCONFIG_NO_NORMALIZATION 1005