1/*
2*******************************************************************************
3*
4*   Copyright (C) 2009-2012, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  normalizer2.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2009nov22
14*   created by: Markus W. Scherer
15*/
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_NORMALIZATION
20
21#include "unicode/localpointer.h"
22#include "unicode/normalizer2.h"
23#include "unicode/unistr.h"
24#include "unicode/unorm.h"
25#include "cpputils.h"
26#include "cstring.h"
27#include "mutex.h"
28#include "normalizer2impl.h"
29#include "ucln_cmn.h"
30#include "uhash.h"
31
32U_NAMESPACE_BEGIN
33
34// Public API dispatch via Normalizer2 subclasses -------------------------- ***
35
36Normalizer2::~Normalizer2() {}
37
38UBool
39Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
40    return FALSE;
41}
42
43UChar32
44Normalizer2::composePair(UChar32, UChar32) const {
45    return U_SENTINEL;
46}
47
48uint8_t
49Normalizer2::getCombiningClass(UChar32 /*c*/) const {
50    return 0;
51}
52
53// Normalizer2 implementation for the old UNORM_NONE.
54class NoopNormalizer2 : public Normalizer2 {
55    virtual ~NoopNormalizer2();
56
57    virtual UnicodeString &
58    normalize(const UnicodeString &src,
59              UnicodeString &dest,
60              UErrorCode &errorCode) const {
61        if(U_SUCCESS(errorCode)) {
62            if(&dest!=&src) {
63                dest=src;
64            } else {
65                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
66            }
67        }
68        return dest;
69    }
70    virtual UnicodeString &
71    normalizeSecondAndAppend(UnicodeString &first,
72                             const UnicodeString &second,
73                             UErrorCode &errorCode) const {
74        if(U_SUCCESS(errorCode)) {
75            if(&first!=&second) {
76                first.append(second);
77            } else {
78                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
79            }
80        }
81        return first;
82    }
83    virtual UnicodeString &
84    append(UnicodeString &first,
85           const UnicodeString &second,
86           UErrorCode &errorCode) const {
87        if(U_SUCCESS(errorCode)) {
88            if(&first!=&second) {
89                first.append(second);
90            } else {
91                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
92            }
93        }
94        return first;
95    }
96    virtual UBool
97    getDecomposition(UChar32, UnicodeString &) const {
98        return FALSE;
99    }
100    // No need to override the default getRawDecomposition().
101    virtual UBool
102    isNormalized(const UnicodeString &, UErrorCode &) const {
103        return TRUE;
104    }
105    virtual UNormalizationCheckResult
106    quickCheck(const UnicodeString &, UErrorCode &) const {
107        return UNORM_YES;
108    }
109    virtual int32_t
110    spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
111        return s.length();
112    }
113    virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
114    virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
115    virtual UBool isInert(UChar32) const { return TRUE; }
116};
117
118NoopNormalizer2::~NoopNormalizer2() {}
119
120// Intermediate class:
121// Has Normalizer2Impl and does boilerplate argument checking and setup.
122class Normalizer2WithImpl : public Normalizer2 {
123public:
124    Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
125    virtual ~Normalizer2WithImpl();
126
127    // normalize
128    virtual UnicodeString &
129    normalize(const UnicodeString &src,
130              UnicodeString &dest,
131              UErrorCode &errorCode) const {
132        if(U_FAILURE(errorCode)) {
133            dest.setToBogus();
134            return dest;
135        }
136        const UChar *sArray=src.getBuffer();
137        if(&dest==&src || sArray==NULL) {
138            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
139            dest.setToBogus();
140            return dest;
141        }
142        dest.remove();
143        ReorderingBuffer buffer(impl, dest);
144        if(buffer.init(src.length(), errorCode)) {
145            normalize(sArray, sArray+src.length(), buffer, errorCode);
146        }
147        return dest;
148    }
149    virtual void
150    normalize(const UChar *src, const UChar *limit,
151              ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
152
153    // normalize and append
154    virtual UnicodeString &
155    normalizeSecondAndAppend(UnicodeString &first,
156                             const UnicodeString &second,
157                             UErrorCode &errorCode) const {
158        return normalizeSecondAndAppend(first, second, TRUE, errorCode);
159    }
160    virtual UnicodeString &
161    append(UnicodeString &first,
162           const UnicodeString &second,
163           UErrorCode &errorCode) const {
164        return normalizeSecondAndAppend(first, second, FALSE, errorCode);
165    }
166    UnicodeString &
167    normalizeSecondAndAppend(UnicodeString &first,
168                             const UnicodeString &second,
169                             UBool doNormalize,
170                             UErrorCode &errorCode) const {
171        uprv_checkCanGetBuffer(first, errorCode);
172        if(U_FAILURE(errorCode)) {
173            return first;
174        }
175        const UChar *secondArray=second.getBuffer();
176        if(&first==&second || secondArray==NULL) {
177            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
178            return first;
179        }
180        int32_t firstLength=first.length();
181        UnicodeString safeMiddle;
182        {
183            ReorderingBuffer buffer(impl, first);
184            if(buffer.init(firstLength+second.length(), errorCode)) {
185                normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
186                                   safeMiddle, buffer, errorCode);
187            }
188        }  // The ReorderingBuffer destructor finalizes the first string.
189        if(U_FAILURE(errorCode)) {
190            // Restore the modified suffix of the first string.
191            first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
192        }
193        return first;
194    }
195    virtual void
196    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
197                       UnicodeString &safeMiddle,
198                       ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
199    virtual UBool
200    getDecomposition(UChar32 c, UnicodeString &decomposition) const {
201        UChar buffer[4];
202        int32_t length;
203        const UChar *d=impl.getDecomposition(c, buffer, length);
204        if(d==NULL) {
205            return FALSE;
206        }
207        if(d==buffer) {
208            decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
209        } else {
210            decomposition.setTo(FALSE, d, length);  // read-only alias
211        }
212        return TRUE;
213    }
214    virtual UBool
215    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
216        UChar buffer[30];
217        int32_t length;
218        const UChar *d=impl.getRawDecomposition(c, buffer, length);
219        if(d==NULL) {
220            return FALSE;
221        }
222        if(d==buffer) {
223            decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
224        } else {
225            decomposition.setTo(FALSE, d, length);  // read-only alias
226        }
227        return TRUE;
228    }
229    virtual UChar32
230    composePair(UChar32 a, UChar32 b) const {
231        return impl.composePair(a, b);
232    }
233
234    virtual uint8_t
235    getCombiningClass(UChar32 c) const {
236        return impl.getCC(impl.getNorm16(c));
237    }
238
239    // quick checks
240    virtual UBool
241    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
242        if(U_FAILURE(errorCode)) {
243            return FALSE;
244        }
245        const UChar *sArray=s.getBuffer();
246        if(sArray==NULL) {
247            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
248            return FALSE;
249        }
250        const UChar *sLimit=sArray+s.length();
251        return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
252    }
253    virtual UNormalizationCheckResult
254    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
255        return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
256    }
257    virtual int32_t
258    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
259        if(U_FAILURE(errorCode)) {
260            return 0;
261        }
262        const UChar *sArray=s.getBuffer();
263        if(sArray==NULL) {
264            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
265            return 0;
266        }
267        return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
268    }
269    virtual const UChar *
270    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
271
272    virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
273        return UNORM_YES;
274    }
275
276    const Normalizer2Impl &impl;
277};
278
279Normalizer2WithImpl::~Normalizer2WithImpl() {}
280
281class DecomposeNormalizer2 : public Normalizer2WithImpl {
282public:
283    DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
284    virtual ~DecomposeNormalizer2();
285
286private:
287    virtual void
288    normalize(const UChar *src, const UChar *limit,
289              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
290        impl.decompose(src, limit, &buffer, errorCode);
291    }
292    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
293    virtual void
294    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
295                       UnicodeString &safeMiddle,
296                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
297        impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
298    }
299    virtual const UChar *
300    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
301        return impl.decompose(src, limit, NULL, errorCode);
302    }
303    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
304    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
305        return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
306    }
307    virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
308    virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
309    virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
310};
311
312DecomposeNormalizer2::~DecomposeNormalizer2() {}
313
314class ComposeNormalizer2 : public Normalizer2WithImpl {
315public:
316    ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
317        Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
318    virtual ~ComposeNormalizer2();
319
320private:
321    virtual void
322    normalize(const UChar *src, const UChar *limit,
323              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
324        impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
325    }
326    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
327    virtual void
328    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
329                       UnicodeString &safeMiddle,
330                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
331        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
332    }
333
334    virtual UBool
335    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
336        if(U_FAILURE(errorCode)) {
337            return FALSE;
338        }
339        const UChar *sArray=s.getBuffer();
340        if(sArray==NULL) {
341            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
342            return FALSE;
343        }
344        UnicodeString temp;
345        ReorderingBuffer buffer(impl, temp);
346        if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
347            return FALSE;
348        }
349        return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
350    }
351    virtual UNormalizationCheckResult
352    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
353        if(U_FAILURE(errorCode)) {
354            return UNORM_MAYBE;
355        }
356        const UChar *sArray=s.getBuffer();
357        if(sArray==NULL) {
358            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
359            return UNORM_MAYBE;
360        }
361        UNormalizationCheckResult qcResult=UNORM_YES;
362        impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
363        return qcResult;
364    }
365    virtual const UChar *
366    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
367        return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
368    }
369    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
370    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
371        return impl.getCompQuickCheck(impl.getNorm16(c));
372    }
373    virtual UBool hasBoundaryBefore(UChar32 c) const {
374        return impl.hasCompBoundaryBefore(c);
375    }
376    virtual UBool hasBoundaryAfter(UChar32 c) const {
377        return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
378    }
379    virtual UBool isInert(UChar32 c) const {
380        return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
381    }
382
383    const UBool onlyContiguous;
384};
385
386ComposeNormalizer2::~ComposeNormalizer2() {}
387
388class FCDNormalizer2 : public Normalizer2WithImpl {
389public:
390    FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
391    virtual ~FCDNormalizer2();
392
393private:
394    virtual void
395    normalize(const UChar *src, const UChar *limit,
396              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
397        impl.makeFCD(src, limit, &buffer, errorCode);
398    }
399    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
400    virtual void
401    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
402                       UnicodeString &safeMiddle,
403                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
404        impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
405    }
406    virtual const UChar *
407    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
408        return impl.makeFCD(src, limit, NULL, errorCode);
409    }
410    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
411    virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
412    virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
413    virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
414};
415
416FCDNormalizer2::~FCDNormalizer2() {}
417
418// instance cache ---------------------------------------------------------- ***
419
420struct Norm2AllModes : public UMemory {
421    static Norm2AllModes *createInstance(const char *packageName,
422                                         const char *name,
423                                         UErrorCode &errorCode);
424    Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
425
426    Normalizer2Impl impl;
427    ComposeNormalizer2 comp;
428    DecomposeNormalizer2 decomp;
429    FCDNormalizer2 fcd;
430    ComposeNormalizer2 fcc;
431};
432
433Norm2AllModes *
434Norm2AllModes::createInstance(const char *packageName,
435                              const char *name,
436                              UErrorCode &errorCode) {
437    if(U_FAILURE(errorCode)) {
438        return NULL;
439    }
440    LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
441    if(allModes.isNull()) {
442        errorCode=U_MEMORY_ALLOCATION_ERROR;
443        return NULL;
444    }
445    allModes->impl.load(packageName, name, errorCode);
446    return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
447}
448
// Forward declaration of the cleanup function defined further below;
// the singleton creators that follow pass it to ucln_common_registerCleanup().
U_CDECL_BEGIN
static UBool U_CALLCONV uprv_normalizer2_cleanup();
U_CDECL_END
452
453class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> {
454public:
455    Norm2AllModesSingleton(TriStateSingleton &s, const char *n) :
456        TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {}
457    Norm2AllModes *getInstance(UErrorCode &errorCode) {
458        return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode);
459    }
460private:
461    static void *createInstance(const void *context, UErrorCode &errorCode) {
462        ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
463        return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode);
464    }
465
466    const char *name;
467};
468
// Singleton storage for the built-in normalization data. Elsewhere in this
// file each one is wrapped in a Norm2AllModesSingleton together with the
// data name "nfc", "nfkc" and "nfkc_cf", respectively.
STATIC_TRI_STATE_SINGLETON(nfcSingleton);
STATIC_TRI_STATE_SINGLETON(nfkcSingleton);
STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton);
472
473class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> {
474public:
475    Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {}
476    Normalizer2 *getInstance(UErrorCode &errorCode) {
477        return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode);
478    }
479private:
480    static void *createInstance(const void *, UErrorCode &errorCode) {
481        Normalizer2 *noop=new NoopNormalizer2;
482        if(noop==NULL) {
483            errorCode=U_MEMORY_ALLOCATION_ERROR;
484        }
485        ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
486        return noop;
487    }
488};
489
// Singleton storage for the no-op normalizer; accessed through Norm2Singleton.
STATIC_SIMPLE_SINGLETON(noopSingleton);
491
// Maps a data name to its Norm2AllModes. Created lazily and accessed under a
// Mutex in Normalizer2::getInstance(); closed in uprv_normalizer2_cleanup().
static UHashtable *cache=NULL;
493
494U_CDECL_BEGIN
495
496static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
497    delete (Norm2AllModes *)allModes;
498}
499
500static UBool U_CALLCONV uprv_normalizer2_cleanup() {
501    Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance();
502    Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance();
503    Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance();
504    Norm2Singleton(noopSingleton).deleteInstance();
505    uhash_close(cache);
506    cache=NULL;
507    return TRUE;
508}
509
510U_CDECL_END
511
512const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
513    Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
514    return allModes!=NULL ? &allModes->comp : NULL;
515}
516
517const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
518    Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
519    return allModes!=NULL ? &allModes->decomp : NULL;
520}
521
522const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
523    Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
524    return allModes!=NULL ? &allModes->fcd : NULL;
525}
526
527const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
528    Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
529    return allModes!=NULL ? &allModes->fcc : NULL;
530}
531
532const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
533    Norm2AllModes *allModes=
534        Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
535    return allModes!=NULL ? &allModes->comp : NULL;
536}
537
538const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
539    Norm2AllModes *allModes=
540        Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
541    return allModes!=NULL ? &allModes->decomp : NULL;
542}
543
544const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
545    Norm2AllModes *allModes=
546        Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
547    return allModes!=NULL ? &allModes->comp : NULL;
548}
549
550const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
551    return Norm2Singleton(noopSingleton).getInstance(errorCode);
552}
553
554const Normalizer2 *
555Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
556    if(U_FAILURE(errorCode)) {
557        return NULL;
558    }
559    switch(mode) {
560    case UNORM_NFD:
561        return getNFDInstance(errorCode);
562    case UNORM_NFKD:
563        return getNFKDInstance(errorCode);
564    case UNORM_NFC:
565        return getNFCInstance(errorCode);
566    case UNORM_NFKC:
567        return getNFKCInstance(errorCode);
568    case UNORM_FCD:
569        return getFCDInstance(errorCode);
570    default:  // UNORM_NONE
571        return getNoopInstance(errorCode);
572    }
573}
574
575const Normalizer2Impl *
576Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
577    Norm2AllModes *allModes=
578        Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
579    return allModes!=NULL ? &allModes->impl : NULL;
580}
581
582const Normalizer2Impl *
583Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
584    Norm2AllModes *allModes=
585        Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
586    return allModes!=NULL ? &allModes->impl : NULL;
587}
588
589const Normalizer2Impl *
590Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
591    Norm2AllModes *allModes=
592        Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
593    return allModes!=NULL ? &allModes->impl : NULL;
594}
595
596const Normalizer2Impl *
597Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
598    return &((Normalizer2WithImpl *)norm2)->impl;
599}
600
601const Normalizer2 *
602Normalizer2::getNFCInstance(UErrorCode &errorCode) {
603    return Normalizer2Factory::getNFCInstance(errorCode);
604}
605
606const Normalizer2 *
607Normalizer2::getNFDInstance(UErrorCode &errorCode) {
608    return Normalizer2Factory::getNFDInstance(errorCode);
609}
610
611const Normalizer2 *
612Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
613    return Normalizer2Factory::getNFKCInstance(errorCode);
614}
615
616const Normalizer2 *
617Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
618    return Normalizer2Factory::getNFKDInstance(errorCode);
619}
620
621const Normalizer2 *
622Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
623    return Normalizer2Factory::getNFKC_CFInstance(errorCode);
624}
625
626const Normalizer2 *
627Normalizer2::getInstance(const char *packageName,
628                         const char *name,
629                         UNormalization2Mode mode,
630                         UErrorCode &errorCode) {
631    if(U_FAILURE(errorCode)) {
632        return NULL;
633    }
634    if(name==NULL || *name==0) {
635        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
636        return NULL;
637    }
638    Norm2AllModes *allModes=NULL;
639    if(packageName==NULL) {
640        if(0==uprv_strcmp(name, "nfc")) {
641            allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
642        } else if(0==uprv_strcmp(name, "nfkc")) {
643            allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
644        } else if(0==uprv_strcmp(name, "nfkc_cf")) {
645            allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
646        }
647    }
648    if(allModes==NULL && U_SUCCESS(errorCode)) {
649        {
650            Mutex lock;
651            if(cache!=NULL) {
652                allModes=(Norm2AllModes *)uhash_get(cache, name);
653            }
654        }
655        if(allModes==NULL) {
656            LocalPointer<Norm2AllModes> localAllModes(
657                Norm2AllModes::createInstance(packageName, name, errorCode));
658            if(U_SUCCESS(errorCode)) {
659                Mutex lock;
660                if(cache==NULL) {
661                    cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
662                    if(U_FAILURE(errorCode)) {
663                        return NULL;
664                    }
665                    uhash_setKeyDeleter(cache, uprv_free);
666                    uhash_setValueDeleter(cache, deleteNorm2AllModes);
667                }
668                void *temp=uhash_get(cache, name);
669                if(temp==NULL) {
670                    int32_t keyLength=uprv_strlen(name)+1;
671                    char *nameCopy=(char *)uprv_malloc(keyLength);
672                    if(nameCopy==NULL) {
673                        errorCode=U_MEMORY_ALLOCATION_ERROR;
674                        return NULL;
675                    }
676                    uprv_memcpy(nameCopy, name, keyLength);
677                    uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
678                } else {
679                    // race condition
680                    allModes=(Norm2AllModes *)temp;
681                }
682            }
683        }
684    }
685    if(allModes!=NULL && U_SUCCESS(errorCode)) {
686        switch(mode) {
687        case UNORM2_COMPOSE:
688            return &allModes->comp;
689        case UNORM2_DECOMPOSE:
690            return &allModes->decomp;
691        case UNORM2_FCD:
692            return &allModes->fcd;
693        case UNORM2_COMPOSE_CONTIGUOUS:
694            return &allModes->fcc;
695        default:
696            break;  // do nothing
697        }
698    }
699    return NULL;
700}
701
702U_NAMESPACE_END
703
704// C API ------------------------------------------------------------------- ***
705
706U_NAMESPACE_USE
707
708U_CAPI const UNormalizer2 * U_EXPORT2
709unorm2_getNFCInstance(UErrorCode *pErrorCode) {
710    return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode);
711}
712
713U_CAPI const UNormalizer2 * U_EXPORT2
714unorm2_getNFDInstance(UErrorCode *pErrorCode) {
715    return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
716}
717
718U_CAPI const UNormalizer2 * U_EXPORT2
719unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
720    return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
721}
722
723U_CAPI const UNormalizer2 * U_EXPORT2
724unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
725    return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
726}
727
728U_CAPI const UNormalizer2 * U_EXPORT2
729unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
730    return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
731}
732
733U_CAPI const UNormalizer2 * U_EXPORT2
734unorm2_getInstance(const char *packageName,
735                   const char *name,
736                   UNormalization2Mode mode,
737                   UErrorCode *pErrorCode) {
738    return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
739}
740
741U_CAPI void U_EXPORT2
742unorm2_close(UNormalizer2 *norm2) {
743    delete (Normalizer2 *)norm2;
744}
745
746U_CAPI int32_t U_EXPORT2
747unorm2_normalize(const UNormalizer2 *norm2,
748                 const UChar *src, int32_t length,
749                 UChar *dest, int32_t capacity,
750                 UErrorCode *pErrorCode) {
751    if(U_FAILURE(*pErrorCode)) {
752        return 0;
753    }
754    if( (src==NULL ? length!=0 : length<-1) ||
755        (dest==NULL ? capacity!=0 : capacity<0) ||
756        (src==dest && src!=NULL)
757    ) {
758        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
759        return 0;
760    }
761    UnicodeString destString(dest, 0, capacity);
762    // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
763    if(length!=0) {
764        const Normalizer2 *n2=(const Normalizer2 *)norm2;
765        const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
766        if(n2wi!=NULL) {
767            // Avoid duplicate argument checking and support NUL-terminated src.
768            ReorderingBuffer buffer(n2wi->impl, destString);
769            if(buffer.init(length, *pErrorCode)) {
770                n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
771            }
772        } else {
773            UnicodeString srcString(length<0, src, length);
774            n2->normalize(srcString, destString, *pErrorCode);
775        }
776    }
777    return destString.extract(dest, capacity, *pErrorCode);
778}
779
780static int32_t
781normalizeSecondAndAppend(const UNormalizer2 *norm2,
782                         UChar *first, int32_t firstLength, int32_t firstCapacity,
783                         const UChar *second, int32_t secondLength,
784                         UBool doNormalize,
785                         UErrorCode *pErrorCode) {
786    if(U_FAILURE(*pErrorCode)) {
787        return 0;
788    }
789    if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
790        (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
791                       (firstCapacity<0 || firstLength<-1)) ||
792        (first==second && first!=NULL)
793    ) {
794        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
795        return 0;
796    }
797    UnicodeString firstString(first, firstLength, firstCapacity);
798    firstLength=firstString.length();  // In case it was -1.
799    // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
800    if(secondLength!=0) {
801        const Normalizer2 *n2=(const Normalizer2 *)norm2;
802        const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
803        if(n2wi!=NULL) {
804            // Avoid duplicate argument checking and support NUL-terminated src.
805            UnicodeString safeMiddle;
806            {
807                ReorderingBuffer buffer(n2wi->impl, firstString);
808                if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
809                    n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
810                                             doNormalize, safeMiddle, buffer, *pErrorCode);
811                }
812            }  // The ReorderingBuffer destructor finalizes firstString.
813            if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
814                // Restore the modified suffix of the first string.
815                // This does not restore first[] array contents between firstLength and firstCapacity.
816                // (That might be uninitialized memory, as far as we know.)
817                if(first!=NULL) { /* don't dereference NULL */
818                  safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
819                  if(firstLength<firstCapacity) {
820                    first[firstLength]=0;  // NUL-terminate in case it was originally.
821                  }
822                }
823            }
824        } else {
825            UnicodeString secondString(secondLength<0, second, secondLength);
826            if(doNormalize) {
827                n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
828            } else {
829                n2->append(firstString, secondString, *pErrorCode);
830            }
831        }
832    }
833    return firstString.extract(first, firstCapacity, *pErrorCode);
834}
835
836U_CAPI int32_t U_EXPORT2
837unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
838                                UChar *first, int32_t firstLength, int32_t firstCapacity,
839                                const UChar *second, int32_t secondLength,
840                                UErrorCode *pErrorCode) {
841    return normalizeSecondAndAppend(norm2,
842                                    first, firstLength, firstCapacity,
843                                    second, secondLength,
844                                    TRUE, pErrorCode);
845}
846
847U_CAPI int32_t U_EXPORT2
848unorm2_append(const UNormalizer2 *norm2,
849              UChar *first, int32_t firstLength, int32_t firstCapacity,
850              const UChar *second, int32_t secondLength,
851              UErrorCode *pErrorCode) {
852    return normalizeSecondAndAppend(norm2,
853                                    first, firstLength, firstCapacity,
854                                    second, secondLength,
855                                    FALSE, pErrorCode);
856}
857
858U_CAPI int32_t U_EXPORT2
859unorm2_getDecomposition(const UNormalizer2 *norm2,
860                        UChar32 c, UChar *decomposition, int32_t capacity,
861                        UErrorCode *pErrorCode) {
862    if(U_FAILURE(*pErrorCode)) {
863        return 0;
864    }
865    if(decomposition==NULL ? capacity!=0 : capacity<0) {
866        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
867        return 0;
868    }
869    UnicodeString destString(decomposition, 0, capacity);
870    if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
871        return destString.extract(decomposition, capacity, *pErrorCode);
872    } else {
873        return -1;
874    }
875}
876
877U_CAPI int32_t U_EXPORT2
878unorm2_getRawDecomposition(const UNormalizer2 *norm2,
879                           UChar32 c, UChar *decomposition, int32_t capacity,
880                           UErrorCode *pErrorCode) {
881    if(U_FAILURE(*pErrorCode)) {
882        return 0;
883    }
884    if(decomposition==NULL ? capacity!=0 : capacity<0) {
885        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
886        return 0;
887    }
888    UnicodeString destString(decomposition, 0, capacity);
889    if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
890        return destString.extract(decomposition, capacity, *pErrorCode);
891    } else {
892        return -1;
893    }
894}
895
896U_CAPI UChar32 U_EXPORT2
897unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) {
898    return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b);
899}
900
901U_CAPI uint8_t U_EXPORT2
902unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
903    return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c);
904}
905
906U_CAPI UBool U_EXPORT2
907unorm2_isNormalized(const UNormalizer2 *norm2,
908                    const UChar *s, int32_t length,
909                    UErrorCode *pErrorCode) {
910    if(U_FAILURE(*pErrorCode)) {
911        return 0;
912    }
913    if((s==NULL && length!=0) || length<-1) {
914        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
915        return 0;
916    }
917    UnicodeString sString(length<0, s, length);
918    return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
919}
920
921U_CAPI UNormalizationCheckResult U_EXPORT2
922unorm2_quickCheck(const UNormalizer2 *norm2,
923                  const UChar *s, int32_t length,
924                  UErrorCode *pErrorCode) {
925    if(U_FAILURE(*pErrorCode)) {
926        return UNORM_NO;
927    }
928    if((s==NULL && length!=0) || length<-1) {
929        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
930        return UNORM_NO;
931    }
932    UnicodeString sString(length<0, s, length);
933    return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
934}
935
936U_CAPI int32_t U_EXPORT2
937unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
938                         const UChar *s, int32_t length,
939                         UErrorCode *pErrorCode) {
940    if(U_FAILURE(*pErrorCode)) {
941        return 0;
942    }
943    if((s==NULL && length!=0) || length<-1) {
944        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
945        return 0;
946    }
947    UnicodeString sString(length<0, s, length);
948    return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
949}
950
951U_CAPI UBool U_EXPORT2
952unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
953    return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
954}
955
956U_CAPI UBool U_EXPORT2
957unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
958    return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
959}
960
961U_CAPI UBool U_EXPORT2
962unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
963    return ((const Normalizer2 *)norm2)->isInert(c);
964}
965
966// Some properties APIs ---------------------------------------------------- ***
967
968U_CAPI uint8_t U_EXPORT2
969u_getCombiningClass(UChar32 c) {
970    UErrorCode errorCode=U_ZERO_ERROR;
971    const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
972    if(U_SUCCESS(errorCode)) {
973        return nfd->getCombiningClass(c);
974    } else {
975        return 0;
976    }
977}
978
979U_CFUNC UNormalizationCheckResult
980unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
981    if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
982        return UNORM_YES;
983    }
984    UErrorCode errorCode=U_ZERO_ERROR;
985    const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
986    if(U_SUCCESS(errorCode)) {
987        return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
988    } else {
989        return UNORM_MAYBE;
990    }
991}
992
993U_CFUNC uint16_t
994unorm_getFCD16(UChar32 c) {
995    UErrorCode errorCode=U_ZERO_ERROR;
996    const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
997    if(U_SUCCESS(errorCode)) {
998        return impl->getFCD16(c);
999    } else {
1000        return 0;
1001    }
1002}
1003
1004#endif  // !UCONFIG_NO_NORMALIZATION
1005